X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=4805913de376fab95c1982eae45f0253a6dd13ec;hp=aa456e07fda54e470f3de09836be3ed154799ccf;hb=HEAD;hpb=fc938fc3886f0c8cacdbc83148484b55a32ca8e7 diff --git a/src/write.c b/src/write.c index aa456e07..4bf1b2be 100644 --- a/src/write.c +++ b/src/write.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers + * Copyright (C) 2012-2016 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -19,7 +19,7 @@ * details. * * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. + * along with this file; if not, see https://www.gnu.org/licenses/. */ #ifdef HAVE_CONFIG_H @@ -174,7 +174,12 @@ can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags, rdesc = blob->rdesc; - /* Only reuse compressed resources. */ + /* In the case of an in-place compaction, always reuse resources located + * in the WIM being compacted. */ + if (rdesc->wim->being_compacted) + return true; + + /* Otherwise, only reuse compressed resources. */ if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE || !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SOLID))) @@ -283,7 +288,7 @@ write_pwm_blob_header(const struct blob_descriptor *blob, blob_hdr.flags = cpu_to_le32(reshdr_flags); ret = full_write(out_fd, &blob_hdr, sizeof(blob_hdr)); if (ret) - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing blob header to WIM file"); return ret; } @@ -296,7 +301,8 @@ struct write_blobs_progress_data { static int do_write_blobs_progress(struct write_blobs_progress_data *progress_data, - u64 complete_size, u32 complete_count, bool discarded) + u64 complete_size, u64 complete_compressed_size, + u32 complete_count, bool discarded) { union wimlib_progress_info *progress = &progress_data->progress; int ret; @@ -311,6 +317,8 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data, } } else { progress->write_streams.completed_bytes += complete_size; + progress->write_streams.completed_compressed_bytes += + complete_compressed_size; progress->write_streams.completed_streams += complete_count; } @@ -372,12 +380,6 @@ struct write_blobs_ctx { * @blobs_being_compressed only when writing a solid resource. */ struct list_head blobs_in_solid_resource; - /* Current uncompressed offset in the blob being read. */ - u64 cur_read_blob_offset; - - /* Uncompressed size of the blob currently being read. */ - u64 cur_read_blob_size; - /* Current uncompressed offset in the blob being written. 
*/ u64 cur_write_blob_offset; @@ -456,8 +458,11 @@ begin_chunk_table(struct write_blobs_ctx *ctx, u64 res_expected_size) reserve_size += sizeof(struct alt_chunk_table_header_disk); memset(ctx->chunk_csizes, 0, reserve_size); ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size); - if (ret) + if (ret) { + ERROR_WITH_ERRNO("Error reserving space for chunk " + "table in WIM file"); return ret; + } } return 0; } @@ -501,8 +506,8 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size, 0 != (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)); - typedef le64 _may_alias_attribute aliased_le64_t; - typedef le32 _may_alias_attribute aliased_le32_t; + typedef le64 __attribute__((may_alias)) aliased_le64_t; + typedef le32 __attribute__((may_alias)) aliased_le32_t; if (chunk_entry_size == 4) { aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes; @@ -583,7 +588,7 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size, return 0; write_error: - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing chunk table to WIM file"); return ret; } @@ -634,6 +639,7 @@ do_done_with_blob(struct blob_descriptor *blob, { int ret; struct wim_inode *inode; + const tchar *path; tchar *cookie1; tchar *cookie2; @@ -647,10 +653,12 @@ do_done_with_blob(struct blob_descriptor *blob, if (--inode->i_num_remaining_streams > 0) return 0; - cookie1 = progress_get_streamless_path(blob->file_on_disk); - cookie2 = progress_get_win32_path(blob->file_on_disk); + path = blob_file_path(blob); + + cookie1 = progress_get_streamless_path(path); + cookie2 = progress_get_win32_path(path); - ret = done_with_file(blob->file_on_disk, progfunc, progctx); + ret = done_with_file(path, progfunc, progctx); progress_put_win32_path(cookie2); progress_put_streamless_path(cookie1); @@ -678,9 +686,6 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx) wimlib_assert(blob->size > 0); - ctx->cur_read_blob_offset = 0; - ctx->cur_read_blob_size = blob->size; - /* As an optimization, we allow some blobs to be "unhashed", meaning * their SHA-1 message digests are unknown. This is the case with blobs * that are added by scanning a directory tree with wimlib_add_image(), @@ -711,7 +716,9 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx) * output reference count to the duplicate blob * in the former case. */ ret = do_write_blobs_progress(&ctx->progress_data, - blob->size, 1, true); + blob->size, + blob->size, + 1, true); list_del(&blob->write_blobs_list); list_del(&blob->blob_table_list); if (new_blob->will_be_in_output_wim) @@ -760,7 +767,7 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd) if (filedes_seek(out_fd, begin_offset) == -1) return 0; - ret = extract_blob_to_fd(blob, out_fd); + ret = extract_blob_to_fd(blob, out_fd, false); if (ret) { /* Error reading the uncompressed data. */ if (out_fd->offset == begin_offset && @@ -818,7 +825,7 @@ should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx, * Exception: if the compressed size happens to be *exactly* the same as * the uncompressed size, then the blob *must* be written uncompressed * in order to remain compatible with the Windows Overlay Filesystem - * Filter Driver (WOF). + * filter driver (WOF). 
* * TODO: we are currently assuming that the optimization for * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents @@ -865,8 +872,7 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, { int ret; struct blob_descriptor *blob; - u32 completed_blob_count; - u32 completed_size; + u32 completed_blob_count = 0; blob = list_entry(ctx->blobs_being_compressed.next, struct blob_descriptor, write_blobs_list); @@ -913,8 +919,6 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, ctx->cur_write_blob_offset += usize; - completed_size = usize; - completed_blob_count = 0; if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { /* Wrote chunk in solid mode. It may have finished multiple * blobs. */ @@ -971,11 +975,11 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, } } - return do_write_blobs_progress(&ctx->progress_data, completed_size, + return do_write_blobs_progress(&ctx->progress_data, usize, csize, completed_blob_count, false); write_error: - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing chunk data to WIM file"); return ret; } @@ -1008,7 +1012,8 @@ prepare_chunk_buffer(struct write_blobs_ctx *ctx) /* Process the next chunk of data to be written to a WIM resource. */ static int -write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) +write_blob_process_chunk(const struct blob_descriptor *blob, u64 offset, + const void *chunk, size_t size, void *_ctx) { struct write_blobs_ctx *ctx = _ctx; int ret; @@ -1021,7 +1026,6 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) ret = write_chunk(ctx, chunk, size, size); if (ret) return ret; - ctx->cur_read_blob_offset += size; return 0; } @@ -1045,8 +1049,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) } else { needed_chunk_size = min(ctx->out_chunk_size, ctx->cur_chunk_buf_filled + - (ctx->cur_read_blob_size - - ctx->cur_read_blob_offset)); + (blob->size - offset)); } bytes_consumed = min(chunkend - chunkptr, @@ -1056,7 +1059,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr, bytes_consumed); chunkptr += bytes_consumed; - ctx->cur_read_blob_offset += bytes_consumed; + offset += bytes_consumed; ctx->cur_chunk_buf_filled += bytes_consumed; if (ctx->cur_chunk_buf_filled == needed_chunk_size) { @@ -1077,8 +1080,6 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx) { struct write_blobs_ctx *ctx = _ctx; - wimlib_assert(ctx->cur_read_blob_offset == ctx->cur_read_blob_size || status); - if (!blob->will_be_in_output_wim) { /* The blob was a duplicate. Now that its data has finished * being read, it is being discarded in favor of the duplicate @@ -1104,11 +1105,16 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx) return status; } -/* Compute statistics about a list of blobs that will be written. +/* + * Compute statistics about a list of blobs that will be written. * * Assumes the blobs are sorted such that all blobs located in each distinct WIM - * (specified by WIMStruct) are together. */ -static void + * (specified by WIMStruct) are together. + * + * For compactions, also verify that there are no overlapping resources. This + * really should be checked earlier, but for now it's easiest to check here. 
+ */ +static int compute_blob_list_stats(struct list_head *blob_list, struct write_blobs_ctx *ctx) { @@ -1117,15 +1123,32 @@ compute_blob_list_stats(struct list_head *blob_list, u64 num_blobs = 0; u64 total_parts = 0; WIMStruct *prev_wim_part = NULL; + const struct wim_resource_descriptor *prev_rdesc = NULL; list_for_each_entry(blob, blob_list, write_blobs_list) { num_blobs++; total_bytes += blob->size; if (blob->blob_location == BLOB_IN_WIM) { - if (prev_wim_part != blob->rdesc->wim) { - prev_wim_part = blob->rdesc->wim; + const struct wim_resource_descriptor *rdesc = blob->rdesc; + WIMStruct *wim = rdesc->wim; + + if (prev_wim_part != wim) { + prev_wim_part = wim; total_parts++; } + if (unlikely(wim->being_compacted) && rdesc != prev_rdesc) { + if (prev_rdesc != NULL && + rdesc->offset_in_wim < + prev_rdesc->offset_in_wim + + prev_rdesc->size_in_wim) + { + WARNING("WIM file contains overlapping " + "resources! Compaction is not " + "possible."); + return WIMLIB_ERR_RESOURCE_ORDER; + } + prev_rdesc = rdesc; + } } } ctx->progress_data.progress.write_streams.total_bytes = total_bytes; @@ -1136,6 +1159,7 @@ compute_blob_list_stats(struct list_head *blob_list, ctx->progress_data.progress.write_streams.total_parts = total_parts; ctx->progress_data.progress.write_streams.completed_parts = 0; ctx->progress_data.next_progress = 0; + return 0; } /* Find blobs in @blob_list that can be copied to the output WIM in raw form @@ -1200,21 +1224,43 @@ write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc, } in_fd = &in_rdesc->wim->in_fd; wimlib_assert(cur_read_offset != end_read_offset); - do { - bytes_to_read = min(sizeof(buf), end_read_offset - cur_read_offset); + if (likely(!in_rdesc->wim->being_compacted) || + in_rdesc->offset_in_wim > out_fd->offset) { + do { + bytes_to_read = min(sizeof(buf), + end_read_offset - cur_read_offset); - ret = full_pread(in_fd, buf, bytes_to_read, cur_read_offset); - if (ret) - return ret; + ret = full_pread(in_fd, buf, bytes_to_read, + cur_read_offset); + if (ret) { + ERROR_WITH_ERRNO("Error reading raw data " + "from WIM file"); + return ret; + } - ret = full_write(out_fd, buf, bytes_to_read); - if (ret) - return ret; + ret = full_write(out_fd, buf, bytes_to_read); + if (ret) { + ERROR_WITH_ERRNO("Error writing raw data " + "to WIM file"); + return ret; + } - cur_read_offset += bytes_to_read; + cur_read_offset += bytes_to_read; - } while (cur_read_offset != end_read_offset); + } while (cur_read_offset != end_read_offset); + } else { + /* Optimization: the WIM file is being compacted and the + * resource being written is already in the desired location. + * Skip over the data instead of re-writing it. */ + + /* Due the earlier check for overlapping resources, it should + * never be the case that we already overwrote the resource. */ + wimlib_assert(!(in_rdesc->offset_in_wim < out_fd->offset)); + + if (-1 == filedes_seek(out_fd, out_fd->offset + in_rdesc->size_in_wim)) + return WIMLIB_ERR_WRITE; + } list_for_each_entry(blob, &in_rdesc->blob_list, rdesc_node) { if (blob->will_be_in_output_wim) { @@ -1243,15 +1289,18 @@ write_raw_copy_resources(struct list_head *raw_copy_blobs, blob->rdesc->raw_copy_ok = 1; list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) { + u64 compressed_size = 0; + if (blob->rdesc->raw_copy_ok) { /* Write each solid resource only one time. 
*/ ret = write_raw_copy_resource(blob->rdesc, out_fd); if (ret) return ret; blob->rdesc->raw_copy_ok = 0; + compressed_size = blob->rdesc->size_in_wim; } ret = do_write_blobs_progress(progress_data, blob->size, - 1, false); + compressed_size, 1, false); if (ret) return ret; } @@ -1296,17 +1345,6 @@ validate_blob_list(struct list_head *blob_list) } } -static inline bool -blob_is_in_file(const struct blob_descriptor *blob) -{ - return blob->blob_location == BLOB_IN_FILE_ON_DISK -#ifdef __WIN32__ - || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK - || blob->blob_location == BLOB_WIN32_ENCRYPTED -#endif - ; -} - static void init_done_with_file_info(struct list_head *blob_list) { @@ -1420,9 +1458,9 @@ init_done_with_file_info(struct list_head *blob_list) * identical to another blob already being written or one that would be filtered * out of the output WIM using blob_filtered() with the context @filter_ctx. * Each such duplicate blob will be removed from @blob_list, its reference count - * transfered to the pre-existing duplicate blob, its memory freed, and will not - * be written. Alternatively, if a blob in @blob_list is a duplicate with any - * blob in @blob_table that has not been marked for writing or would not be + * transferred to the pre-existing duplicate blob, its memory freed, and will + * not be written. Alternatively, if a blob in @blob_list is a duplicate with + * any blob in @blob_table that has not been marked for writing or would not be * hard-filtered, it is freed and the pre-existing duplicate is written instead, * taking ownership of the reference count and slot in the @blob_table_list. * @@ -1490,7 +1528,9 @@ write_blob_list(struct list_head *blob_list, if (ret) return ret; - compute_blob_list_stats(blob_list, &ctx); + ret = compute_blob_list_stats(blob_list, &ctx); + if (ret) + return ret; if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) { ret = sort_blob_list_for_solid_compression(blob_list); @@ -1505,22 +1545,12 @@ write_blob_list(struct list_head *blob_list, out_ctype, out_chunk_size, &raw_copy_blobs); - /* Copy any compressed resources for which the raw data can be reused - * without decompression. */ - ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd, - &ctx.progress_data); - - if (ret || num_nonraw_bytes == 0) - goto out_destroy_context; - - /* Unless uncompressed output was required, allocate a chunk_compressor - * to do compression. There are serial and parallel implementations of - * the chunk_compressor interface. We default to parallel using the + /* Unless no data needs to be compressed, allocate a chunk_compressor to + * do compression. There are serial and parallel implementations of the + * chunk_compressor interface. We default to parallel using the * specified number of threads, unless the upper bound on the number * bytes needing to be compressed is less than a heuristic value. 
*/ - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { - - #ifdef ENABLE_MULTITHREADED_COMPRESSION + if (num_nonraw_bytes != 0 && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { if (num_nonraw_bytes > max(2000000, out_chunk_size)) { ret = new_parallel_chunk_compressor(out_ctype, out_chunk_size, @@ -1532,7 +1562,6 @@ write_blob_list(struct list_head *blob_list, wimlib_get_error_string(ret)); } } - #endif if (ctx.compressor == NULL) { ret = new_serial_chunk_compressor(out_ctype, out_chunk_size, @@ -1547,9 +1576,6 @@ write_blob_list(struct list_head *blob_list, else ctx.progress_data.progress.write_streams.num_threads = 1; - INIT_LIST_HEAD(&ctx.blobs_being_compressed); - INIT_LIST_HEAD(&ctx.blobs_in_solid_resource); - ret = call_progress(ctx.progress_data.progfunc, WIMLIB_PROGRESS_MSG_WRITE_STREAMS, &ctx.progress_data.progress, @@ -1557,7 +1583,20 @@ write_blob_list(struct list_head *blob_list, if (ret) goto out_destroy_context; + /* Copy any compressed resources for which the raw data can be reused + * without decompression. */ + ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd, + &ctx.progress_data); + + if (ret || num_nonraw_bytes == 0) + goto out_destroy_context; + + INIT_LIST_HEAD(&ctx.blobs_being_compressed); + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { + + INIT_LIST_HEAD(&ctx.blobs_in_solid_resource); + ret = begin_write_resource(&ctx, num_nonraw_bytes); if (ret) goto out_destroy_context; @@ -1568,7 +1607,7 @@ write_blob_list(struct list_head *blob_list, struct read_blob_callbacks cbs = { .begin_blob = write_blob_begin_read, - .consume_chunk = write_blob_process_chunk, + .continue_blob = write_blob_process_chunk, .end_blob = write_blob_end_read, .ctx = &ctx, }; @@ -1698,7 +1737,7 @@ write_wim_resource_from_buffer(const void *buf, } blob_set_is_located_in_attached_buffer(&blob, (void *)buf, buf_size); - sha1_buffer(buf, buf_size, blob.hash); + sha1(buf, buf_size, blob.hash); blob.unhashed = 0; blob.is_metadata = is_metadata; @@ -2133,16 +2172,28 @@ write_metadata_resources(WIMStruct *wim, int image, int write_flags) struct wim_image_metadata *imd; imd = wim->image_metadata[i - 1]; - /* Build a new metadata resource only if image was modified from - * the original (or was newly added). Otherwise just copy the - * existing one. */ - if (imd->modified) { + if (is_image_dirty(imd)) { + /* The image was modified from the original, or was + * newly added, so we have to build and write a new + * metadata resource. */ ret = write_metadata_resource(wim, i, write_resource_flags); - } else if (write_flags & WIMLIB_WRITE_FLAG_APPEND) { - blob_set_out_reshdr_for_reuse(imd->metadata_blob); + } else if (is_image_unchanged_from_wim(imd, wim) && + (write_flags & (WIMLIB_WRITE_FLAG_UNSAFE_COMPACT | + WIMLIB_WRITE_FLAG_APPEND))) + { + /* The metadata resource is already in the WIM file. + * For appends, we don't need to write it at all. For + * compactions, we re-write existing metadata resources + * along with the existing file resources, not here. */ + if (write_flags & WIMLIB_WRITE_FLAG_APPEND) + blob_set_out_reshdr_for_reuse(imd->metadata_blob); ret = 0; } else { + /* The metadata resource is in a WIM file other than the + * one being written to. We need to rewrite it, + * possibly compressed differently; but rebuilding the + * metadata itself isn't necessary. 
*/ ret = write_wim_resource(imd->metadata_blob, &wim->out_fd, wim->out_compression_type, @@ -2321,10 +2372,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) { ret = write_blob_table(wim, image, write_flags, blob_table_list); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; } /* Write XML data. */ @@ -2334,13 +2383,13 @@ finish_write(WIMStruct *wim, int image, int write_flags, ret = write_wim_xml_data(wim, image, xml_totalbytes, &wim->out_hdr.xml_data_reshdr, write_resource_flags); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; /* Write integrity table if needed. */ - if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) { + if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) && + wim->out_hdr.blob_table_reshdr.offset_in_wim != 0) + { if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) { /* The XML data we wrote may have overwritten part of * the old integrity table, so while calculating the new @@ -2351,10 +2400,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, zero_reshdr(&checkpoint_hdr.integrity_table_reshdr); checkpoint_hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS; ret = write_wim_header(&checkpoint_hdr, &wim->out_fd, 0); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; } new_blob_table_end = wim->out_hdr.blob_table_reshdr.offset_in_wim + @@ -2364,9 +2411,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, new_blob_table_end, old_blob_table_end, old_integrity_table); - free_integrity_table(old_integrity_table); if (ret) - return ret; + goto out; } else { /* No integrity table. */ zero_reshdr(&wim->out_hdr.integrity_table_reshdr); @@ -2382,7 +2428,19 @@ finish_write(WIMStruct *wim, int image, int write_flags, else ret = write_wim_header(&wim->out_hdr, &wim->out_fd, 0); if (ret) - return ret; + goto out; + + ret = WIMLIB_ERR_WRITE; + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { + /* Truncate any data the compaction freed up. */ + if (ftruncate(wim->out_fd.fd, wim->out_fd.offset) && + errno != EINVAL) /* allow compaction on untruncatable files, + e.g. block devices */ + { + ERROR_WITH_ERRNO("Failed to truncate the output WIM file"); + goto out; + } + } /* Possibly sync file data to disk before closing. On POSIX systems, it * is necessary to do this before using rename() to overwrite an @@ -2390,19 +2448,24 @@ finish_write(WIMStruct *wim, int image, int write_flags, * the system is abruptly terminated when the metadata for the rename * operation has been written to disk, but the new file data has not. */ + ret = WIMLIB_ERR_WRITE; if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) { if (fsync(wim->out_fd.fd)) { ERROR_WITH_ERRNO("Error syncing data to WIM file"); - return WIMLIB_ERR_WRITE; + goto out; } } + ret = WIMLIB_ERR_WRITE; if (close_wim_writable(wim, write_flags)) { ERROR_WITH_ERRNO("Failed to close the output WIM file"); - return WIMLIB_ERR_WRITE; + goto out; } - return 0; + ret = 0; +out: + free_integrity_table(old_integrity_table); + return ret; } #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK) @@ -2530,14 +2593,15 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags, /* At this point, the header at the beginning of the file has already * been written. 
*/ - /* For efficiency, when wimlib adds an image to the WIM with - * wimlib_add_image(), the SHA-1 message digests of files are not - * calculated; instead, they are calculated while the files are being - * written. However, this does not work when writing a pipable WIM, - * since when writing a blob to a pipable WIM, its SHA-1 message digest - * needs to be known before the blob data is written. Therefore, before - * getting much farther, we need to pre-calculate the SHA-1 message - * digests of all blobs that will be written. */ + /* + * For efficiency, wimlib normally delays calculating each newly added + * stream's hash until while that stream being written, or just before + * it is written. However, when writing a pipable WIM (potentially to a + * pipe), we first have to write the metadata resources, which contain + * all the hashes. Moreover each blob is prefixed with its hash (struct + * pwm_blob_hdr). Thus, we have to calculate all the hashes before + * writing anything. + */ ret = wim_checksum_unhashed_blobs(wim); if (ret) return ret; @@ -2573,6 +2637,25 @@ should_default_to_solid_compression(WIMStruct *wim, int write_flags) wim_has_solid_resources(wim); } +/* Update the images' filecount/bytecount stats (in the XML info) to take into + * account any recent modifications. */ +static int +update_image_stats(WIMStruct *wim) +{ + if (!wim_has_metadata(wim)) + return 0; + for (int i = 0; i < wim->hdr.image_count; i++) { + struct wim_image_metadata *imd = wim->image_metadata[i]; + if (imd->stats_outdated) { + int ret = xml_update_image_info(wim, i + 1); + if (ret) + return ret; + imd->stats_outdated = false; + } + } + return 0; +} + /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file * descriptor. */ int @@ -2619,6 +2702,10 @@ write_wim_part(WIMStruct *wim, WIMLIB_WRITE_FLAG_NOT_PIPABLE)) return WIMLIB_ERR_INVALID_PARAM; + /* Only wimlib_overwrite() accepts UNSAFE_COMPACT. */ + if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT) + return WIMLIB_ERR_INVALID_PARAM; + /* Include an integrity table by default if no preference was given and * the WIM already had an integrity table. */ if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY | @@ -2716,6 +2803,11 @@ write_wim_part(WIMStruct *wim, wim->out_hdr.boot_idx = 1; } + /* Update image stats if needed. */ + ret = update_image_stats(wim); + if (ret) + return ret; + /* Set up the output file descriptor. */ if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) { /* File descriptor was explicitly provided. */ @@ -2818,11 +2910,16 @@ wimlib_write_to_fd(WIMStruct *wim, int fd, return write_standalone_wim(wim, &fd, image, write_flags, num_threads); } +/* Have there been any changes to images in the specified WIM, including updates + * as well as deletions and additions of entire images, but excluding changes to + * the XML document? 
*/ static bool -any_images_modified(WIMStruct *wim) +any_images_changed(WIMStruct *wim) { + if (wim->image_deletion_occurred) + return true; for (int i = 0; i < wim->hdr.image_count; i++) - if (wim->image_metadata[i]->modified) + if (!is_image_unchanged_from_wim(wim->image_metadata[i], wim)) return true; return false; } @@ -2862,6 +2959,20 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) return 0; } +static int +free_blob_if_invalidated(struct blob_descriptor *blob, void *_wim) +{ + const WIMStruct *wim = _wim; + + if (!blob->will_be_in_output_wim && + blob->blob_location == BLOB_IN_WIM && blob->rdesc->wim == wim) + { + blob_table_unlink(wim->blob_table, blob); + free_blob_descriptor(blob); + } + return 0; +} + /* * Overwrite a WIM, possibly appending new resources to it. * @@ -2917,13 +3028,25 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) * is that a small hole is left in the WIM where the old blob table, xml data, * and integrity table were. (These usually only take up a small amount of * space compared to the blobs, however.) + * + * Finally, this function also supports "compaction" overwrites as an + * alternative to the normal "append" overwrites described above. In a + * compaction, data is written starting immediately from the end of the header. + * All existing resources are written first, in order by file offset. New + * resources are written afterwards, and at the end any extra data is truncated + * from the file. The advantage of this approach is that is that the WIM file + * ends up fully optimized, without any holes remaining. The main disadavantage + * is that this operation is fundamentally unsafe and cannot be interrupted + * without data corruption. Consequently, compactions are only ever done when + * explicitly requested by the library user with the flag + * WIMLIB_WRITE_FLAG_UNSAFE_COMPACT. (Another disadvantage is that a compaction + * can be much slower than an append.) */ static int overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) { int ret; off_t old_wim_end; - u64 old_blob_table_end, old_xml_begin, old_xml_end; struct list_head blob_list; struct list_head blob_table_list; struct filter_context filter_ctx; @@ -2949,66 +3072,113 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) if (should_default_to_solid_compression(wim, write_flags)) write_flags |= WIMLIB_WRITE_FLAG_SOLID; - /* Set additional flags for overwrite. */ - write_flags |= WIMLIB_WRITE_FLAG_APPEND | - WIMLIB_WRITE_FLAG_STREAMS_OK; - - /* Make sure there is no data after the XML data, except possibily an - * integrity table. If this were the case, then this data would be - * overwritten. 
*/ - old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim; - old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim; - old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + - wim->hdr.blob_table_reshdr.size_in_wim; - if (wim_has_integrity_table(wim) && - wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { - WARNING("Didn't expect the integrity table to be before the XML data"); - ret = WIMLIB_ERR_RESOURCE_ORDER; - goto out; - } + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { - if (old_blob_table_end > old_xml_begin) { - WARNING("Didn't expect the blob table to be after the XML data"); - ret = WIMLIB_ERR_RESOURCE_ORDER; - goto out; - } + /* In-place compaction */ - /* Set @old_wim_end, which indicates the point beyond which we don't - * allow any file and metadata resources to appear without returning - * WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we would otherwise - * overwrite these resources). */ - if (!wim->image_deletion_occurred && !any_images_modified(wim)) { - /* If no images have been modified and no images have been - * deleted, a new blob table does not need to be written. We - * shall write the new XML data and optional integrity table - * immediately after the blob table. Note that this may - * overwrite an existing integrity table. */ - old_wim_end = old_blob_table_end; - write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS; - } else if (wim_has_integrity_table(wim)) { - /* Old WIM has an integrity table; begin writing new blobs after - * it. */ - old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim + - wim->hdr.integrity_table_reshdr.size_in_wim; + WARNING("The WIM file \"%"TS"\" is being compacted in place.\n" + " Do *not* interrupt the operation, or else " + "the WIM file will be\n" + " corrupted!", wim->filename); + wim->being_compacted = 1; + old_wim_end = WIM_HEADER_DISK_SIZE; + + ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, + write_flags, &blob_list, + &blob_table_list, &filter_ctx); + if (ret) + goto out; + + /* Prevent new files from being deduplicated with existing blobs + * in the WIM that we haven't decided to write. Such blobs will + * be overwritten during the compaction. */ + for_blob_in_table(wim->blob_table, free_blob_if_invalidated, wim); + + if (wim_has_metadata(wim)) { + /* Add existing metadata resources to be compacted along + * with the file resources. */ + for (int i = 0; i < wim->hdr.image_count; i++) { + struct wim_image_metadata *imd = wim->image_metadata[i]; + if (is_image_unchanged_from_wim(imd, wim)) { + fully_reference_blob_for_write(imd->metadata_blob, + &blob_list); + } + } + } } else { - /* No existing integrity table; begin writing new blobs after - * the old XML data. */ - old_wim_end = old_xml_end; - } + u64 old_blob_table_end, old_xml_begin, old_xml_end; + + /* Set additional flags for append. */ + write_flags |= WIMLIB_WRITE_FLAG_APPEND | + WIMLIB_WRITE_FLAG_STREAMS_OK; + + /* Make sure there is no data after the XML data, except + * possibily an integrity table. If this were the case, then + * this data would be overwritten. 
*/ + old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim; + old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim; + if (wim->hdr.blob_table_reshdr.offset_in_wim == 0) + old_blob_table_end = WIM_HEADER_DISK_SIZE; + else + old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; + if (wim_has_integrity_table(wim) && + wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { + WARNING("Didn't expect the integrity table to be " + "before the XML data"); + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out; + } - ret = check_resource_offsets(wim, old_wim_end); - if (ret) - goto out; + if (old_blob_table_end > old_xml_begin) { + WARNING("Didn't expect the blob table to be after " + "the XML data"); + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out; + } + /* Set @old_wim_end, which indicates the point beyond which we + * don't allow any file and metadata resources to appear without + * returning WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we + * would otherwise overwrite these resources). */ + if (!any_images_changed(wim)) { + /* If no images have been modified, added, or deleted, + * then a new blob table does not need to be written. + * We shall write the new XML data and optional + * integrity table immediately after the blob table. + * Note that this may overwrite an existing integrity + * table. */ + old_wim_end = old_blob_table_end; + write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS; + } else if (wim_has_integrity_table(wim)) { + /* Old WIM has an integrity table; begin writing new + * blobs after it. */ + old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim + + wim->hdr.integrity_table_reshdr.size_in_wim; + } else { + /* No existing integrity table; begin writing new blobs + * after the old XML data. */ + old_wim_end = old_xml_end; + } + + ret = check_resource_offsets(wim, old_wim_end); + if (ret) + goto out; - ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags, - &blob_list, &blob_table_list, - &filter_ctx); + ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, + write_flags, &blob_list, + &blob_table_list, &filter_ctx); + if (ret) + goto out; + + if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) + wimlib_assert(list_empty(&blob_list)); + } + + /* Update image stats if needed. */ + ret = update_image_stats(wim); if (ret) goto out; - if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) - wimlib_assert(list_empty(&blob_list)); - ret = open_wim_writable(wim, wim->filename, O_RDWR); if (ret) goto out; @@ -3050,12 +3220,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) return 0; out_truncate: - if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) { + if (!(write_flags & (WIMLIB_WRITE_FLAG_NO_NEW_BLOBS | + WIMLIB_WRITE_FLAG_UNSAFE_COMPACT))) { WARNING("Truncating \"%"TS"\" to its original size " "(%"PRIu64" bytes)", wim->filename, old_wim_end); - /* Return value of ftruncate() is ignored because this is - * already an error path. 
*/ - (void)ftruncate(wim->out_fd.fd, old_wim_end); + if (ftruncate(wim->out_fd.fd, old_wim_end)) + WARNING_WITH_ERRNO("Failed to truncate WIM file!"); } out_restore_hdr: (void)write_wim_header_flags(wim->hdr.flags, &wim->out_fd); @@ -3064,6 +3234,7 @@ out_unlock_wim: out_close_wim: (void)close_wim_writable(wim, write_flags); out: + wim->being_compacted = 0; return ret; } @@ -3078,7 +3249,7 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads) wim_name_len = tstrlen(wim->filename); tchar tmpfile[wim_name_len + 10]; tmemcpy(tmpfile, wim->filename, wim_name_len); - randomize_char_array_with_alnum(tmpfile + wim_name_len, 9); + get_random_alnum_chars(tmpfile + wim_name_len, 9); tmpfile[wim_name_len + 9] = T('\0'); ret = wimlib_write(wim, tmpfile, WIMLIB_ALL_IMAGES, @@ -3103,7 +3274,7 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads) if (ret) { ERROR_WITH_ERRNO("Failed to rename `%"TS"' to `%"TS"'", tmpfile, wim->filename); - #ifdef __WIN32__ + #ifdef _WIN32 if (ret < 0) #endif { @@ -3162,6 +3333,20 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads) if (!wim->filename) return WIMLIB_ERR_NO_FILENAME; + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { + /* + * In UNSAFE_COMPACT mode: + * - RECOMPRESS is forbidden + * - REBUILD is ignored + * - SOFT_DELETE and NO_SOLID_SORT are implied + */ + if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) + return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE; + write_flags &= ~WIMLIB_WRITE_FLAG_REBUILD; + write_flags |= WIMLIB_WRITE_FLAG_SOFT_DELETE; + write_flags |= WIMLIB_WRITE_FLAG_NO_SOLID_SORT; + } + orig_hdr_flags = wim->hdr.flags; if (write_flags & WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG) wim->hdr.flags &= ~WIM_HDR_FLAG_READONLY; @@ -3176,5 +3361,7 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads) return ret; WARNING("Falling back to re-building entire WIM"); } + if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT) + return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE; return overwrite_wim_via_tmpfile(wim, write_flags, num_threads); }
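
A minimal caller-side sketch of the compaction mode this patch introduces, added here for context rather than as part of the diff. It assumes the public wimlib.h API as of this commit (wimlib_open_wim(), wimlib_register_progress_function(), wimlib_overwrite(), wimlib_free(), WIMLIB_WRITE_FLAG_UNSAFE_COMPACT, and the write_streams progress fields); compact_wim() and print_progress() are illustrative names only, and error handling is abbreviated. It also exercises the completed_compressed_bytes counter that do_write_blobs_progress() now maintains.

/* Sketch only: drive the in-place compaction added by this patch through
 * wimlib's public API.  compact_wim() and print_progress() are example
 * names, not part of the library. */

#include <inttypes.h>
#include <stdio.h>
#include <wimlib.h>

/* Print write progress, including the completed_compressed_bytes counter
 * that do_write_blobs_progress() now fills in. */
static enum wimlib_progress_status
print_progress(enum wimlib_progress_msg msg,
	       union wimlib_progress_info *info, void *ctx)
{
	(void)ctx;
	if (msg == WIMLIB_PROGRESS_MSG_WRITE_STREAMS) {
		printf("\r%"PRIu64"/%"PRIu64" bytes written "
		       "(%"PRIu64" bytes compressed)",
		       info->write_streams.completed_bytes,
		       info->write_streams.total_bytes,
		       info->write_streams.completed_compressed_bytes);
		fflush(stdout);
	}
	return WIMLIB_PROGRESS_STATUS_CONTINUE;
}

/* Compact a WIM file in place.  On a non-Windows build, wimlib_tchar is
 * plain char, so an ordinary path string can be passed directly. */
static int
compact_wim(const char *path)
{
	WIMStruct *wim;
	int ret;

	ret = wimlib_open_wim(path, WIMLIB_OPEN_FLAG_WRITE_ACCESS, &wim);
	if (ret)
		return ret;

	wimlib_register_progress_function(wim, print_progress, NULL);

	/* UNSAFE_COMPACT takes the in-place compaction path in
	 * overwrite_wim_inplace(): existing resources are rewritten starting
	 * right after the header, new resources follow, and the file is
	 * truncated at the end.  As the warning in that function says,
	 * interrupting the operation corrupts the WIM.  RECOMPRESS is
	 * rejected in this mode and REBUILD is ignored.  Passing 0 threads
	 * uses the default thread count. */
	ret = wimlib_overwrite(wim, WIMLIB_WRITE_FLAG_UNSAFE_COMPACT, 0);

	wimlib_free(wim);
	return ret;
}

Building this is the usual "cc example.c -lwim" (assuming wimlib is installed); on a Windows build wimlib_tchar is a wide character type, so the path argument would need to be a wide string instead.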