X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=159fd57da9f731195f4aa5ae4c985195d7df36ff;hp=1891c89b20685a0caf05b3ce3bb8cb6755c51566;hb=f3e97b29c4a8c564d54b0fd11cd43a9b4cd6a8ad;hpb=fc8276d0a3efb3df5f7512b3fa9499eb1b3449eb diff --git a/src/write.c b/src/write.c index 1891c89b..159fd57d 100644 --- a/src/write.c +++ b/src/write.c @@ -69,25 +69,6 @@ # define INVALID_HANDLE_VALUE ((HANDLE)(-1)) #endif -static int -fflush_and_ftruncate(FILE *fp, off_t size) -{ - int ret; - - ret = fflush(fp); - if (ret != 0) { - ERROR_WITH_ERRNO("Failed to flush data to output WIM file"); - return WIMLIB_ERR_WRITE; - } - ret = ftruncate(fileno(fp), size); - if (ret != 0) { - ERROR_WITH_ERRNO("Failed to truncate output WIM file to " - "%"PRIu64" bytes", size); - return WIMLIB_ERR_WRITE; - } - return 0; -} - /* Chunk table that's located at the beginning of each compressed resource in * the WIM. (This is not the on-disk format; the on-disk format just has an * array of offsets.) */ @@ -137,13 +118,14 @@ begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte, chunk_tab->table_disk_size) { ERROR_WITH_ERRNO("Failed to write chunk table in compressed " "file resource"); + FREE(chunk_tab); ret = WIMLIB_ERR_WRITE; goto out; } ret = 0; -out: *chunk_tab_ret = chunk_tab; +out: return ret; } @@ -269,29 +251,6 @@ finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab, return 0; } -static int -write_uncompressed_resource_and_truncate(struct wim_lookup_table_entry *lte, - FILE *out_fp, - off_t file_offset, - struct resource_entry *out_res_entry) -{ - int ret; - if (fseeko(out_fp, file_offset, SEEK_SET) != 0) { - ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of " - "output WIM file", file_offset); - return WIMLIB_ERR_WRITE; - } - ret = write_wim_resource(lte, out_fp, - WIMLIB_COMPRESSION_TYPE_NONE, - out_res_entry, - 0); - if (ret) - return ret; - - return fflush_and_ftruncate(out_fp, - file_offset + wim_resource_size(lte)); -} - struct 
write_resource_ctx { compress_func_t compress; struct chunk_table *chunk_tab; @@ -322,6 +281,30 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx) } } +/* + * Write a resource to an output WIM. + * + * @lte: Lookup table entry for the resource, which could be in another WIM, + * in an external file, or in another location. + * + * @out_fp: FILE * opened to the output WIM. + * + * @out_ctype: One of the WIMLIB_COMPRESSION_TYPE_* constants to indicate + * which compression algorithm to use. + * + * @out_res_entry: On success, this is filled in with the offset, flags, + * compressed size, and uncompressed size of the resource + * in the output WIM. + * + * @flags: WIMLIB_RESOURCE_FLAG_RECOMPRESS to force data to be recompressed + * even if it could otherwise be copied directly from the input. + * + * Additional notes: The SHA1 message digest of the uncompressed data is + * calculated (except when doing a raw copy --- see below). If the @unhashed + * flag is set on the lookup table entry, this message digest is simply copied + * to it; otherwise, the message digest is compared with the existing one, and + * the function will fail if they do not match. + */ int write_wim_resource(struct wim_lookup_table_entry *lte, FILE *out_fp, int out_ctype, @@ -333,22 +316,29 @@ write_wim_resource(struct wim_lookup_table_entry *lte, off_t offset; int ret; + flags &= ~WIMLIB_RESOURCE_FLAG_RECOMPRESS; + if (wim_resource_size(lte) == 0) { /* Empty resource; nothing needs to be done, so just return * success. */ return 0; } + /* Get current position in output WIM */ offset = ftello(out_fp); if (offset == -1) { ERROR_WITH_ERRNO("Can't get position in output WIM"); return WIMLIB_ERR_WRITE; } - /* Can we simply copy the compressed data without recompressing it? 
*/ - + /* If we are not forcing the data to be recompressed, and the input + * resource is located in a WIM with the same compression type as that + * desired other than no compression, we can simply copy the compressed + * data without recompressing it. This also means we must skip + * calculating the SHA1, as we never will see the uncompressed data. */ if (!(flags & WIMLIB_RESOURCE_FLAG_RECOMPRESS) && lte->resource_location == RESOURCE_IN_WIM && + out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && wimlib_get_compression_type(lte->wim) == out_ctype) { flags |= WIMLIB_RESOURCE_FLAG_RAW; @@ -373,10 +363,12 @@ write_wim_resource(struct wim_lookup_table_entry *lte, return ret; } - /* Write the data */ + /* Write the entire resource by reading the entire resource and feeding + * the data through the write_resource_cb function. */ write_ctx.out_fp = out_fp; +try_write_again: ret = read_resource_prefix(lte, wim_resource_size(lte), - write_resource_cb, &write_ctx, 0); + write_resource_cb, &write_ctx, flags); /* Verify SHA1 message digest of the resource, or set the hash for the * first time. */ @@ -420,11 +412,23 @@ write_wim_resource(struct wim_lookup_table_entry *lte, if (new_size >= wim_resource_size(lte)) { /* Oops! We compressed the resource to larger than the original * size. Write the resource uncompressed instead. 
*/ - ret = write_uncompressed_resource_and_truncate(lte, - out_fp, - offset, - out_res_entry); - goto out_free_chunk_tab; + if (fseeko(out_fp, offset, SEEK_SET) || + fflush(out_fp) || + ftruncate(fileno(out_fp), + offset + wim_resource_size(lte))) + { + ERROR_WITH_ERRNO("Failed to flush and/or truncate " + "output WIM file"); + ret = WIMLIB_ERR_WRITE; + goto out_free_chunk_tab; + } + DEBUG("Compressed %"PRIu64" => %"PRIu64" bytes; " + "writing uncompressed instead", + wim_resource_size(lte), new_size); + write_ctx.compress = NULL; + write_ctx.doing_sha = false; + out_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + goto try_write_again; } out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED; } @@ -625,7 +629,7 @@ enum { }; static int -do_write_stream_list(struct list_head *my_resources, +do_write_stream_list(struct list_head *stream_list, struct wim_lookup_table *lookup_table, FILE *out_fp, int out_ctype, @@ -636,44 +640,86 @@ do_write_stream_list(struct list_head *my_resources, int ret; struct wim_lookup_table_entry *lte; - while (!list_empty(my_resources)) { - lte = container_of(my_resources->next, + /* For each stream in @stream_list ... */ + while (!list_empty(stream_list)) { + lte = container_of(stream_list->next, struct wim_lookup_table_entry, write_streams_list); list_del(&lte->write_streams_list); if (lte->unhashed && !lte->unique_size) { + + /* Unhashed stream that shares a size with some other + * stream in the WIM we are writing. The stream must be + * checksummed to know if we need to write it or not. */ struct wim_lookup_table_entry *duplicate_lte; - struct wim_lookup_table_entry **my_ptr; + struct wim_lookup_table_entry **back_ptr; - my_ptr = lte->my_ptr; + /* back_ptr must be saved because it's in union with the + * SHA1 message digest and will no longer be valid once + * the SHA1 has been calculated. 
*/ + back_ptr = lte->back_ptr; + + /* Checksum the stream */ ret = sha1_resource(lte); if (ret) return ret; + + /* Look for a duplicate stream */ duplicate_lte = __lookup_resource(lookup_table, lte->hash); if (duplicate_lte) { - bool new_stream = (duplicate_lte->out_refcnt == 0); + /* We have a duplicate stream. Transfer the + * reference counts from this stream to the + * duplicate, update the reference to this + * stream (in an inode or ads_entry) to point to + * the duplicate, then free this stream. */ + wimlib_assert(!(duplicate_lte->unhashed)); + bool is_new_stream = (duplicate_lte->out_refcnt == 0); duplicate_lte->refcnt += lte->refcnt; duplicate_lte->out_refcnt += lte->refcnt; - *my_ptr = duplicate_lte; + *back_ptr = duplicate_lte; + list_del(&lte->unhashed_list); free_lookup_table_entry(lte); lte = duplicate_lte; - if (new_stream) { - DEBUG("Stream of length %"PRIu64" is duplicate " - "with one already in WIM", - wim_resource_size(lte)); + + if (is_new_stream) { + /* The duplicate stream is one we + * weren't already planning to write. + * But, now we must write it. + * + * XXX: Currently, the copy of the + * stream in the WIM is always chosen + * for writing, rather than the extra + * copy we just read (which may be in an + * external file). This may not always + * be fastest. */ } else { + /* We have already written, or are going + * to write, the duplicate stream. So + * just skip to the next stream. */ DEBUG("Discarding duplicate stream of length %"PRIu64, wim_resource_size(lte)); goto skip_to_progress; } } else { + /* No duplicate stream, so we need to insert + * this stream into the lookup table and treat + * it as a hashed stream. */ + list_del(&lte->unhashed_list); lookup_table_insert(lookup_table, lte); lte->out_refcnt = lte->refcnt; lte->unhashed = 0; } } + /* Here, @lte either a hashed stream or an unhashed stream with + * a unique size. In either case we know that the stream has to + * be written. 
In either case the SHA1 message digest will be + * calculated over the stream while writing it; however, in the + * former case this is done merely to check the data, while in + * the latter case this is done because we do not have the SHA1 + * message digest yet. */ + wimlib_assert(lte->out_refcnt != 0); ret = write_wim_resource(lte, @@ -684,6 +730,7 @@ do_write_stream_list(struct list_head *my_resources, if (ret) return ret; if (lte->unhashed) { + list_del(&lte->unhashed_list); lookup_table_insert(lookup_table, lte); lte->unhashed = 0; } @@ -704,12 +751,10 @@ write_stream_list_serial(struct list_head *stream_list, wimlib_progress_func_t progress_func, union wimlib_progress_info *progress) { - int write_resource_flags; - + int write_resource_flags = 0; if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) - write_resource_flags = WIMLIB_RESOURCE_FLAG_RECOMPRESS; - else - write_resource_flags = 0; + write_resource_flags |= WIMLIB_RESOURCE_FLAG_RECOMPRESS; + progress->write_streams.num_threads = 1; if (progress_func) progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, progress); @@ -903,16 +948,16 @@ main_writer_thread_proc(struct list_head *stream_list, } next_lte = container_of(next_resource, struct wim_lookup_table_entry, - staging_list); + write_streams_list); next_resource = next_resource->next; if ((!(write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) && wim_resource_compression_type(next_lte) == out_ctype) || wim_resource_size(next_lte) == 0) { - list_add_tail(&next_lte->staging_list, + list_add_tail(&next_lte->write_streams_list, &my_resources); } else { - list_add_tail(&next_lte->staging_list, + list_add_tail(&next_lte->write_streams_list, &outstanding_resources); next_chunk = 0; next_num_chunks = wim_resource_chunks(next_lte); @@ -1094,15 +1139,15 @@ main_writer_thread_proc(struct list_head *stream_list, FREE(cur_chunk_tab); cur_chunk_tab = NULL; - struct list_head *next = cur_lte->staging_list.next; - list_del(&cur_lte->staging_list); + struct list_head *next = 
cur_lte->write_streams_list.next; + list_del(&cur_lte->write_streams_list); if (next == &outstanding_resources) cur_lte = NULL; else - cur_lte = container_of(cur_lte->staging_list.next, + cur_lte = container_of(cur_lte->write_streams_list.next, struct wim_lookup_table_entry, - staging_list); + write_streams_list); // Since we just finished writing a stream, // write any streams that have been added to the @@ -1398,8 +1443,8 @@ stream_size_table_insert(struct wim_lookup_table_entry *lte, void *_tab) struct lte_overwrite_prepare_args { WIMStruct *wim; off_t end_offset; - struct list_head *stream_list; - struct stream_size_table *stream_size_tab; + struct list_head stream_list; + struct stream_size_table stream_size_tab; }; static int @@ -1422,22 +1467,20 @@ lte_overwrite_prepare(struct wim_lookup_table_entry *lte, void *arg) return WIMLIB_ERR_RESOURCE_ORDER; } } else { - if (!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)) - list_add_tail(&lte->write_streams_list, args->stream_list); + wimlib_assert(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)); + list_add_tail(&lte->write_streams_list, &args->stream_list); } lte->out_refcnt = lte->refcnt; - stream_size_table_insert(lte, args->stream_size_tab); + stream_size_table_insert(lte, &args->stream_size_tab); return 0; } static int lte_set_output_res_entry(struct wim_lookup_table_entry *lte, void *_wim) { - if (lte->resource_location == RESOURCE_IN_WIM && - lte->wim == _wim) - { - memcpy(&lte->output_resource_entry, &lte->resource_entry, - sizeof(struct resource_entry)); + if (lte->resource_location == RESOURCE_IN_WIM && lte->wim == _wim) { + copy_resource_entry(&lte->output_resource_entry, + &lte->resource_entry); } return 0; } @@ -1460,19 +1503,16 @@ prepare_streams_for_overwrite(WIMStruct *wim, off_t end_offset, struct list_head *stream_list) { int ret; - struct stream_size_table stream_size_tab; - struct lte_overwrite_prepare_args args = { - .wim = wim, - .end_offset = end_offset, - .stream_list = stream_list, - 
.stream_size_tab = &stream_size_tab, - }; - - ret = init_stream_size_table(&stream_size_tab, 9001); + struct lte_overwrite_prepare_args args; + + args.wim = wim; + args.end_offset = end_offset; + ret = init_stream_size_table(&args.stream_size_tab, + wim->lookup_table->capacity); if (ret) return ret; - INIT_LIST_HEAD(stream_list); + INIT_LIST_HEAD(&args.stream_list); for (int i = 0; i < wim->hdr.image_count; i++) { struct wim_image_metadata *imd; struct wim_lookup_table_entry *lte; @@ -1492,10 +1532,11 @@ prepare_streams_for_overwrite(WIMStruct *wim, off_t end_offset, for (int i = 0; i < wim->hdr.image_count; i++) lte_set_output_res_entry(wim->image_metadata[i]->metadata_lte, wim); - ret = for_lookup_table_entry(wim->lookup_table, - lte_set_output_res_entry, wim); + for_lookup_table_entry(wim->lookup_table, lte_set_output_res_entry, wim); + INIT_LIST_HEAD(stream_list); + list_splice(&args.stream_list, stream_list); out_destroy_stream_size_table: - destroy_stream_size_table(&stream_size_tab); + destroy_stream_size_table(&args.stream_size_tab); return ret; } @@ -1505,7 +1546,7 @@ struct find_streams_ctx { struct stream_size_table stream_size_tab; }; -static int +static void inode_find_streams_to_write(struct wim_inode *inode, struct wim_lookup_table *table, struct list_head *stream_list, @@ -1523,7 +1564,6 @@ inode_find_streams_to_write(struct wim_inode *inode, lte->out_refcnt += inode->i_nlink; } } - return 0; } static int @@ -1540,7 +1580,7 @@ image_find_streams_to_write(WIMStruct *w) image_for_each_unhashed_stream(lte, imd) { lte->out_refcnt = 0; wimlib_assert(lte->unhashed); - wimlib_assert(lte->my_ptr != NULL); + wimlib_assert(lte->back_ptr != NULL); } /* Go through this image's inodes to find any streams that have not been @@ -1573,19 +1613,21 @@ prepare_stream_list(WIMStruct *wim, int image, struct list_head *stream_list) struct find_streams_ctx ctx; for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); - ret = 
init_stream_size_table(&ctx.stream_size_tab, 9001); + ret = init_stream_size_table(&ctx.stream_size_tab, + wim->lookup_table->capacity); if (ret) return ret; for_lookup_table_entry(wim->lookup_table, stream_size_table_insert, &ctx.stream_size_tab); INIT_LIST_HEAD(&ctx.stream_list); wim->private = &ctx; - for_image(wim, image, image_find_streams_to_write); + ret = for_image(wim, image, image_find_streams_to_write); destroy_stream_size_table(&ctx.stream_size_tab); - - INIT_LIST_HEAD(stream_list); - list_splice(&ctx.stream_list, stream_list); - return 0; + if (ret == 0) { + INIT_LIST_HEAD(stream_list); + list_splice(&ctx.stream_list, stream_list); + } + return ret; } /* Writes the streams for the specified @image in @wim to @wim->out_fp. @@ -1841,7 +1883,7 @@ begin_write(WIMStruct *w, const tchar *path, int write_flags) int ret; ret = open_wim_writable(w, path, true, (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) != 0); - if (ret != 0) + if (ret) return ret; /* Write dummy header. It will be overwritten later. */ return write_header(&w->hdr, w->out_fp); @@ -2031,7 +2073,8 @@ overwrite_wim_inplace(WIMStruct *w, int write_flags, w->lookup_table, w->out_fp, wimlib_get_compression_type(w), - write_flags, num_threads, + write_flags, + num_threads, progress_func); if (ret) goto out_ftruncate;