From 7ce0d372fae285051cbc9740c9fa316d22465d9d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 30 Mar 2013 23:13:25 -0500 Subject: [PATCH] Fixes --- src/add_image.c | 3 +- src/delete_image.c | 5 +- src/dentry.c | 4 +- src/dentry.h | 9 +-- src/export_image.c | 14 ++++- src/extract_image.c | 6 +- src/join.c | 27 ++++---- src/lookup_table.c | 76 ++++++++++++++++++++++- src/lookup_table.h | 47 +++++++++----- src/mount_image.c | 79 +++++++++++++----------- src/ntfs-capture.c | 14 +++-- src/resource.c | 28 ++++++++- src/split.c | 5 ++ src/wim.c | 18 ++++++ src/wimlib_internal.h | 9 +++ src/win32.c | 15 +++-- src/write.c | 139 ++++++++++++------------------------------ 17 files changed, 303 insertions(+), 195 deletions(-) diff --git a/src/add_image.c b/src/add_image.c index 904f3545..e3f022ad 100644 --- a/src/add_image.c +++ b/src/add_image.c @@ -113,7 +113,8 @@ unix_capture_regular_file(const char *path, lte->file_on_disk = file_on_disk; lte->resource_location = RESOURCE_IN_FILE_ON_DISK; lte->resource_entry.original_size = size; - lookup_table_insert_unhashed(lookup_table, lte, &inode->i_lte); + lookup_table_insert_unhashed(lookup_table, lte, inode, 0); + inode->i_lte = lte; } return 0; } diff --git a/src/delete_image.c b/src/delete_image.c index c95819e9..14f8e7e7 100644 --- a/src/delete_image.c +++ b/src/delete_image.c @@ -69,9 +69,8 @@ wimlib_delete_image(WIMStruct *w, int image) put_image_metadata(w->image_metadata[image - 1], w->lookup_table); /* Get rid of the empty slot in the image metadata array. */ - memmove(w->image_metadata[image - 1], - &w->image_metadata[image], - (w->hdr.image_count - image) * sizeof(w->image_metadata[0])); + for (int i = image - 1; i < w->hdr.image_count - 1; i++) + w->image_metadata[i] = w->image_metadata[i + 1]; /* Decrement the image count. 
*/ --w->hdr.image_count; diff --git a/src/dentry.c b/src/dentry.c index 16d9b416..7f0c1d06 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -692,8 +692,8 @@ new_timeless_inode() if (inode) { inode->i_security_id = -1; inode->i_nlink = 1; - #ifdef WITH_FUSE inode->i_next_stream_id = 1; + #ifdef WITH_FUSE if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) { ERROR_WITH_ERRNO("Error initializing mutex"); FREE(inode); @@ -1025,9 +1025,7 @@ do_inode_add_ads(struct wim_inode *inode, const void *stream_name, new_entry = &inode->i_ads_entries[num_ads - 1]; if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le)) return NULL; -#ifdef WITH_FUSE new_entry->stream_id = inode->i_next_stream_id++; -#endif inode->i_num_ads = num_ads; return new_entry; } diff --git a/src/dentry.h b/src/dentry.h index 299abc9f..5f7ccd5b 100644 --- a/src/dentry.h +++ b/src/dentry.h @@ -81,11 +81,9 @@ struct wim_ads_entry { /* Stream name (UTF-16LE) */ utf16lechar *stream_name; -#ifdef WITH_FUSE /* Number to identify an alternate data stream even after it's possibly * been moved or renamed. */ u32 stream_id; -#endif }; @@ -286,15 +284,14 @@ struct wim_inode { * noted in the @attributes field.) */ struct rb_root i_children; + /* Next alternate data stream ID to be assigned */ + u32 i_next_stream_id; + #ifdef WITH_FUSE /* wimfs file descriptors table for the inode */ u16 i_num_opened_fds; u16 i_num_allocated_fds; struct wimfs_fd **i_fds; - - /* Next alternate data stream ID to be assigned */ - u32 i_next_stream_id; - /* This mutex protects the inode's file descriptors table during * read-only mounts. Read-write mounts are still restricted to 1 * thread. 
*/ diff --git a/src/export_image.c b/src/export_image.c index deeaddb4..34564e22 100644 --- a/src/export_image.c +++ b/src/export_image.c @@ -47,7 +47,7 @@ inode_allocate_needed_ltes(struct wim_inode *inode, dest_lte = clone_lookup_table_entry(src_lte); if (!dest_lte) return WIMLIB_ERR_NOMEM; - list_add_tail(&dest_lte->new_stream_list, + list_add_tail(&dest_lte->export_stream_list, lte_list_head); } } @@ -77,7 +77,7 @@ inode_move_ltes_to_table(struct wim_inode *inode, list_del(next); dest_lte = container_of(next, struct wim_lookup_table_entry, - new_stream_list); + export_stream_list); dest_lte->part_number = 1; dest_lte->refcnt = 0; wimlib_assert(hashes_equal(dest_lte->hash, src_lte->hash)); @@ -186,6 +186,13 @@ wimlib_export_image(WIMStruct *src_wim, if (ret) return ret; + ret = wim_checksum_unhashed_streams(src_wim); + if (ret) + return ret; + ret = wim_checksum_unhashed_streams(dest_wim); + if (ret) + return ret; + if (num_additional_swms) { ret = new_joined_lookup_table(src_wim, additional_swms, num_additional_swms, @@ -231,6 +238,7 @@ wimlib_export_image(WIMStruct *src_wim, /* The `struct image_metadata' is now referenced by both the @src_wim * and the @dest_wim. */ src_imd->refcnt++; + src_imd->modified = 1; /* All memory allocations have been taken care of, so it's no longer * possible for this function to fail. 
Go ahead and update the lookup @@ -251,7 +259,7 @@ out_xml_delete_image: out_free_ltes: { struct wim_lookup_table_entry *lte, *tmp; - list_for_each_entry_safe(lte, tmp, &lte_list_head, new_stream_list) + list_for_each_entry_safe(lte, tmp, &lte_list_head, export_stream_list) free_lookup_table_entry(lte); } out: diff --git a/src/extract_image.c b/src/extract_image.c index dff7dd4f..fa72edb8 100644 --- a/src/extract_image.c +++ b/src/extract_image.c @@ -1017,7 +1017,11 @@ wimlib_extract_image(WIMStruct *w, } ret = verify_swm_set(w, additional_swms, num_additional_swms); - if (ret != 0) + if (ret) + return ret; + + ret = wim_checksum_unhashed_streams(w); + if (ret) return ret; if (num_additional_swms) { diff --git a/src/join.c b/src/join.c index 0d0df255..b5c7c37b 100644 --- a/src/join.c +++ b/src/join.c @@ -109,10 +109,10 @@ join_wims(WIMStruct **swms, unsigned num_swms, } if (progress_func) { - progress.join.total_bytes = total_bytes; - progress.join.total_parts = swms[0]->hdr.total_parts; - progress.join.completed_bytes = 0; - progress.join.completed_parts = 0; + progress.join.total_bytes = total_bytes; + progress.join.total_parts = swms[0]->hdr.total_parts; + progress.join.completed_bytes = 0; + progress.join.completed_parts = 0; progress_func(WIMLIB_PROGRESS_MSG_JOIN_STREAMS, &progress); } @@ -157,7 +157,6 @@ join_wims(WIMStruct **swms, unsigned num_swms, } /* Write lookup table, XML data, and optional integrity table */ - joined_wim->hdr.image_count = swms[0]->hdr.image_count; for (i = 0; i < num_swms; i++) lookup_table_join(joined_wim->lookup_table, swms[i]->lookup_table); @@ -205,8 +204,8 @@ wimlib_join(const tchar * const *swm_names, for (i = 0; i < num_swms; i++) { ret = wimlib_open_wim(swm_names[i], swm_open_flags, &swms[i], progress_func); - if (ret != 0) - goto out; + if (ret) + goto out_free_wims; /* Don't open all the parts at the same time, in case there are * a lot of them */ @@ -217,20 +216,20 @@ wimlib_join(const tchar * const *swm_names, qsort(swms, 
num_swms, sizeof(swms[0]), cmp_swms_by_part_number); ret = verify_swm_set(swms[0], &swms[1], num_swms - 1); - if (ret != 0) - goto out; + if (ret) + goto out_free_wims; ret = wimlib_create_new_wim(wimlib_get_compression_type(swms[0]), &joined_wim); - if (ret != 0) - goto out; + if (ret) + goto out_free_wims; ret = begin_write(joined_wim, output_path, wim_write_flags); - if (ret != 0) - goto out; + if (ret) + goto out_free_wims; ret = join_wims(swms, num_swms, joined_wim, wim_write_flags, progress_func); -out: +out_free_wims: for (i = 0; i < num_swms; i++) wimlib_free(swms[i]); wimlib_free(joined_wim); diff --git a/src/lookup_table.c b/src/lookup_table.c index 8ab54b6e..0ebaf61e 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -88,8 +88,11 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) switch (new->resource_location) { #ifdef __WIN32__ case RESOURCE_WIN32: + case RESOURCE_WIN32_ENCRYPTED: #endif +#ifdef WITH_FUSE case RESOURCE_IN_STAGING_FILE: +#endif case RESOURCE_IN_FILE_ON_DISK: BUILD_BUG_ON((void*)&old->file_on_disk != (void*)&old->staging_file_name); @@ -323,8 +326,11 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, qsort(lte_array, num_streams, sizeof(lte_array[0]), cmp_streams_by_wim_position); ret = 0; - for (size_t i = 0; i < num_streams && ret == 0; i++) + for (size_t i = 0; i < num_streams; i++) { ret = visitor(lte_array[i], arg); + if (ret) + break; + } FREE(lte_array); return ret; } @@ -634,7 +640,8 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); if (lte->unhashed) { - tfprintf(out, T("(Unhashed, back ptr at %p)\n"), lte->back_ptr); + tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), + lte->back_inode, lte->back_stream_id); } else { tfprintf(out, T("Hash = 0x")); print_hash(lte->hash, out); @@ -891,3 +898,68 @@ lookup_table_total_stream_size(struct wim_lookup_table *table) for_lookup_table_entry(table, 
lte_add_stream_size, &total_size); return total_size; } + +struct wim_lookup_table_entry ** +retrieve_lte_pointer(struct wim_lookup_table_entry *lte) +{ + wimlib_assert(lte->unhashed); + struct wim_inode *inode = lte->back_inode; + u32 stream_id = lte->back_stream_id; + if (stream_id == 0) + return &inode->i_lte; + else + for (u16 i = 0; i < inode->i_num_ads; i++) + if (inode->i_ads_entries[i].stream_id == stream_id) + return &inode->i_ads_entries[i].lte; + wimlib_assert(0); + return NULL; +} + +int +hash_unhashed_stream(struct wim_lookup_table_entry *lte, + struct wim_lookup_table *lookup_table, + struct wim_lookup_table_entry **lte_ret) +{ + int ret; + struct wim_lookup_table_entry *duplicate_lte; + struct wim_lookup_table_entry **back_ptr; + + wimlib_assert(lte->unhashed); + + /* back_ptr must be saved because @back_inode and @back_stream_id are in + * union with the SHA1 message digest and will no longer be valid once + * the SHA1 has been calculated. */ + back_ptr = retrieve_lte_pointer(lte); + + ret = sha1_resource(lte); + if (ret) + return ret; + + /* Look for a duplicate stream */ + duplicate_lte = __lookup_resource(lookup_table, lte->hash); + list_del(&lte->unhashed_list); + if (duplicate_lte) { + /* We have a duplicate stream. Transfer the reference counts + * from this stream to the duplicate, update the reference to + * this stream (in an inode or ads_entry) to point to the + * duplicate, then free this stream. */ + wimlib_assert(!(duplicate_lte->unhashed)); + duplicate_lte->refcnt += lte->refcnt; + duplicate_lte->out_refcnt += lte->refcnt; + *back_ptr = duplicate_lte; + free_lookup_table_entry(lte); + lte = duplicate_lte; + } else { + /* No duplicate stream, so we need to insert + * this stream into the lookup table and treat + * it as a hashed stream. 
*/ + /* @lte was already unlinked from the unhashed list above. */ + lookup_table_insert(lookup_table, lte); + lte->out_refcnt = lte->refcnt; + lte->unhashed = 0; + } + if (lte_ret) + *lte_ret = lte; + return 0; +} + diff --git a/src/lookup_table.h b/src/lookup_table.h index 81cb50a5..48ec965e 100644 --- a/src/lookup_table.h +++ b/src/lookup_table.h @@ -159,11 +159,14 @@ struct wim_lookup_table_entry { * table. */ size_t hash_short; - /* Unhashed entries only (unhashed == 1): this points directly - * to the pointer to this 'struct wim_lookup_table_entry' - * contained in a 'struct wim_ads_entry' or 'struct wim_inode'. - * */ - struct wim_lookup_table_entry **back_ptr; + /* Unhashed entries only (unhashed == 1): these variables make + * it possible to find the pointer to this 'struct + * wim_lookup_table_entry' contained in a 'struct wim_ads_entry' + * or 'struct wim_inode'. */ + struct { + struct wim_inode *back_inode; + u32 back_stream_id; + }; }; /* When a WIM file is written, out_refcnt starts at 0 and is incremented @@ -188,17 +191,12 @@ struct wim_lookup_table_entry { #endif }; - /* Pointer to inode that contains the opened file descriptors to - * this stream (valid iff resource_location == - * RESOURCE_IN_STAGING_FILE) */ - struct wim_inode *lte_inode; - u32 real_refcnt; union { - #ifdef WITH_FUSE + #ifdef WITH_FUSE u16 num_opened_fds; - #endif + #endif /* This field is used for the special hardlink or symlink image * extraction mode. 
In these mode, all identical files are linked @@ -225,6 +223,13 @@ struct wim_lookup_table_entry { struct list_head write_streams_list; }; + + #ifdef WITH_FUSE + /* Pointer to inode that contains the opened file descriptors to + * this stream (valid when resource_location == + * RESOURCE_IN_STAGING_FILE) */ + struct wim_inode *lte_inode; + #endif }; /* Temporary list fields */ @@ -232,7 +237,7 @@ struct wim_lookup_table_entry { struct list_head unhashed_list; struct list_head swm_stream_list; struct list_head extraction_list; - struct list_head new_stream_list; + struct list_head export_stream_list; }; }; @@ -300,6 +305,7 @@ lookup_table_insert(struct wim_lookup_table *table, struct wim_lookup_table_entr static inline void lookup_table_unlink(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte) { + wimlib_assert(!lte->unhashed); hlist_del(&lte->hash_list); wimlib_assert(table->num_entries != 0); table->num_entries--; @@ -485,12 +491,21 @@ lookup_table_total_stream_size(struct wim_lookup_table *table); static inline void lookup_table_insert_unhashed(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte, - struct wim_lookup_table_entry **back_ptr) + struct wim_inode *back_inode, + u32 back_stream_id) { lte->unhashed = 1; + lte->back_inode = back_inode; + lte->back_stream_id = back_stream_id; list_add_tail(&lte->unhashed_list, table->unhashed_streams); - lte->back_ptr = back_ptr; - *back_ptr = lte; } +extern int +hash_unhashed_stream(struct wim_lookup_table_entry *lte, + struct wim_lookup_table *lookup_table, + struct wim_lookup_table_entry **lte_ret); + +extern struct wim_lookup_table_entry ** +retrieve_lte_pointer(struct wim_lookup_table_entry *lte); + #endif diff --git a/src/mount_image.c b/src/mount_image.c index 82b80b9a..d0b5d2c8 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -642,26 +642,14 @@ extract_resource_to_staging_dir(struct wim_inode *inode, } } - new_lte->refcnt = inode->i_nlink; - new_lte->resource_location = 
RESOURCE_IN_STAGING_FILE; - new_lte->staging_file_name = staging_file_name; - new_lte->lte_inode = inode; - - struct wim_lookup_table_entry **back_ptr; - - if (stream_id == 0) { - back_ptr = &inode->i_lte; - } else { - for (u16 i = 0; ; i++) { - wimlib_assert(i < inode->i_num_ads); - if (inode->i_ads_entries[i].stream_id == stream_id) { - back_ptr = &inode->i_ads_entries[i].lte; - break; - } - } - } - - lookup_table_insert_unhashed(ctx->wim->lookup_table, new_lte, back_ptr); + new_lte->refcnt = inode->i_nlink; + new_lte->resource_location = RESOURCE_IN_STAGING_FILE; + new_lte->staging_file_name = staging_file_name; + new_lte->lte_inode = inode; + new_lte->resource_entry.original_size = size; + + lookup_table_insert_unhashed(ctx->wim->lookup_table, new_lte, + inode, stream_id); *lte = new_lte; return 0; out_revert_fd_changes: @@ -802,21 +790,26 @@ rebuild_wim(struct wimfs_context *ctx, int write_flags, wimlib_progress_func_t progress_func) { int ret; - struct wim_lookup_table_entry *lte; + struct wim_lookup_table_entry *lte, *tmp; WIMStruct *w = ctx->wim; struct wim_image_metadata *imd = wim_get_current_image_metadata(ctx->wim); DEBUG("Closing all staging file descriptors."); - image_for_each_unhashed_stream(lte, imd) { + image_for_each_unhashed_stream_safe(lte, tmp, imd) { ret = inode_close_fds(lte->lte_inode); if (ret) return ret; } DEBUG("Freeing entries for zero-length streams"); - image_for_each_unhashed_stream(lte, imd) { + image_for_each_unhashed_stream_safe(lte, tmp, imd) { + wimlib_assert(lte->unhashed); if (wim_resource_size(lte) == 0) { - *lte->back_ptr = NULL; + print_lookup_table_entry(lte, stderr); + struct wim_lookup_table_entry **back_ptr; + back_ptr = retrieve_lte_pointer(lte); + *back_ptr = NULL; + list_del(&lte->unhashed_list); free_lookup_table_entry(lte); } } @@ -1813,6 +1806,7 @@ wimfs_open(const char *path, struct fuse_file_info *fi) u16 stream_idx; u32 stream_id; struct wimfs_context *ctx = wimfs_get_context(); + struct wim_lookup_table_entry 
**back_ptr; ret = lookup_resource(ctx->wim, path, get_lookup_flags(ctx), &dentry, &lte, &stream_idx); @@ -1821,10 +1815,13 @@ wimfs_open(const char *path, struct fuse_file_info *fi) inode = dentry->d_inode; - if (stream_idx == 0) + if (stream_idx == 0) { stream_id = 0; - else + back_ptr = &inode->i_lte; + } else { stream_id = inode->i_ads_entries[stream_idx - 1].stream_id; + back_ptr = &inode->i_ads_entries[stream_idx - 1].lte; + } /* The file resource may be in the staging directory (read-write mounts * only) or in the WIM. If it's in the staging directory, we need to @@ -1840,7 +1837,9 @@ wimfs_open(const char *path, struct fuse_file_info *fi) &lte, size, ctx); if (ret != 0) return ret; + *back_ptr = lte; } + print_lookup_table_entry(lte, stderr); ret = alloc_wimfs_fd(inode, stream_id, lte, &fd, wimfs_ctx_readonly(ctx)); @@ -2209,24 +2208,23 @@ wimfs_truncate(const char *path, off_t size) if (lte == NULL && size == 0) return 0; - inode = dentry->d_inode; - if (stream_idx == 0) - stream_id = 0; - else - stream_id = inode->i_ads_entries[stream_idx - 1].stream_id; - if (lte->resource_location == RESOURCE_IN_STAGING_FILE) { ret = truncate(lte->staging_file_name, size); - if (ret != 0) + if (ret) ret = -errno; + else + lte->resource_entry.original_size = size; } else { /* File in WIM. Extract it to the staging directory, but only * the first @size bytes of it. 
*/ + inode = dentry->d_inode; + if (stream_idx == 0) + stream_id = 0; + else + stream_id = inode->i_ads_entries[stream_idx - 1].stream_id; ret = extract_resource_to_staging_dir(inode, stream_id, &lte, size, ctx); } - if (ret == 0) - lte->resource_entry.original_size = size; return ret; } @@ -2329,8 +2327,12 @@ wimfs_write(const char *path, const char *buf, size_t size, return -errno; /* Update file size */ - if (offset + size > fd->f_lte->resource_entry.original_size) + if (offset + size > fd->f_lte->resource_entry.original_size) { + DEBUG("Update file size %"PRIu64 " => %"PRIu64"", + fd->f_lte->resource_entry.original_size, + offset + size); fd->f_lte->resource_entry.original_size = offset + size; + } /* Update timestamps */ touch_inode(fd->f_inode); @@ -2444,6 +2446,10 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, goto out; } + ret = wim_checksum_unhashed_streams(wim); + if (ret) + goto out; + ret = select_wim_image(wim, image); if (ret) goto out; @@ -2486,6 +2492,7 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, ctx.image_inode_list = &imd->inode_list; ctx.default_uid = getuid(); ctx.default_gid = getgid(); + wimlib_assert(list_empty(&imd->unhashed_streams)); ctx.wim->lookup_table->unhashed_streams = &imd->unhashed_streams; if (mount_flags & WIMLIB_MOUNT_FLAG_STREAM_INTERFACE_WINDOWS) ctx.default_lookup_flags = LOOKUP_FLAG_ADS_OK; diff --git a/src/ntfs-capture.c b/src/ntfs-capture.c index 7f4f2bb4..327040b3 100644 --- a/src/ntfs-capture.c +++ b/src/ntfs-capture.c @@ -165,7 +165,8 @@ capture_ntfs_streams(struct wim_inode *inode, { u64 data_size = ntfs_get_attribute_value_length(actx->attr); u64 name_length = actx->attr->name_length; - struct wim_lookup_table_entry **back_ptr; + u32 stream_id; + if (data_size == 0) { if (errno != 0) { ERROR_WITH_ERRNO("Failed to get size of attribute of " @@ -224,7 +225,8 @@ capture_ntfs_streams(struct wim_inode *inode, ret = WIMLIB_ERR_NTFS_3G; goto out_free_lte; } - back_ptr = &inode->i_lte; + 
stream_id = 0; + inode->i_lte = lte; } else { /* Named data stream. Put the reference to it in the * alternate data stream entries */ @@ -236,9 +238,13 @@ capture_ntfs_streams(struct wim_inode *inode, if (!new_ads_entry) goto out_free_lte; wimlib_assert(new_ads_entry->stream_name_nbytes == name_length * 2); - back_ptr = &new_ads_entry->lte; + stream_id = new_ads_entry->stream_id; + new_ads_entry->lte = lte; + } + if (lte) { + lookup_table_insert_unhashed(lookup_table, lte, + inode, stream_id); } - lookup_table_insert_unhashed(lookup_table, lte, back_ptr); } ret = 0; goto out_put_actx; diff --git a/src/resource.c b/src/resource.c index 5237b88a..dc5b7a80 100644 --- a/src/resource.c +++ b/src/resource.c @@ -593,7 +593,11 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte, } goto out_release_fp; read_error: - ERROR_WITH_ERRNO("Error reading data from WIM"); + if (ferror(wim_fp)) { + ERROR_WITH_ERRNO("Error reading data from WIM"); + } else { + ERROR("Unexpected EOF in WIM!"); + } ret = WIMLIB_ERR_READ; out_release_fp: if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ) @@ -838,6 +842,28 @@ extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte, return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, &fd); } + +static int +sha1_chunk(const void *buf, size_t len, void *ctx) +{ + sha1_update(ctx, buf, len); + return 0; +} + +int +sha1_resource(struct wim_lookup_table_entry *lte) +{ + int ret; + SHA_CTX sha_ctx; + + sha1_init(&sha_ctx); + ret = read_resource_prefix(lte, wim_resource_size(lte), + sha1_chunk, &sha_ctx, 0); + if (ret == 0) + sha1_final(lte->hash, &sha_ctx); + return ret; +} + /* * Copies the file resource specified by the lookup table entry @lte from the * input WIM to the output WIM that has its FILE * given by diff --git a/src/split.c b/src/split.c index a24ac4b4..6a2a0b49 100644 --- a/src/split.c +++ b/src/split.c @@ -145,6 +145,10 @@ wimlib_split(WIMStruct *w, const tchar *swm_name, write_flags &= 
WIMLIB_WRITE_MASK_PUBLIC; + ret = wim_checksum_unhashed_streams(w); + if (ret) + return ret; + swm_name_len = tstrlen(swm_name); tchar swm_base_name[swm_name_len + 20]; @@ -190,6 +194,7 @@ wimlib_split(WIMStruct *w, const tchar *swm_name, for (int i = 0; i < w->hdr.image_count; i++) { struct wim_lookup_table_entry *metadata_lte; metadata_lte = w->image_metadata[i]->metadata_lte; + print_lookup_table_entry(metadata_lte, stderr); ret = copy_resource(metadata_lte, w); if (ret) goto out; diff --git a/src/wim.c b/src/wim.c index 4060fa58..8f579f2c 100644 --- a/src/wim.c +++ b/src/wim.c @@ -616,6 +616,24 @@ new_image_metadata_array(unsigned num_images) return imd_array; } +int +wim_checksum_unhashed_streams(WIMStruct *w) +{ + int ret; + for (int i = 0; i < w->hdr.image_count; i++) { + struct wim_lookup_table_entry *lte, *tmp; + list_for_each_entry_safe(lte, tmp, + &w->image_metadata[i]->unhashed_streams, + unhashed_list) + { + ret = hash_unhashed_stream(lte, w->lookup_table, NULL); + if (ret) + return ret; + } + } + return 0; +} + /* Frees the memory for the WIMStruct, including all internal memory; also * closes all files associated with the WIMStruct. 
*/ WIMLIBAPI void diff --git a/src/wimlib_internal.h b/src/wimlib_internal.h index a7ac5dc2..f07935c1 100644 --- a/src/wimlib_internal.h +++ b/src/wimlib_internal.h @@ -369,6 +369,9 @@ resource_is_compressed(const struct resource_entry *entry) #define image_for_each_unhashed_stream(lte, imd) \ list_for_each_entry(lte, &imd->unhashed_streams, unhashed_list) +#define image_for_each_unhashed_stream_safe(lte, tmp, imd) \ + list_for_each_entry_safe(lte, tmp, &imd->unhashed_streams, unhashed_list) + #if 1 # define copy_resource_entry(dst, src) memcpy(dst, src, sizeof(struct resource_entry)) #else @@ -593,6 +596,9 @@ extern int extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte, int fd, u64 size); +extern int +sha1_resource(struct wim_lookup_table_entry *lte); + extern int copy_resource(struct wim_lookup_table_entry *lte, void *w); @@ -660,6 +666,9 @@ new_image_metadata(); extern struct wim_image_metadata ** new_image_metadata_array(unsigned num_images); +extern int +wim_checksum_unhashed_streams(WIMStruct *w); + /* write.c */ /* Internal use only */ diff --git a/src/win32.c b/src/win32.c index e8ef17a1..f2e7eeb0 100644 --- a/src/win32.c +++ b/src/win32.c @@ -586,13 +586,16 @@ win32_capture_stream(const wchar_t *path, lte->resource_location = RESOURCE_WIN32; lte->resource_entry.original_size = (u64)dat->StreamSize.QuadPart; - struct wim_lookup_table_entry **back_ptr; - if (is_named_stream) - back_ptr = &ads_entry->lte; - else - back_ptr = &inode->i_lte; + u32 stream_id; + if (is_named_stream) { + stream_id = ads_entry->stream_id; + ads_entry->lte = lte; + } else { + stream_id = 0; + inode->i_lte = lte; + } - lookup_table_insert_unhashed(lookup_table, lte, back_ptr); + lookup_table_insert_unhashed(lookup_table, lte, inode, stream_id); out_free_spath: FREE(spath); out: diff --git a/src/write.c b/src/write.c index 159fd57d..ba3b3158 100644 --- a/src/write.c +++ b/src/write.c @@ -312,6 +312,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte, int 
flags) { struct write_resource_ctx write_ctx; + u64 read_size; u64 new_size; off_t offset; int ret; @@ -343,9 +344,11 @@ write_wim_resource(struct wim_lookup_table_entry *lte, { flags |= WIMLIB_RESOURCE_FLAG_RAW; write_ctx.doing_sha = false; + read_size = lte->resource_entry.size; } else { write_ctx.doing_sha = true; sha1_init(&write_ctx.sha_ctx); + read_size = lte->resource_entry.original_size; } /* Initialize the chunk table and set the compression function if @@ -367,8 +370,10 @@ write_wim_resource(struct wim_lookup_table_entry *lte, * the data through the write_resource_cb function. */ write_ctx.out_fp = out_fp; try_write_again: - ret = read_resource_prefix(lte, wim_resource_size(lte), + ret = read_resource_prefix(lte, read_size, write_resource_cb, &write_ctx, flags); + if (ret) + goto out_free_chunk_tab; /* Verify SHA1 message digest of the resource, or set the hash for the * first time. */ @@ -602,27 +607,6 @@ do_write_streams_progress(union wimlib_progress_info *progress, } } -static int -sha1_chunk(const void *buf, size_t len, void *ctx) -{ - sha1_update(ctx, buf, len); - return 0; -} - -static int -sha1_resource(struct wim_lookup_table_entry *lte) -{ - int ret; - SHA_CTX sha_ctx; - - sha1_init(&sha_ctx); - ret = read_resource_prefix(lte, wim_resource_size(lte), - sha1_chunk, &sha_ctx, 0); - if (ret == 0) - sha1_final(lte->hash, &sha_ctx); - return ret; -} - enum { STREAMS_MERGED = 0, STREAMS_NOT_MERGED = 1, @@ -647,52 +631,21 @@ do_write_stream_list(struct list_head *stream_list, write_streams_list); list_del(&lte->write_streams_list); if (lte->unhashed && !lte->unique_size) { - /* Unhashed stream that shares a size with some other * stream in the WIM we are writing. The stream must be * checksummed to know if we need to write it or not. 
*/ - struct wim_lookup_table_entry *duplicate_lte; - struct wim_lookup_table_entry **back_ptr; + struct wim_lookup_table_entry *tmp; + u32 orig_refcnt = lte->out_refcnt; - /* back_ptr must be saved because it's in union with the - * SHA1 message digest and will no longer be valid once - * the SHA1 has been calculated. */ - back_ptr = lte->back_ptr; - - /* Checksum the stream */ - ret = sha1_resource(lte); + ret = hash_unhashed_stream(lte, + lookup_table, + &tmp); if (ret) return ret; - - /* Look for a duplicate stream */ - duplicate_lte = __lookup_resource(lookup_table, lte->hash); - if (duplicate_lte) { - /* We have a duplicate stream. Transfer the - * reference counts from this stream to the - * duplicate, update the reference to this - * stream (in an inode or ads_entry) to point to - * the duplicate, then free this stream. */ - wimlib_assert(!(duplicate_lte->unhashed)); - bool is_new_stream = (duplicate_lte->out_refcnt == 0); - duplicate_lte->refcnt += lte->refcnt; - duplicate_lte->out_refcnt += lte->refcnt; - *back_ptr = duplicate_lte; - list_del(&lte->unhashed_list); - free_lookup_table_entry(lte); - lte = duplicate_lte; - - if (is_new_stream) { - /* The duplicate stream is one we - * weren't already planning to write. - * But, now we must write it. - * - * XXX: Currently, the copy of the - * stream in the WIM is always chosen - * for writing, rather than the extra - * copy we just read (which may be in an - * external file). This may not always - * be fastest. */ - } else { + if (tmp != lte) { + lte = tmp; + /* We found a duplicate stream. */ + if (orig_refcnt != tmp->out_refcnt) { /* We have already written, or are going * to write, the duplicate stream. So * just skip to the next stream. */ @@ -700,28 +653,17 @@ do_write_stream_list(struct list_head *stream_list, wim_resource_size(lte)); goto skip_to_progress; } - - } else { - /* No duplicate stream, so we need to insert - * this stream into the lookup table and treat - * it as a hashed stream. 
*/ - list_del(&lte->unhashed_list); - lookup_table_insert(lookup_table, lte); - lte->out_refcnt = lte->refcnt; - lte->unhashed = 0; } } - /* Here, @lte either a hashed stream or an unhashed stream with - * a unique size. In either case we know that the stream has to - * be written. In either case the SHA1 message digest will be - * calculated over the stream while writing it; however, in the - * former case this is done merely to check the data, while in - * the latter case this is done because we do not have the SHA1 - * message digest yet. */ - + /* Here, @lte is either a hashed stream or an unhashed stream + * with a unique size. In either case we know that the stream + * has to be written. In either case the SHA1 message digest + * will be calculated over the stream while writing it; however, + * in the former case this is done merely to check the data, + * while in the latter case this is done because we do not have + * the SHA1 message digest yet. */ wimlib_assert(lte->out_refcnt != 0); - ret = write_wim_resource(lte, out_fp, out_ctype, @@ -1577,11 +1519,8 @@ image_find_streams_to_write(WIMStruct *w) ctx = w->private; imd = wim_get_current_image_metadata(w); - image_for_each_unhashed_stream(lte, imd) { + image_for_each_unhashed_stream(lte, imd) lte->out_refcnt = 0; - wimlib_assert(lte->unhashed); - wimlib_assert(lte->back_ptr != NULL); - } /* Go through this image's inodes to find any streams that have not been * found yet. */ @@ -1697,43 +1636,43 @@ finish_write(WIMStruct *w, int image, int write_flags, if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) { ret = write_lookup_table(w, image, &hdr.lookup_table_res_entry); - if (ret != 0) - goto out; + if (ret) + goto out_close_wim; } ret = write_xml_data(w->wim_info, image, out, (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ? 
wim_info_get_total_bytes(w->wim_info) : 0, &hdr.xml_res_entry); - if (ret != 0) - goto out; + if (ret) + goto out_close_wim; if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) { if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) { struct wim_header checkpoint_hdr; memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header)); memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry)); - if (fseeko(out, 0, SEEK_SET) != 0) { + if (fseeko(out, 0, SEEK_SET)) { ERROR_WITH_ERRNO("Failed to seek to beginning " "of WIM being written"); ret = WIMLIB_ERR_WRITE; - goto out; + goto out_close_wim; } ret = write_header(&checkpoint_hdr, out); - if (ret != 0) - goto out; + if (ret) + goto out_close_wim; if (fflush(out) != 0) { ERROR_WITH_ERRNO("Can't write data to WIM"); ret = WIMLIB_ERR_WRITE; - goto out; + goto out_close_wim; } if (fseeko(out, 0, SEEK_END) != 0) { ERROR_WITH_ERRNO("Failed to seek to end " "of WIM being written"); ret = WIMLIB_ERR_WRITE; - goto out; + goto out_close_wim; } } @@ -1753,8 +1692,8 @@ finish_write(WIMStruct *w, int image, int write_flags, new_lookup_table_end, old_lookup_table_end, progress_func); - if (ret != 0) - goto out; + if (ret) + goto out_close_wim; } else { memset(&hdr.integrity, 0, sizeof(struct resource_entry)); } @@ -1790,12 +1729,12 @@ finish_write(WIMStruct *w, int image, int write_flags, ERROR_WITH_ERRNO("Failed to seek to beginning of WIM " "being written"); ret = WIMLIB_ERR_WRITE; - goto out; + goto out_close_wim; } ret = write_header(&hdr, out); if (ret) - goto out; + goto out_close_wim; if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) { if (fflush(out) != 0 @@ -1805,7 +1744,7 @@ finish_write(WIMStruct *w, int image, int write_flags, ret = WIMLIB_ERR_WRITE; } } -out: +out_close_wim: if (fclose(out) != 0) { ERROR_WITH_ERRNO("Failed to close the WIM file"); if (ret == 0) @@ -2038,6 +1977,8 @@ overwrite_wim_inplace(WIMStruct *w, int write_flags, if (!w->deletion_occurred && !any_images_modified(w)) { /* If no images have been 
modified and no images have been * deleted, a new lookup table does not need to be written. */ + DEBUG("Skipping writing lookup table " + "(no images modified or deleted)"); old_wim_end = w->hdr.lookup_table_res_entry.offset + w->hdr.lookup_table_res_entry.size; write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE | -- 2.43.0