From 9fe06c98e9d52ae2f5fdfc31cf07cef10ffaf8c3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Jan 2022 14:33:20 -0600 Subject: [PATCH] wimlib_iterate_dir_tree(): don't checksum unhashed blobs wimlib_iterate_dir_tree() on a modified-but-not-committed image is very slow because it checksums all unhashed blobs. This was originally implemented by commit 681faad85f73 ("wimlib_iterate_dir_tree(): checksum unhashed blobs"), presumably to make the sha1_hash field always valid. However, I can't remember a real use case for this. The current behavior is causing problems, so let's just revert it and update the documentation accordingly. Reported at https://wimlib.net/forums/viewtopic.php?f=1&t=572 --- include/wimlib.h | 24 ++++++++++++++---------- src/iterate_dir.c | 5 ----- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/wimlib.h b/include/wimlib.h index c032429d..f5c9e0e8 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -1399,20 +1399,23 @@ struct wimlib_wim_info { * sha1_hash. This case can only occur with wimlib_iterate_dir_tree(), never * wimlib_iterate_lookup_table(). * - * 2. Otherwise we know the sha1_hash, the uncompressed_size, the - * reference_count, and the is_metadata flag. In addition: + * 2. Otherwise we know the uncompressed_size, the reference_count, and the + * is_metadata flag. In addition: * * A. If the blob is located in a non-solid WIM resource, then we also know - * the compressed_size and offset. + * the sha1_hash, compressed_size, and offset. * * B. If the blob is located in a solid WIM resource, then we also know the - * offset, raw_resource_offset_in_wim, raw_resource_compressed_size, and - * raw_resource_uncompressed_size. But the "offset" is actually the - * offset in the uncompressed solid resource rather than the offset from - * the beginning of the WIM file. + * sha1_hash, offset, raw_resource_offset_in_wim, + * raw_resource_compressed_size, and raw_resource_uncompressed_size. 
But + * the "offset" is actually the offset in the uncompressed solid resource + * rather than the offset from the beginning of the WIM file. * - * C. If the blob is *not* located in any type of WIM resource, then we don't - * know any additional information. + * C. If the blob is *not* located in any type of WIM resource, for example + * if it's in an external file that was scanned by wimlib_add_image(), then + * we usually won't know any more information. The sha1_hash might be + * known, and prior to wimlib v1.13.6 it always was; however, in wimlib + * v1.13.6 and later, the sha1_hash might not be known in this case. * * Unknown or irrelevant fields are left zeroed. */ @@ -1432,7 +1435,8 @@ struct wimlib_resource_entry { * of this blob within that solid resource when uncompressed. */ uint64_t offset; - /** The SHA-1 message digest of the blob's uncompressed contents. */ + /** If this blob is located in a WIM resource, then this is the SHA-1 + * message digest of the blob's uncompressed contents. */ uint8_t sha1_hash[20]; /** If this blob is located in a WIM resource, then this is the part diff --git a/src/iterate_dir.c b/src/iterate_dir.c index 940fc24b..b6fdb032 100644 --- a/src/iterate_dir.c +++ b/src/iterate_dir.c @@ -269,11 +269,6 @@ wimlib_iterate_dir_tree(WIMStruct *wim, int image, const tchar *_path, path = canonicalize_wim_path(_path); if (path == NULL) return WIMLIB_ERR_NOMEM; - - ret = wim_checksum_unhashed_blobs(wim); - if (ret) - return ret; - struct image_iterate_dir_tree_ctx ctx = { .path = path, .flags = flags, -- 2.43.0