/*
 * blob_table.c
 *
 * A blob table maps SHA-1 message digests to "blobs", which are nonempty
 * sequences of binary data.  Within a WIM file, blobs are single-instanced.
 *
 * This file also contains code to read and write the corresponding on-disk
 * representation of this table in the WIM file format.
 */

/*
 * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <stdlib.h>
#include <string.h>
#include <unistd.h> /* for unlink() */

#include "wimlib/assert.h"
#include "wimlib/blob_table.h"
#include "wimlib/encoding.h"
#include "wimlib/endianness.h"
#include "wimlib/error.h"
#include "wimlib/metadata.h"
#include "wimlib/ntfs_3g.h"
#include "wimlib/resource.h"
#include "wimlib/unaligned.h"
#include "wimlib/util.h"
#include "wimlib/write.h"

/* A hash table mapping SHA-1 message digests to blob descriptors  */
struct blob_table {
	struct hlist_head *array;
	size_t num_blobs;
	size_t capacity;
};

struct blob_table *
new_blob_table(size_t capacity)
{
	struct blob_table *table;
	struct hlist_head *array;

	table = MALLOC(sizeof(struct blob_table));
	if (table == NULL)
		goto oom;

	array = CALLOC(capacity, sizeof(array[0]));
	if (array == NULL) {
		FREE(table);
		goto oom;
	}

	table->num_blobs = 0;
	table->capacity = capacity;
	table->array = array;
	return table;

oom:
	ERROR("Failed to allocate memory for blob table "
	      "with capacity %zu", capacity);
	return NULL;
}

static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
	free_blob_descriptor(blob);
	return 0;
}

void
free_blob_table(struct blob_table *table)
{
	if (table) {
		for_blob_in_table(table, do_free_blob_descriptor, NULL);
		FREE(table->array);
		FREE(table);
	}
}

struct blob_descriptor *
new_blob_descriptor(void)
{
	BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
	return CALLOC(1, sizeof(struct blob_descriptor));
}

struct blob_descriptor *
clone_blob_descriptor(const struct blob_descriptor *old)
{
	struct blob_descriptor *new;

	new = memdup(old, sizeof(struct blob_descriptor));
	if (new == NULL)
		return NULL;

	switch (new->blob_location) {
	case BLOB_IN_WIM:
		list_add(&new->rdesc_node, &new->rdesc->blob_list);
		break;

	case BLOB_IN_FILE_ON_DISK:
#ifdef __WIN32__
	case BLOB_IN_WINNT_FILE_ON_DISK:
	case BLOB_WIN32_ENCRYPTED:
#endif
#ifdef WITH_FUSE
	case BLOB_IN_STAGING_FILE:
		BUILD_BUG_ON((void*)&old->file_on_disk !=
			     (void*)&old->staging_file_name);
#endif
		new->file_on_disk = TSTRDUP(old->file_on_disk);
		if (new->file_on_disk == NULL)
			goto out_free;
		break;

	case BLOB_IN_ATTACHED_BUFFER:
		new->attached_buffer = memdup(old->attached_buffer, old->size);
		if (new->attached_buffer == NULL)
			goto out_free;
		break;

#ifdef WITH_NTFS_3G
	case BLOB_IN_NTFS_VOLUME:
		new->ntfs_loc = clone_ntfs_location(old->ntfs_loc);
		if (!new->ntfs_loc)
			goto out_free;
		break;
#endif
	}
	return new;

out_free:
	free_blob_descriptor(new);
	return NULL;
}
static void
blob_release_location(struct blob_descriptor *blob)
{
	switch (blob->blob_location) {
	case BLOB_IN_WIM:
		list_del(&blob->rdesc_node);
		if (list_empty(&blob->rdesc->blob_list))
			FREE(blob->rdesc);
		break;

	case BLOB_IN_FILE_ON_DISK:
#ifdef __WIN32__
	case BLOB_IN_WINNT_FILE_ON_DISK:
	case BLOB_WIN32_ENCRYPTED:
#endif
#ifdef WITH_FUSE
	case BLOB_IN_STAGING_FILE:
		BUILD_BUG_ON((void*)&blob->file_on_disk !=
			     (void*)&blob->staging_file_name);
#endif
	case BLOB_IN_ATTACHED_BUFFER:
		BUILD_BUG_ON((void*)&blob->file_on_disk !=
			     (void*)&blob->attached_buffer);
		FREE(blob->file_on_disk);
		break;

#ifdef WITH_NTFS_3G
	case BLOB_IN_NTFS_VOLUME:
		if (blob->ntfs_loc)
			free_ntfs_location(blob->ntfs_loc);
		break;
#endif
	}
}

void
free_blob_descriptor(struct blob_descriptor *blob)
{
	if (blob) {
		blob_release_location(blob);
		FREE(blob);
	}
}

/* Should this blob be retained even if it has no references?  */
static bool
should_retain_blob(const struct blob_descriptor *blob)
{
	return blob->blob_location == BLOB_IN_WIM;
}

static void
finalize_blob(struct blob_descriptor *blob)
{
	if (!should_retain_blob(blob))
		free_blob_descriptor(blob);
}

/*
 * Decrements the reference count of the specified blob, which must be either
 * (a) unhashed, or (b) inserted in the specified blob table.
 *
 * If the blob's reference count reaches 0, we may unlink it from @table and
 * free it.  However, we retain blobs with 0 reference count that originated
 * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
 *
 * 1. This prevents information about valid blobs in a WIM file --- blobs which
 *    will continue to be present after appending to the WIM file --- from
 *    being lost merely because we dropped all references to them.
 *
 * 2. Blob reference counts we read from WIM files can't be trusted.  It's
 *    possible that a WIM has reference counts that are too low; WIMGAPI
 *    sometimes creates WIMs where this is the case.  It's also possible that
 *    blobs have been referenced from an external WIM; those blobs can
 *    potentially have any reference count at all, either lower or higher than
 *    would be expected for this WIM ("this WIM" meaning the owner of @table)
 *    if it were a standalone WIM.
 *
 * So we can't take the reference counts too seriously, but by default we do
 * recalculate them when writing a new WIM file.
 */
void
blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
{
	blob_subtract_refcnt(blob, table, 1);
}

void
blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
		     u32 count)
{
	if (unlikely(blob->refcnt < count)) {
		blob->refcnt = 0; /* See comment above  */
		return;
	}

	blob->refcnt -= count;

	if (blob->refcnt != 0)
		return;

	if (blob->unhashed) {
		list_del(&blob->unhashed_list);
#ifdef WITH_FUSE
		/* If the blob has been extracted to a staging file for a FUSE
		 * mount, unlink the staging file.  (Note that there still may
		 * be open file descriptors to it.)  */
		if (blob->blob_location == BLOB_IN_STAGING_FILE)
			unlinkat(blob->staging_dir_fd,
				 blob->staging_file_name, 0);
#endif
	} else {
		if (!should_retain_blob(blob))
			blob_table_unlink(table, blob);
	}

	/* If FUSE mounts are enabled, then don't actually free the blob
	 * descriptor until the last file descriptor to it has been closed.  */
#ifdef WITH_FUSE
	if (blob->num_opened_fds == 0)
#endif
		finalize_blob(blob);
}
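/*
 * Illustrative sketch (not part of the original code): dropping the last
 * reference to a blob.  A WIM-backed descriptor survives with refcnt == 0
 * because should_retain_blob() returns true for BLOB_IN_WIM, whereas a
 * descriptor backed by, say, an external file is unlinked from the table
 * and freed:
 *
 *	struct blob_descriptor *blob = lookup_blob(table, hash);
 *	if (blob)
 *		blob_decrement_refcnt(blob, table);
 */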
#ifdef WITH_FUSE
void
blob_decrement_num_opened_fds(struct blob_descriptor *blob)
{
	wimlib_assert(blob->num_opened_fds != 0);

	if (--blob->num_opened_fds == 0 && blob->refcnt == 0)
		finalize_blob(blob);
}
#endif

static void
blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
{
	size_t i = blob->hash_short % table->capacity;

	hlist_add_head(&blob->hash_list, &table->array[i]);
}

static void
enlarge_blob_table(struct blob_table *table)
{
	size_t old_capacity, new_capacity;
	struct hlist_head *old_array, *new_array;
	struct blob_descriptor *blob;
	struct hlist_node *tmp;
	size_t i;

	old_capacity = table->capacity;
	new_capacity = old_capacity * 2;
	new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
	if (new_array == NULL)
		return;
	old_array = table->array;
	table->array = new_array;
	table->capacity = new_capacity;

	for (i = 0; i < old_capacity; i++) {
		hlist_for_each_entry_safe(blob, tmp, &old_array[i], hash_list) {
			hlist_del(&blob->hash_list);
			blob_table_insert_raw(table, blob);
		}
	}
	FREE(old_array);
}

/* Insert a blob descriptor into the blob table.  */
void
blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
{
	blob_table_insert_raw(table, blob);
	if (++table->num_blobs > table->capacity)
		enlarge_blob_table(table);
}

/* Unlinks a blob descriptor from the blob table; does not free it.  */
void
blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
{
	wimlib_assert(!blob->unhashed);
	wimlib_assert(table->num_blobs != 0);

	hlist_del(&blob->hash_list);
	table->num_blobs--;
}

/* Given a SHA-1 message digest, return the corresponding blob descriptor from
 * the specified blob table, or NULL if there is none.  */
struct blob_descriptor *
lookup_blob(const struct blob_table *table, const u8 *hash)
{
	size_t i;
	struct blob_descriptor *blob;

	i = load_size_t_unaligned(hash) % table->capacity;
	hlist_for_each_entry(blob, &table->array[i], hash_list)
		if (hashes_equal(hash, blob->hash))
			return blob;
	return NULL;
}

/* Call a function on all blob descriptors in the specified blob table.  Stop
 * early and return nonzero if any call to the function returns nonzero.  */
int
for_blob_in_table(struct blob_table *table,
		  int (*visitor)(struct blob_descriptor *, void *), void *arg)
{
	struct blob_descriptor *blob;
	struct hlist_node *tmp;
	int ret;

	for (size_t i = 0; i < table->capacity; i++) {
		hlist_for_each_entry_safe(blob, tmp, &table->array[i],
					  hash_list)
		{
			ret = visitor(blob, arg);
			if (ret)
				return ret;
		}
	}
	return 0;
}
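/*
 * Illustrative sketch (not part of the original code): the single-instancing
 * pattern these primitives support --- hash the data, then either reuse an
 * existing descriptor or insert a new one.  new_blob_from_data_buffer(),
 * later in this file, implements exactly this:
 *
 *	u8 hash[SHA1_HASH_SIZE];
 *	sha1_buffer(data, size, hash);
 *	blob = lookup_blob(table, hash);
 *	if (!blob) {
 *		blob = new_blob_descriptor();
 *		copy_hash(blob->hash, hash);
 *		blob_table_insert(table, blob);
 *	}
 */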
/*
 * This is a qsort() callback that sorts blobs into an order optimized for
 * reading.  Sorting is done primarily by blob location, then secondarily by a
 * location-dependent order.  For example, blobs in WIM resources are sorted
 * such that the underlying WIM files will be read sequentially.  This is
 * especially important for WIM files containing solid resources.
 */
int
cmp_blobs_by_sequential_order(const void *p1, const void *p2)
{
	const struct blob_descriptor *blob1, *blob2;
	int v;
	WIMStruct *wim1, *wim2;

	blob1 = *(const struct blob_descriptor**)p1;
	blob2 = *(const struct blob_descriptor**)p2;

	v = (int)blob1->blob_location - (int)blob2->blob_location;

	/* Different locations?  */
	if (v)
		return v;

	switch (blob1->blob_location) {
	case BLOB_IN_WIM:
		wim1 = blob1->rdesc->wim;
		wim2 = blob2->rdesc->wim;

		/* Different (possibly split) WIMs?  */
		if (wim1 != wim2) {
			v = memcmp(wim1->hdr.guid, wim2->hdr.guid,
				   WIM_GUID_LEN);
			if (v)
				return v;
		}

		/* Different part numbers in the same WIM?  */
		v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
		if (v)
			return v;

		if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
			return cmp_u64(blob1->rdesc->offset_in_wim,
				       blob2->rdesc->offset_in_wim);

		return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);

	case BLOB_IN_FILE_ON_DISK:
#ifdef WITH_FUSE
	case BLOB_IN_STAGING_FILE:
#endif
#ifdef __WIN32__
	case BLOB_IN_WINNT_FILE_ON_DISK:
	case BLOB_WIN32_ENCRYPTED:
		/* Windows: compare by starting LCN (logical cluster number)  */
		v = cmp_u64(blob1->sort_key, blob2->sort_key);
		if (v)
			return v;
#endif
		/* Compare files by path: just a heuristic that will place
		 * files in the same directory next to each other.  */
		return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);

#ifdef WITH_NTFS_3G
	case BLOB_IN_NTFS_VOLUME:
		return cmp_ntfs_locations(blob1->ntfs_loc, blob2->ntfs_loc);
#endif
	default:
		/* No additional sorting order defined for this resource
		 * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
		 * everything equal to each other.  */
		return 0;
	}
}

int
sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
	       int (*compar)(const void *, const void*))
{
	struct list_head *cur;
	struct blob_descriptor **array;
	size_t i;
	size_t array_size;
	size_t num_blobs = 0;

	list_for_each(cur, blob_list)
		num_blobs++;

	if (num_blobs <= 1)
		return 0;

	array_size = num_blobs * sizeof(array[0]);
	array = MALLOC(array_size);
	if (array == NULL)
		return WIMLIB_ERR_NOMEM;

	cur = blob_list->next;
	for (i = 0; i < num_blobs; i++) {
		array[i] = (struct blob_descriptor*)((u8*)cur -
						     list_head_offset);
		cur = cur->next;
	}

	qsort(array, num_blobs, sizeof(array[0]), compar);

	INIT_LIST_HEAD(blob_list);
	for (i = 0; i < num_blobs; i++) {
		list_add_tail((struct list_head*)
			       ((u8*)array[i] + list_head_offset),
			      blob_list);
	}
	FREE(array);
	return 0;
}

/* Sort the specified list of blobs in an order optimized for sequential
 * reading.  */
int
sort_blob_list_by_sequential_order(struct list_head *blob_list,
				   size_t list_head_offset)
{
	return sort_blob_list(blob_list, list_head_offset,
			      cmp_blobs_by_sequential_order);
}

static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
	struct blob_descriptor ***pp = _pp;
	*(*pp)++ = blob;
	return 0;
}

/* Iterate through the blob descriptors in the specified blob table in an
 * order optimized for sequential reading.  */
int
for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
					     int (*visitor)(struct blob_descriptor *,
							    void *),
					     void *arg)
{
	struct blob_descriptor **blob_array, **p;
	size_t num_blobs = table->num_blobs;
	int ret;

	blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
	if (!blob_array)
		return WIMLIB_ERR_NOMEM;

	p = blob_array;
	for_blob_in_table(table, add_blob_to_array, &p);

	wimlib_assert(p == blob_array + num_blobs);

	qsort(blob_array, num_blobs, sizeof(blob_array[0]),
	      cmp_blobs_by_sequential_order);
	ret = 0;
	for (size_t i = 0; i < num_blobs; i++) {
		ret = visitor(blob_array[i], arg);
		if (ret)
			break;
	}
	FREE(blob_array);
	return ret;
}
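/*
 * Usage note (illustrative): because sort_blob_list() takes the offset of the
 * list node within 'struct blob_descriptor', it can sort a list linked through
 * any of the descriptor's embedded nodes.  For example, validate_resource()
 * below sorts a resource's blob list through 'rdesc_node' this way:
 *
 *	ret = sort_blob_list(&rdesc->blob_list,
 *			     offsetof(struct blob_descriptor, rdesc_node),
 *			     cmp_blobs_by_offset_in_res);
 */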
/* On-disk format of a blob descriptor in a WIM file.
 *
 * Note: if the WIM file contains solid resource(s), then this structure is
 * sometimes overloaded to describe a "resource" rather than a "blob".  See
 * the code for details.  */
struct blob_descriptor_disk {

	/* Size, offset, and flags of the blob.  */
	struct wim_reshdr_disk reshdr;

	/* Which part of the split WIM this blob is in; indexed from 1.  */
	le16 part_number;

	/* Reference count of this blob over all WIM images.  (But see comment
	 * above blob_decrement_refcnt().)  */
	le32 refcnt;

	/* SHA-1 message digest of the uncompressed data of this blob, or all
	 * zeroes if this blob is of zero length.  */
	u8 hash[SHA1_HASH_SIZE];
} _packed_attribute;

/* Given a nonempty run of consecutive blob descriptors with the SOLID flag
 * set, count how many specify resources (as opposed to blobs within those
 * resources).
 *
 * Returns the resulting count.  */
static size_t
count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
{
	size_t count = 0;
	do {
		struct wim_reshdr reshdr;

		get_wim_reshdr(&(entries++)->reshdr, &reshdr);

		if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
			/* Run was terminated by a stand-alone blob entry.  */
			break;
		}

		if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
			/* This is a resource entry.  */
			count++;
		}
	} while (--max);
	return count;
}

/*
 * Given a run of consecutive blob descriptors with the SOLID flag set and
 * having @num_rdescs resource entries, load resource information from them
 * into the resource descriptors in the @rdescs array.
 *
 * Returns 0 on success, or a nonzero error code on failure.
 */
static int
do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
		   size_t num_rdescs,
		   const struct blob_descriptor_disk *entries)
{
	for (size_t i = 0; i < num_rdescs; i++) {
		struct wim_reshdr reshdr;
		struct alt_chunk_table_header_disk hdr;
		struct wim_resource_descriptor *rdesc;
		int ret;

		/* Advance to next resource entry.  */
		do {
			get_wim_reshdr(&(entries++)->reshdr, &reshdr);
		} while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);

		rdesc = rdescs[i];

		wim_res_hdr_to_desc(&reshdr, wim, rdesc);

		/* For solid resources, the uncompressed size, compression
		 * type, and chunk size are stored in the resource itself, not
		 * in the blob table.  */
		ret = full_pread(&wim->in_fd, &hdr,
				 sizeof(hdr), reshdr.offset_in_wim);
		if (ret) {
			ERROR("Failed to read header of solid resource "
			      "(offset_in_wim=%"PRIu64")",
			      reshdr.offset_in_wim);
			return ret;
		}

		rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);

		/* Compression format numbers must be the same as in WIMGAPI
		 * to be compatible here.  */
		BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
		BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
		BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
		BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
		rdesc->compression_type = le32_to_cpu(hdr.compression_format);

		rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);

		DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
		      "(%"TS"/%"PRIu32") @ +%"PRIu64"",
		      i + 1, num_rdescs,
		      rdesc->uncompressed_size,
		      rdesc->size_in_wim,
		      wimlib_get_compression_type_string(rdesc->compression_type),
		      rdesc->chunk_size,
		      rdesc->offset_in_wim);
	}
	return 0;
}
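/*
 * Example layout of a solid run in the on-disk blob table, as written by
 * wimlib (illustrative; "R" marks entries whose uncompressed_size is
 * SOLID_RESOURCE_MAGIC_NUMBER, i.e. resource entries, and "b" marks blob
 * entries within the solid resources):
 *
 *	... [normal] [R1] [b] [b] [R2] [b] [b] [b] [normal] ...
 *	             \_________solid run__________/
 *
 * For this run, count_solid_resources() returns 2, and do_load_solid_info()
 * reads the headers of R1 and R2 from the WIM file itself.
 */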
/*
 * Given a nonempty run of consecutive blob descriptors with the SOLID flag
 * set, allocate a 'struct wim_resource_descriptor' for each resource within
 * that run.
 *
 * Returns 0 on success, or a nonzero error code on failure.
 * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
 */
static int
load_solid_info(WIMStruct *wim,
		const struct blob_descriptor_disk *entries,
		size_t num_remaining_entries,
		struct wim_resource_descriptor ***rdescs_ret,
		size_t *num_rdescs_ret)
{
	size_t num_rdescs;
	struct wim_resource_descriptor **rdescs;
	size_t i;
	int ret;

	num_rdescs = count_solid_resources(entries, num_remaining_entries);
	rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
	if (!rdescs)
		return WIMLIB_ERR_NOMEM;

	for (i = 0; i < num_rdescs; i++) {
		rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
		if (!rdescs[i]) {
			ret = WIMLIB_ERR_NOMEM;
			goto out_free_rdescs;
		}
	}

	ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
	if (ret)
		goto out_free_rdescs;

	*rdescs_ret = rdescs;
	*num_rdescs_ret = num_rdescs;
	return 0;

out_free_rdescs:
	for (i = 0; i < num_rdescs; i++)
		FREE(rdescs[i]);
	FREE(rdescs);
	return ret;
}

/* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
 * with the SOLID flag set, try to assign it to a resource in the current
 * solid run.  */
static int
assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
			      struct blob_descriptor *blob,
			      struct wim_resource_descriptor **rdescs,
			      size_t num_rdescs)
{
	u64 offset = reshdr->offset_in_wim;

	/* XXX: This linear search will be slow in the degenerate case where
	 * the number of solid resources in the run is huge.  */
	blob->size = reshdr->size_in_wim;
	for (size_t i = 0; i < num_rdescs; i++) {
		if (offset + blob->size <= rdescs[i]->uncompressed_size) {
			blob_set_is_located_in_wim_resource(blob, rdescs[i],
							    offset);
			return 0;
		}
		offset -= rdescs[i]->uncompressed_size;
	}
	ERROR("blob could not be assigned to a solid resource");
	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
}
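/*
 * Worked example (illustrative): suppose a solid run contains two resources
 * with uncompressed sizes 100 and 200.  A blob entry whose on-disk offset is
 * 150 does not fit in the first resource (150 + size > 100), so
 * assign_blob_to_solid_resource() subtracts 100 and places the blob at
 * offset 50 within the second resource.  This undoes the "one big solid
 * resource" offset adjustment that WIMGAPI expects writers to make (see
 * write_blob_table_from_blob_list() below).
 */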
static void
free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
	if (rdescs) {
		for (size_t i = 0; i < num_rdescs; i++)
			if (list_empty(&rdescs[i]->blob_list))
				FREE(rdescs[i]);
		FREE(rdescs);
	}
}

static int
cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
{
	const struct blob_descriptor *blob1, *blob2;

	blob1 = *(const struct blob_descriptor**)p1;
	blob2 = *(const struct blob_descriptor**)p2;

	return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
}

/* Validate the size and location of a WIM resource.  */
static int
validate_resource(struct wim_resource_descriptor *rdesc)
{
	struct blob_descriptor *blob;
	bool out_of_order;
	u64 expected_next_offset;
	int ret;

	/* Verify that the resource itself has a valid offset and size.  */
	if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
		goto invalid_due_to_overflow;

	/* Verify that each blob in the resource has a valid offset and size.
	 */
	expected_next_offset = 0;
	out_of_order = false;
	list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
		if (blob->offset_in_res + blob->size < blob->size ||
		    blob->offset_in_res + blob->size > rdesc->uncompressed_size)
			goto invalid_due_to_overflow;

		if (blob->offset_in_res >= expected_next_offset)
			expected_next_offset = blob->offset_in_res + blob->size;
		else
			out_of_order = true;
	}

	/* If the blobs were not located at strictly increasing positions (not
	 * allowing for overlap), sort them.  Then make sure that none overlap.
	 */
	if (out_of_order) {
		ret = sort_blob_list(&rdesc->blob_list,
				     offsetof(struct blob_descriptor,
					      rdesc_node),
				     cmp_blobs_by_offset_in_res);
		if (ret)
			return ret;

		expected_next_offset = 0;
		list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
			if (blob->offset_in_res >= expected_next_offset)
				expected_next_offset = blob->offset_in_res +
						       blob->size;
			else
				goto invalid_due_to_overlap;
		}
	}

	return 0;

invalid_due_to_overflow:
	ERROR("Invalid blob table (offset overflow)");
	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;

invalid_due_to_overlap:
	ERROR("Invalid blob table (blobs in solid resource overlap)");
	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
}

static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
	int ret = 0;
	for (size_t i = 0; i < num_rdescs; i++) {
		ret = validate_resource(rdescs[i]);
		if (ret)
			break;
	}
	free_solid_rdescs(rdescs, num_rdescs);
	return ret;
}

/*
 * read_blob_table() -
 *
 * Read the blob table from a WIM file.  Usually, each entry in this table
 * describes a "blob", or equivalently a "resource", that the WIM file
 * contains, along with its location and SHA-1 message digest.  Descriptors
 * for non-metadata blobs will be saved in the in-memory blob table
 * (wim->blob_table), whereas descriptors for metadata blobs will be saved in
 * a special location per-image (the wim->image_metadata array).
 *
 * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
 * blobs that are compressed together.  Such a resource is called a "solid
 * resource".  Solid resources are still described in the on-disk "blob
 * table", although the format is not the most logical.  A consecutive
 * sequence of entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is
 * a "solid run".  A solid run describes a set of solid resources, each of
 * which contains a set of blobs.  In a solid run, a 'struct wim_reshdr_disk'
 * with 'uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)'
 * specifies a solid resource, whereas any other 'struct wim_reshdr_disk'
 * specifies a blob within a solid resource.  There are some oddities in how
 * we need to determine which solid resource a blob is actually in; see the
 * code for details.
 *
 * Possible return values:
 *	WIMLIB_ERR_SUCCESS (0)
 *	WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
 *	WIMLIB_ERR_NOMEM
 *
 *	Or an error code caused by failure to read the blob table from the
 *	WIM file.
 */
int
read_blob_table(WIMStruct *wim)
{
	int ret;
	size_t num_entries;
	void *buf = NULL;
	struct blob_table *table = NULL;
	struct blob_descriptor *cur_blob = NULL;
	size_t num_duplicate_blobs = 0;
	size_t num_wrong_part_blobs = 0;
	u32 image_index = 0;
	struct wim_resource_descriptor **cur_solid_rdescs = NULL;
	size_t cur_num_solid_rdescs = 0;

	DEBUG("Reading blob table.");

	/* Calculate the number of entries in the blob table.  */
	num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
		      sizeof(struct blob_descriptor_disk);

	/* Read the blob table into a buffer.  */
	ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
	if (ret)
		goto out;

	/* Allocate a hash table to map SHA-1 message digests into blob
	 * descriptors.  This is the in-memory "blob table".  */
	table = new_blob_table(num_entries * 2 + 1);
	if (!table)
		goto oom;

	/* Allocate and initialize blob descriptors from the raw blob table
	 * buffer.  */
	for (size_t i = 0; i < num_entries; i++) {
		const struct blob_descriptor_disk *disk_entry =
			&((const struct blob_descriptor_disk*)buf)[i];
		struct wim_reshdr reshdr;
		u16 part_number;

		/* Get the resource header  */
		get_wim_reshdr(&disk_entry->reshdr, &reshdr);

		DEBUG("reshdr: size_in_wim=%"PRIu64", "
		      "uncompressed_size=%"PRIu64", "
		      "offset_in_wim=%"PRIu64", "
		      "flags=0x%02x",
		      reshdr.size_in_wim, reshdr.uncompressed_size,
		      reshdr.offset_in_wim, reshdr.flags);

		/* Ignore SOLID flag if it isn't supposed to be used in this
		 * WIM version.  */
		if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
			reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;

		/* Allocate a new 'struct blob_descriptor'.  */
		cur_blob = new_blob_descriptor();
		if (!cur_blob)
			goto oom;

		/* Get the part number, reference count, and hash.  */
		part_number = le16_to_cpu(disk_entry->part_number);
		cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
		copy_hash(cur_blob->hash, disk_entry->hash);

		if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {

			/* SOLID entry  */

			if (!cur_solid_rdescs) {
				/* Starting new run  */
				ret = load_solid_info(wim, disk_entry,
						      num_entries - i,
						      &cur_solid_rdescs,
						      &cur_num_solid_rdescs);
				if (ret)
					goto out;
			}

			if (reshdr.uncompressed_size ==
			    SOLID_RESOURCE_MAGIC_NUMBER)
			{
				/* Resource entry, not blob entry  */
				goto free_cur_blob_and_continue;
			}

			/* Blob entry  */

			ret = assign_blob_to_solid_resource(&reshdr,
							    cur_blob,
							    cur_solid_rdescs,
							    cur_num_solid_rdescs);
			if (ret)
				goto out;

		} else {
			/* Normal blob/resource entry; SOLID not set.  */

			struct wim_resource_descriptor *rdesc;

			if (unlikely(cur_solid_rdescs)) {
				/* This entry terminated a solid run.  */
				ret = finish_solid_rdescs(cur_solid_rdescs,
							  cur_num_solid_rdescs);
				cur_solid_rdescs = NULL;
				if (ret)
					goto out;
			}

			/* How to handle an uncompressed resource with its
			 * uncompressed size different from its compressed
			 * size?
			 *
			 * Based on a simple test, WIMGAPI seems to handle this
			 * as follows:
			 *
			 * if (size_in_wim > uncompressed_size) {
			 *	Ignore uncompressed_size; use size_in_wim
			 *	instead.
			 * } else {
			 *	Honor uncompressed_size, but treat the part of
			 *	the file data above size_in_wim as all zeros.
			 * }
			 *
			 * So we will do the same.  */
			if (unlikely(!(reshdr.flags &
				       WIM_RESHDR_FLAG_COMPRESSED) &&
				     (reshdr.size_in_wim >
				      reshdr.uncompressed_size)))
			{
				reshdr.uncompressed_size = reshdr.size_in_wim;
			}

			/* Set up a resource descriptor for this blob.  */

			rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
			if (!rdesc)
				goto oom;

			wim_res_hdr_to_desc(&reshdr, wim, rdesc);
			blob_set_is_located_in_nonsolid_wim_resource(cur_blob,
								     rdesc);
		}

		/* cur_blob is now a blob bound to a resource.  */

		/* Ignore entries with all zeroes in the hash field.  */
		if (is_zero_hash(cur_blob->hash))
			goto free_cur_blob_and_continue;

		/* Verify that the part number matches that of the underlying
		 * WIM file.  */
		if (part_number != wim->hdr.part_number) {
			num_wrong_part_blobs++;
			goto free_cur_blob_and_continue;
		}

		if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {

			cur_blob->is_metadata = 1;

			/* Blob table entry for a metadata resource.  */

			/* Metadata entries with no references must be
			 * ignored.  See, for example, the WinPE WIMs from the
			 * WAIK v2.1.  */
			if (cur_blob->refcnt == 0)
				goto free_cur_blob_and_continue;

			if (cur_blob->refcnt != 1) {
				/* We don't currently support this case due to
				 * the complications of multiple images sharing
				 * the same metadata resource or a metadata
				 * resource also being referenced by files.  */
*/ ERROR("Found metadata resource with refcnt != 1"); ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; goto out; } if (wim->hdr.part_number != 1) { WARNING("Ignoring metadata resource found in a " "non-first part of the split WIM"); goto free_cur_blob_and_continue; } /* The number of entries in the blob table with * WIM_RESHDR_FLAG_METADATA set should be the same as * the image_count field in the WIM header. */ if (image_index == wim->hdr.image_count) { WARNING("Found more metadata resources than images"); goto free_cur_blob_and_continue; } /* Notice very carefully: We are assigning the metadata * resources to images in the same order in which their * blob table entries occur on disk. (This is also the * behavior of Microsoft's software.) In particular, * this overrides the actual locations of the metadata * resources themselves in the WIM file as well as any * information written in the XML data. */ DEBUG("Found metadata resource for image %"PRIu32" at " "offset %"PRIu64".", image_index + 1, reshdr.offset_in_wim); wim->image_metadata[image_index++]->metadata_blob = cur_blob; } else { /* Blob table entry for a non-metadata blob. */ /* Ignore this blob if it's a duplicate. */ if (lookup_blob(table, cur_blob->hash)) { num_duplicate_blobs++; goto free_cur_blob_and_continue; } /* Insert the blob into the in-memory blob table, keyed * by its SHA-1 message digest. */ blob_table_insert(table, cur_blob); } continue; free_cur_blob_and_continue: if (cur_solid_rdescs && cur_blob->blob_location == BLOB_IN_WIM) blob_unset_is_located_in_wim_resource(cur_blob); free_blob_descriptor(cur_blob); } cur_blob = NULL; if (cur_solid_rdescs) { /* End of blob table terminated a solid run. */ ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs); cur_solid_rdescs = NULL; if (ret) goto out; } if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { WARNING("Could not find metadata resources for all images"); for (u32 i = image_index; i < wim->hdr.image_count; i++) put_image_metadata(wim->image_metadata[i], NULL); wim->hdr.image_count = image_index; } if (num_duplicate_blobs > 0) WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs); if (num_wrong_part_blobs > 0) { WARNING("Ignoring %zu blobs with wrong part number", num_wrong_part_blobs); } DEBUG("Done reading blob table."); wim->blob_table = table; ret = 0; goto out_free_buf; oom: ERROR("Not enough memory to read blob table!"); ret = WIMLIB_ERR_NOMEM; out: free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs); free_blob_descriptor(cur_blob); free_blob_table(table); out_free_buf: FREE(buf); return ret; } static void write_blob_descriptor(struct blob_descriptor_disk *disk_entry, const struct wim_reshdr *out_reshdr, u16 part_number, u32 refcnt, const u8 *hash) { put_wim_reshdr(out_reshdr, &disk_entry->reshdr); disk_entry->part_number = cpu_to_le16(part_number); disk_entry->refcnt = cpu_to_le32(refcnt); copy_hash(disk_entry->hash, hash); } /* Note: the list of blob descriptors must be sorted so that all entries for the * same solid resource are consecutive. In addition, blob descriptors for * metadata resources must be in the same order as the indices of the underlying * images. 
/* Note: the list of blob descriptors must be sorted so that all entries for
 * the same solid resource are consecutive.  In addition, blob descriptors for
 * metadata resources must be in the same order as the indices of the
 * underlying images.  */
int
write_blob_table_from_blob_list(struct list_head *blob_list,
				struct filedes *out_fd,
				u16 part_number,
				struct wim_reshdr *out_reshdr,
				int write_resource_flags)
{
	size_t table_size;
	struct blob_descriptor *blob;
	struct blob_descriptor_disk *table_buf;
	struct blob_descriptor_disk *table_buf_ptr;
	int ret;
	u64 prev_res_offset_in_wim = ~0ULL;
	u64 prev_uncompressed_size;
	u64 logical_offset;

	table_size = 0;
	list_for_each_entry(blob, blob_list, blob_table_list) {
		table_size += sizeof(struct blob_descriptor_disk);

		if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
		    blob->out_res_offset_in_wim != prev_res_offset_in_wim)
		{
			table_size += sizeof(struct blob_descriptor_disk);
			prev_res_offset_in_wim = blob->out_res_offset_in_wim;
		}
	}

	DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
	      table_size, out_fd->offset);

	table_buf = MALLOC(table_size);
	if (table_buf == NULL) {
		ERROR("Failed to allocate %zu bytes for temporary blob table",
		      table_size);
		return WIMLIB_ERR_NOMEM;
	}
	table_buf_ptr = table_buf;

	prev_res_offset_in_wim = ~0ULL;
	prev_uncompressed_size = 0;
	logical_offset = 0;
	list_for_each_entry(blob, blob_list, blob_table_list) {
		if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
			struct wim_reshdr tmp_reshdr;

			/* Eww.  When WIMGAPI sees multiple solid resources, it
			 * expects the offsets to be adjusted as if there were
			 * really only one solid resource.  */

			if (blob->out_res_offset_in_wim !=
			    prev_res_offset_in_wim)
			{
				/* Put the resource entry for solid resource  */
				tmp_reshdr.offset_in_wim =
					blob->out_res_offset_in_wim;
				tmp_reshdr.size_in_wim =
					blob->out_res_size_in_wim;
				tmp_reshdr.uncompressed_size =
					SOLID_RESOURCE_MAGIC_NUMBER;
				tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;

				write_blob_descriptor(table_buf_ptr++,
						      &tmp_reshdr,
						      part_number, 1,
						      zero_hash);

				logical_offset += prev_uncompressed_size;

				prev_res_offset_in_wim =
					blob->out_res_offset_in_wim;
				prev_uncompressed_size =
					blob->out_res_uncompressed_size;
			}
			tmp_reshdr = blob->out_reshdr;
			tmp_reshdr.offset_in_wim += logical_offset;
			write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
					      part_number, blob->out_refcnt,
					      blob->hash);
		} else {
			write_blob_descriptor(table_buf_ptr++,
					      &blob->out_reshdr,
					      part_number, blob->out_refcnt,
					      blob->hash);
		}
	}
	wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);

	/* Write the blob table uncompressed.  Although wimlib can handle a
	 * compressed blob table, MS software cannot.  */
	ret = write_wim_resource_from_buffer(table_buf,
					     table_size,
					     true,
					     out_fd,
					     WIMLIB_COMPRESSION_TYPE_NONE,
					     0,
					     out_reshdr,
					     NULL,
					     write_resource_flags);
	FREE(table_buf);
	DEBUG("ret=%d", ret);
	return ret;
}
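/*
 * Worked example of the offset adjustment above (illustrative): if two solid
 * resources with uncompressed sizes 100 and 200 are written, then a blob
 * stored at offset 50 within the second resource gets
 * 'offset_in_wim = 100 + 50 = 150' in its on-disk entry --- the mirror image
 * of the subtraction performed by assign_blob_to_solid_resource() when the
 * table is read back.
 */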
/* Allocate a blob descriptor for the contents of the buffer, or re-use an
 * existing descriptor in @blob_table for an identical blob.  */
struct blob_descriptor *
new_blob_from_data_buffer(const void *buffer, size_t size,
			  struct blob_table *blob_table)
{
	u8 hash[SHA1_HASH_SIZE];
	struct blob_descriptor *blob;
	void *buffer_copy;

	sha1_buffer(buffer, size, hash);

	blob = lookup_blob(blob_table, hash);
	if (blob)
		return blob;

	blob = new_blob_descriptor();
	if (!blob)
		return NULL;

	buffer_copy = memdup(buffer, size);
	if (!buffer_copy) {
		free_blob_descriptor(blob);
		return NULL;
	}
	blob_set_is_located_in_attached_buffer(blob, buffer_copy, size);
	copy_hash(blob->hash, hash);
	blob_table_insert(blob_table, blob);
	return blob;
}

struct blob_descriptor *
after_blob_hashed(struct blob_descriptor *blob,
		  struct blob_descriptor **back_ptr,
		  struct blob_table *blob_table)
{
	struct blob_descriptor *duplicate_blob;

	list_del(&blob->unhashed_list);
	blob->unhashed = 0;

	/* Look for a duplicate blob  */
	duplicate_blob = lookup_blob(blob_table, blob->hash);
	if (duplicate_blob) {
		/* We have a duplicate blob.  Transfer the reference counts
		 * from this blob to the duplicate and update the reference to
		 * this blob (from a stream) to point to the duplicate.  The
		 * caller is responsible for freeing @blob if needed.  */
		wimlib_assert(duplicate_blob->size == blob->size);
		duplicate_blob->refcnt += blob->refcnt;
		blob->refcnt = 0;
		*back_ptr = duplicate_blob;
		return duplicate_blob;
	} else {
		/* No duplicate blob, so we need to insert this blob into the
		 * blob table and treat it as a hashed blob.  */
		blob_table_insert(blob_table, blob);
		return blob;
	}
}

/*
 * Calculate the SHA-1 message digest of a blob and move its descriptor from
 * the list of unhashed blobs to the blob table, possibly joining it with an
 * identical blob.
 *
 * @blob:
 *	The blob to hash
 * @blob_table:
 *	The blob table in which the blob needs to be indexed
 * @blob_ret:
 *	On success, a pointer to the resulting blob descriptor is written to
 *	this location.  This will be the same as @blob if it was inserted into
 *	the blob table, or different if a duplicate blob was found.
 *
 * Returns 0 on success; nonzero if there is an error reading the blob data.
 */
int
hash_unhashed_blob(struct blob_descriptor *blob,
		   struct blob_table *blob_table,
		   struct blob_descriptor **blob_ret)
{
	struct blob_descriptor **back_ptr;
	int ret;

	back_ptr = retrieve_pointer_to_unhashed_blob(blob);

	ret = sha1_blob(blob);
	if (ret)
		return ret;

	*blob_ret = after_blob_hashed(blob, back_ptr, blob_table);
	return 0;
}

void
blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
			      struct wimlib_resource_entry *wentry)
{
	memset(wentry, 0, sizeof(*wentry));

	wentry->uncompressed_size = blob->size;
	if (blob->blob_location == BLOB_IN_WIM) {
		unsigned res_flags = blob->rdesc->flags;

		wentry->part_number = blob->rdesc->wim->hdr.part_number;
		if (res_flags & WIM_RESHDR_FLAG_SOLID) {
			wentry->offset = blob->offset_in_res;
		} else {
			wentry->compressed_size = blob->rdesc->size_in_wim;
			wentry->offset = blob->rdesc->offset_in_wim;
		}
		wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
		wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
		wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;

		wentry->is_compressed =
			(res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
		wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
		wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
		wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
	}
	if (!blob->unhashed)
		copy_hash(wentry->sha1_hash, blob->hash);
	wentry->reference_count = blob->refcnt;
	wentry->is_metadata = blob->is_metadata;
}

struct iterate_blob_context {
	wimlib_iterate_lookup_table_callback_t cb;
	void *user_ctx;
};

static int
do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
{
	struct iterate_blob_context *ctx = _ctx;
	struct wimlib_resource_entry entry;

	blob_to_wimlib_resource_entry(blob, &entry);
	return (*ctx->cb)(&entry, ctx->user_ctx);
}

/* API function documented in wimlib.h  */
WIMLIBAPI int
wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
			    wimlib_iterate_lookup_table_callback_t cb,
			    void *user_ctx)
{
	if (flags != 0)
		return WIMLIB_ERR_INVALID_PARAM;

	struct iterate_blob_context ctx = {
		.cb = cb,
		.user_ctx = user_ctx,
	};

	if (wim_has_metadata(wim)) {
		int ret;
		for (int i = 0; i < wim->hdr.image_count; i++) {
			struct blob_descriptor *blob;
			struct wim_image_metadata *imd = wim->image_metadata[i];

			ret = do_iterate_blob(imd->metadata_blob, &ctx);
			if (ret)
				return ret;
			image_for_each_unhashed_blob(blob, imd) {
				ret = do_iterate_blob(blob, &ctx);
				if (ret)
					return ret;
			}
		}
	}
	return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
}
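/*
 * Example use of the public iteration API (an illustrative sketch, not part
 * of the library; wimlib_open_wim(), wimlib_free(), and the callback type are
 * declared in wimlib.h):
 *
 *	static int
 *	print_entry(const struct wimlib_resource_entry *entry, void *user_ctx)
 *	{
 *		printf("%"PRIu64" bytes\n", entry->uncompressed_size);
 *		return 0;
 *	}
 *
 *	...
 *	WIMStruct *wim;
 *	if (wimlib_open_wim(path, 0, &wim) == 0) {
 *		wimlib_iterate_lookup_table(wim, 0, print_entry, NULL);
 *		wimlib_free(wim);
 *	}
 */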