X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flookup_table.c;h=882e7dd7048cd979f81dde2a9a611f630d3d12da;hp=f24bf053e5d7e3fa2b4b47ea5e32c79483b40e52;hb=4c73e29d8d74a4e969782d2d40e209337414034c;hpb=c902ba4633348027dd76d84fa1e14c7f7e91ca08 diff --git a/src/lookup_table.c b/src/lookup_table.c index f24bf053..882e7dd7 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -6,22 +6,20 @@ */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. 
*/ #ifdef HAVE_CONFIG_H @@ -94,17 +92,11 @@ do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) void free_lookup_table(struct wim_lookup_table *table) { - DEBUG("Freeing lookup table."); - if (table == NULL) - return; - - if (table->array) { - for_lookup_table_entry(table, - do_free_lookup_table_entry, - NULL); + if (table) { + for_lookup_table_entry(table, do_free_lookup_table_entry, NULL); FREE(table->array); + FREE(table); } - FREE(table); } struct wim_lookup_table_entry * @@ -133,7 +125,6 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) if (new == NULL) return NULL; - new->extracted_file = NULL; switch (new->resource_location) { case RESOURCE_IN_WIM: list_add(&new->rspec_node, &new->rspec->stream_list); @@ -141,6 +132,7 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif #ifdef WITH_FUSE @@ -190,54 +182,95 @@ out_free: } void -free_lookup_table_entry(struct wim_lookup_table_entry *lte) +lte_put_resource(struct wim_lookup_table_entry *lte) { - if (lte) { - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - list_del(&lte->rspec_node); - if (list_empty(&lte->rspec->stream_list)) - FREE(lte->rspec); - break; - case RESOURCE_IN_FILE_ON_DISK: - #ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: - #endif - #ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->staging_file_name); - #endif - case RESOURCE_IN_ATTACHED_BUFFER: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->attached_buffer); - FREE(lte->file_on_disk); - break; -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - if (lte->ntfs_loc) { - FREE(lte->ntfs_loc->path); - FREE(lte->ntfs_loc->stream_name); - FREE(lte->ntfs_loc); - } - break; + switch (lte->resource_location) { + case RESOURCE_IN_WIM: + list_del(&lte->rspec_node); + if
(list_empty(&lte->rspec->stream_list)) + FREE(lte->rspec); + break; + case RESOURCE_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: + case RESOURCE_WIN32_ENCRYPTED: #endif - default: - break; +#ifdef WITH_FUSE + case RESOURCE_IN_STAGING_FILE: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->staging_file_name); +#endif + case RESOURCE_IN_ATTACHED_BUFFER: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->attached_buffer); + FREE(lte->file_on_disk); + break; +#ifdef WITH_NTFS_3G + case RESOURCE_IN_NTFS_VOLUME: + if (lte->ntfs_loc) { + FREE(lte->ntfs_loc->path); + FREE(lte->ntfs_loc->stream_name); + FREE(lte->ntfs_loc); } + break; +#endif + default: + break; + } +} + +void +free_lookup_table_entry(struct wim_lookup_table_entry *lte) +{ + if (lte) { + lte_put_resource(lte); FREE(lte); } } -/* Decrements the reference count for the lookup table entry @lte. If its - * reference count reaches 0, it is unlinked from the lookup table. If, - * furthermore, the entry has no opened file descriptors associated with it, the - * entry is freed. */ +/* Should this stream be retained even if it has no references? */ +static bool +should_retain_lte(const struct wim_lookup_table_entry *lte) +{ + return lte->resource_location == RESOURCE_IN_WIM; +} + +static void +finalize_lte(struct wim_lookup_table_entry *lte) +{ + if (!should_retain_lte(lte)) + free_lookup_table_entry(lte); +} + +/* + * Decrements the reference count of the single-instance stream @lte, which must + * be inserted in the stream lookup table @table. + * + * If the stream's reference count reaches 0, we may unlink it from @table and + * free it. However, we retain streams with 0 reference count that originated + * from WIM files (RESOURCE_IN_WIM). We do this for two reasons: + * + * 1.
This prevents information about valid streams in a WIM file --- streams + * which will continue to be present after appending to the WIM file --- from + * being lost merely because we dropped all references to them. + * + * 2. Stream reference counts we read from WIM files can't be trusted. It's + * possible that a WIM has reference counts that are too low; WIMGAPI + * sometimes creates WIMs where this is the case. It's also possible that + * streams have been referenced from an external WIM; those streams can + * potentially have any reference count at all, either lower or higher than + * would be expected for this WIM ("this WIM" meaning the owner of @table) if + * it were a standalone WIM. + * + * So we can't take the reference counts too seriously. But at least, we do + * recalculate by default when writing a new WIM file. + */ void lte_decrement_refcnt(struct wim_lookup_table_entry *lte, struct wim_lookup_table *table) { - wimlib_assert(lte->refcnt != 0); + if (unlikely(lte->refcnt == 0)) /* See comment above */ + return; if (--lte->refcnt == 0) { if (lte->unhashed) { @@ -248,10 +281,12 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, * that there still may be open file descriptors to it.) 
* */ if (lte->resource_location == RESOURCE_IN_STAGING_FILE) - unlink(lte->staging_file_name); + unlinkat(lte->staging_dir_fd, + lte->staging_file_name, 0); #endif } else { - lookup_table_unlink(table, lte); + if (!should_retain_lte(lte)) + lookup_table_unlink(table, lte); } /* If FUSE mounts are enabled, we don't actually free the entry @@ -260,7 +295,7 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, #ifdef WITH_FUSE if (lte->num_opened_fds == 0) #endif - free_lookup_table_entry(lte); + finalize_lte(lte); } } @@ -271,7 +306,7 @@ lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte) wimlib_assert(lte->num_opened_fds != 0); if (--lte->num_opened_fds == 0 && lte->refcnt == 0) - free_lookup_table_entry(lte); + finalize_lte(lte); } #endif @@ -402,7 +437,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) /* Different (possibly split) WIMs? */ if (wim1 != wim2) { - v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GID_LEN); + v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN); if (v) return v; } @@ -423,6 +458,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) case RESOURCE_IN_STAGING_FILE: #endif #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif /* Compare files by path: just a heuristic that will place files @@ -541,7 +577,8 @@ struct wim_lookup_table_entry_disk { /* Which part of the split WIM this stream is in; indexed from 1. */ le16 part_number; - /* Reference count of this stream over all WIM images. */ + /* Reference count of this stream over all WIM images. (But see comment + * above lte_decrement_refcnt().) 
*/ le32 refcnt; /* SHA1 message digest of the uncompressed data of this stream, or @@ -551,64 +588,307 @@ struct wim_lookup_table_entry_disk { #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 +/* Given a nonempty run of consecutive lookup table entries with the + * PACKED_STREAMS flag set, count how many specify resources (as opposed to + * streams within those resources). + * + * Returns the resulting count. */ +static size_t +count_subpacks(const struct wim_lookup_table_entry_disk *entries, size_t max) +{ + size_t count = 0; + do { + struct wim_reshdr reshdr; + + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + + if (!(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) { + /* Run was terminated by a stand-alone stream entry. */ + break; + } + + if (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) { + /* This is a resource entry. */ + count++; + } + } while (--max); + return count; +} + +/* Given a run of consecutive lookup table entries with the PACKED_STREAMS flag + * set and having @num_subpacks resource entries, load resource information from + * them into the resource specifications in the @subpacks array. + * + * Returns 0 on success, or a nonzero error code on failure. */ +static int +do_load_subpack_info(WIMStruct *wim, struct wim_resource_spec **subpacks, + size_t num_subpacks, + const struct wim_lookup_table_entry_disk *entries) +{ + for (size_t i = 0; i < num_subpacks; i++) { + struct wim_reshdr reshdr; + struct alt_chunk_table_header_disk hdr; + struct wim_resource_spec *rspec; + int ret; + + /* Advance to next resource entry. */ + + do { + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + } while (reshdr.uncompressed_size != WIM_PACK_MAGIC_NUMBER); + + rspec = subpacks[i]; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); + + /* For packed resources, the uncompressed size, compression + * type, and chunk size are stored in the resource itself, not + * in the lookup table. 
*/ + + ret = full_pread(&wim->in_fd, &hdr, + sizeof(hdr), reshdr.offset_in_wim); + if (ret) { + ERROR("Failed to read header of packed resource " + "(offset_in_wim=%"PRIu64")", + reshdr.offset_in_wim); + return ret; + } + + rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); + + /* Compression format numbers must be the same as in + * WIMGAPI to be compatible here. */ + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); + rspec->compression_type = le32_to_cpu(hdr.compression_format); + + rspec->chunk_size = le32_to_cpu(hdr.chunk_size); + + DEBUG("Subpack %zu/%zu: %"PRIu64" => %"PRIu64" " + "(%"TS"/%"PRIu32") @ +%"PRIu64"", + i + 1, num_subpacks, + rspec->uncompressed_size, + rspec->size_in_wim, + wimlib_get_compression_type_string(rspec->compression_type), + rspec->chunk_size, + rspec->offset_in_wim); + + } + return 0; +} + +/* Given a nonempty run of consecutive lookup table entries with the + * PACKED_STREAMS flag set, allocate a 'struct wim_resource_spec' for each + * resource within that run. + * + * Returns 0 on success, or a nonzero error code on failure. + * Returns the pointers and count in *subpacks_ret and *num_subpacks_ret. 
+ */ +static int +load_subpack_info(WIMStruct *wim, + const struct wim_lookup_table_entry_disk *entries, + size_t num_remaining_entries, + struct wim_resource_spec ***subpacks_ret, + size_t *num_subpacks_ret) +{ + size_t num_subpacks; + struct wim_resource_spec **subpacks; + size_t i; + int ret; + + num_subpacks = count_subpacks(entries, num_remaining_entries); + subpacks = CALLOC(num_subpacks, sizeof(subpacks[0])); + if (!subpacks) + return WIMLIB_ERR_NOMEM; + + for (i = 0; i < num_subpacks; i++) { + subpacks[i] = MALLOC(sizeof(struct wim_resource_spec)); + if (!subpacks[i]) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_subpacks; + } + } + + ret = do_load_subpack_info(wim, subpacks, num_subpacks, entries); + if (ret) + goto out_free_subpacks; + + *subpacks_ret = subpacks; + *num_subpacks_ret = num_subpacks; + return 0; + +out_free_subpacks: + for (i = 0; i < num_subpacks; i++) + FREE(subpacks[i]); + FREE(subpacks); + return ret; +} + +/* Given a 'struct wim_lookup_table_entry' allocated for a stream entry with + * PACKED_STREAMS set, try to bind it to a subpack of the current PACKED_STREAMS + * run. */ +static int +bind_stream_to_subpack(const struct wim_reshdr *reshdr, + struct wim_lookup_table_entry *stream, + struct wim_resource_spec **subpacks, + size_t num_subpacks) +{ + u64 offset = reshdr->offset_in_wim; + + /* XXX: This linear search will be slow in the degenerate case where the + * number of subpacks is huge. 
*/ + stream->size = reshdr->size_in_wim; + stream->flags = reshdr->flags; + for (size_t i = 0; i < num_subpacks; i++) { + if (offset + stream->size <= subpacks[i]->uncompressed_size) { + stream->offset_in_res = offset; + lte_bind_wim_resource_spec(stream, subpacks[i]); + return 0; + } + offset -= subpacks[i]->uncompressed_size; + } + ERROR("Packed stream could not be assigned to any resource"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; +} + +static void +free_subpack_info(struct wim_resource_spec **subpacks, size_t num_subpacks) +{ + if (subpacks) { + for (size_t i = 0; i < num_subpacks; i++) + if (list_empty(&subpacks[i]->stream_list)) + FREE(subpacks[i]); + FREE(subpacks); + } +} + +static int +cmp_streams_by_offset_in_res(const void *p1, const void *p2) +{ + const struct wim_lookup_table_entry *lte1, *lte2; + + lte1 = *(const struct wim_lookup_table_entry**)p1; + lte2 = *(const struct wim_lookup_table_entry**)p2; + + return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); +} + /* Validate the size and location of a WIM resource. */ static int -validate_resource(const struct wim_resource_spec *rspec) +validate_resource(struct wim_resource_spec *rspec) { struct wim_lookup_table_entry *lte; - u64 cur_offset; + bool out_of_order; + u64 expected_next_offset; + int ret; - /* Verify that calculating the offset of the end of the resource doesn't - * overflow. */ + /* Verify that the resource itself has a valid offset and size. */ if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim) - goto invalid; + goto invalid_due_to_overflow; - /* Verify that each stream in the resource has a valid offset and size, - * and that no streams overlap, and that the streams were added in order - * of increasing offset. */ - cur_offset = 0; + /* Verify that each stream in the resource has a valid offset and size. 
+ */ + expected_next_offset = 0; + out_of_order = false; list_for_each_entry(lte, &rspec->stream_list, rspec_node) { if (lte->offset_in_res + lte->size < lte->size || - lte->offset_in_res + lte->size > rspec->uncompressed_size || - lte->offset_in_res < cur_offset) - goto invalid; + lte->offset_in_res + lte->size > rspec->uncompressed_size) + goto invalid_due_to_overflow; - cur_offset = lte->offset_in_res + lte->size; + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + out_of_order = true; } + + /* If the streams were not located at strictly increasing positions (not + * allowing for overlap), sort them. Then make sure that none overlap. + */ + if (out_of_order) { + ret = sort_stream_list(&rspec->stream_list, + offsetof(struct wim_lookup_table_entry, + rspec_node), + cmp_streams_by_offset_in_res); + if (ret) + return ret; + + expected_next_offset = 0; + list_for_each_entry(lte, &rspec->stream_list, rspec_node) { + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + goto invalid_due_to_overlap; + } + } + return 0; -invalid: +invalid_due_to_overflow: + ERROR("Invalid resource entry (offset overflow)"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - ERROR("Invalid resource entry!"); +invalid_due_to_overlap: + ERROR("Invalid resource entry (streams in packed resource overlap)"); return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; } +static int +finish_subpacks(struct wim_resource_spec **subpacks, size_t num_subpacks) +{ + int ret = 0; + for (size_t i = 0; i < num_subpacks; i++) { + ret = validate_resource(subpacks[i]); + if (ret) + break; + } + free_subpack_info(subpacks, num_subpacks); + return ret; +} + /* - * Reads the lookup table from a WIM file. Each entry specifies a stream that - * the WIM file contains, along with its location and SHA1 message digest. + * Reads the lookup table from a WIM file. 
Usually, each entry specifies a + * stream that the WIM file contains, along with its location and SHA1 message + * digest. + * + * Saves lookup table entries for non-metadata streams in a hash table (set to + * wim->lookup_table), and saves the metadata entry for each image in a special + * per-image location (the wim->image_metadata array). * - * Saves lookup table entries for non-metadata streams in a hash table, and - * saves the metadata entry for each image in a special per-image location (the - * image_metadata array). + * This works for both version WIM_VERSION_DEFAULT (68864) and version + * WIM_VERSION_PACKED_STREAMS (3584) WIMs. In the latter, a consecutive run of + * lookup table entries that all have flag WIM_RESHDR_FLAG_PACKED_STREAMS (0x10) + * set is a "packed run". A packed run logically contains zero or more + * resources, each of which logically contains zero or more streams. + * Physically, in such a run, a "lookup table entry" with uncompressed size + * WIM_PACK_MAGIC_NUMBER (0x100000000) specifies a resource, whereas any other + * entry specifies a stream. Within such a run, stream entries and resource + * entries need not be in any particular order, except that the order of the + * resource entries is important, as it affects how streams are assigned to + * resources. See the code for details. * - * Return values: + * Possible return values: * WIMLIB_ERR_SUCCESS (0) * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY - * WIMLIB_ERR_RESOURCE_NOT_FOUND + * WIMLIB_ERR_NOMEM * - * Or an error code caused by failure to read the lookup table into memory. + * Or an error code caused by failure to read the lookup table from the WIM + * file. 
*/ int read_wim_lookup_table(WIMStruct *wim) { int ret; - size_t i; size_t num_entries; - struct wim_lookup_table *table; - struct wim_lookup_table_entry *cur_entry, *duplicate_entry; - struct wim_resource_spec *cur_rspec; - void *buf; - bool back_to_back_pack; + void *buf = NULL; + struct wim_lookup_table *table = NULL; + struct wim_lookup_table_entry *cur_entry = NULL; + size_t num_duplicate_entries = 0; + size_t num_wrong_part_entries = 0; + u32 image_index = 0; + struct wim_resource_spec **cur_subpacks = NULL; + size_t cur_num_subpacks = 0; DEBUG("Reading lookup table."); @@ -616,7 +896,7 @@ read_wim_lookup_table(WIMStruct *wim) BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); - /* Calculate number of entries in the lookup table. */ + /* Calculate the number of entries in the lookup table. */ num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size / sizeof(struct wim_lookup_table_entry_disk); @@ -628,22 +908,22 @@ read_wim_lookup_table(WIMStruct *wim) /* Allocate a hash table to map SHA1 message digests into stream * specifications. This is the in-memory "lookup table". */ table = new_lookup_table(num_entries * 2 + 1); - if (table == NULL) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_buf; - } + if (!table) + goto oom; - /* Allocate and initalize stream entries from the raw lookup table - * buffer. */ - wim->current_image = 0; - cur_rspec = NULL; - for (i = 0; i < num_entries; i++) { + /* Allocate and initalize stream entries ('struct + * wim_lookup_table_entry's) from the raw lookup table buffer. Each of + * these entries will point to a 'struct wim_resource_spec' that + * describes the underlying resource. In WIMs with version number + * WIM_VERSION_PACKED_STREAMS, a resource may contain multiple streams. 
+ */ + for (size_t i = 0; i < num_entries; i++) { const struct wim_lookup_table_entry_disk *disk_entry = &((const struct wim_lookup_table_entry_disk*)buf)[i]; - u16 part_number; struct wim_reshdr reshdr; + u16 part_number; + /* Get the resource header */ get_wim_reshdr(&disk_entry->reshdr, &reshdr); DEBUG("reshdr: size_in_wim=%"PRIu64", " @@ -653,272 +933,225 @@ read_wim_lookup_table(WIMStruct *wim) reshdr.size_in_wim, reshdr.uncompressed_size, reshdr.offset_in_wim, reshdr.flags); + /* Ignore PACKED_STREAMS flag if it isn't supposed to be used in + * this WIM version. */ if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS; + /* Allocate a new 'struct wim_lookup_table_entry'. */ cur_entry = new_lookup_table_entry(); - if (cur_entry == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; - } + if (!cur_entry) + goto oom; + /* Get the part number, reference count, and hash. */ part_number = le16_to_cpu(disk_entry->part_number); cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); copy_hash(cur_entry->hash, disk_entry->hash); - if (part_number != wim->hdr.part_number) { - WARNING("A lookup table entry in part %hu of the WIM " - "points to part %hu (ignoring it)", - wim->hdr.part_number, part_number); - free_lookup_table_entry(cur_entry); - continue; - } - - if (!(reshdr.flags & (WIM_RESHDR_FLAG_PACKED_STREAMS | - WIM_RESHDR_FLAG_COMPRESSED))) { - if (reshdr.uncompressed_size != reshdr.size_in_wim) { - ERROR("Invalid resource entry!"); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto err; - } - } + if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - back_to_back_pack = false; - if (!(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) || - cur_rspec == NULL || - (back_to_back_pack = - ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER && - cur_rspec != NULL && - cur_rspec->size_in_wim != 0))) - { - /* Starting new run of 
streams that share the same WIM - * resource. */ - struct wim_lookup_table_entry *prev_entry = NULL; + /* PACKED_STREAMS entry */ - if (back_to_back_pack && - !list_empty(&cur_rspec->stream_list)) - { - prev_entry = list_entry(cur_rspec->stream_list.prev, - struct wim_lookup_table_entry, - rspec_node); - lte_unbind_wim_resource_spec(prev_entry); - } - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (!cur_subpacks) { + /* Starting new run */ + ret = load_subpack_info(wim, disk_entry, + num_entries - i, + &cur_subpacks, + &cur_num_subpacks); if (ret) - goto err; + goto out; } - /* Allocate the resource specification and initialize it - * with values from the current stream entry. */ - cur_rspec = MALLOC(sizeof(*cur_rspec)); - if (cur_rspec == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; - } - wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); - - /* If this is a packed run, the current stream entry may - * specify a stream within the resource, and not the - * resource itself. Zero possibly irrelevant data until - * it is read for certain. (Note that the computation - * of 'back_to_back_pack' tests if 'size_in_wim' is - * nonzero to see if the resource info has been read; - * hence we need to set it to 0 here.) */ - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - cur_rspec->size_in_wim = 0; - cur_rspec->uncompressed_size = 0; - cur_rspec->offset_in_wim = 0; + if (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) { + /* Resource entry, not stream entry */ + goto free_cur_entry_and_continue; } - if (prev_entry) - lte_bind_wim_resource_spec(prev_entry, cur_rspec); - } + /* Stream entry */ - if ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) - { - /* Found the specification for the packed resource. 
- * Transfer the values to the `struct - * wim_resource_spec', and discard the current stream - * since this lookup table entry did not, in fact, - * correspond to a "stream". + ret = bind_stream_to_subpack(&reshdr, + cur_entry, + cur_subpacks, + cur_num_subpacks); + if (ret) + goto out; + + } else { + /* Normal stream/resource entry; PACKED_STREAMS not set. */ - /* Uncompressed size of the resource pack is actually - * stored in the header of the resource itself. Read - * it, and also grab the chunk size and compression type - * (which are not necessarily the defaults from the WIM - * header). */ - struct alt_chunk_table_header_disk hdr; + struct wim_resource_spec *rspec; - ret = full_pread(&wim->in_fd, &hdr, - sizeof(hdr), reshdr.offset_in_wim); - if (ret) - goto err; - - cur_rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); - cur_rspec->offset_in_wim = reshdr.offset_in_wim; - cur_rspec->size_in_wim = reshdr.size_in_wim; - cur_rspec->flags = reshdr.flags; - - /* Compression format numbers must be the same as in - * WIMGAPI to be compatible here. */ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - cur_rspec->compression_type = le32_to_cpu(hdr.compression_format); - - cur_rspec->chunk_size = le32_to_cpu(hdr.chunk_size); - - DEBUG("Full pack is %"PRIu64" compressed bytes " - "at file offset %"PRIu64" (flags 0x%02x)", - cur_rspec->size_in_wim, - cur_rspec->offset_in_wim, - cur_rspec->flags); - free_lookup_table_entry(cur_entry); - continue; - } + if (unlikely(cur_subpacks)) { + /* This entry terminated a packed run. 
*/ + ret = finish_subpacks(cur_subpacks, + cur_num_subpacks); + cur_subpacks = NULL; + if (ret) + goto out; + } - if (is_zero_hash(cur_entry->hash)) { - free_lookup_table_entry(cur_entry); - continue; - } + /* How to handle an uncompressed resource with its + * uncompressed size different from its compressed size? + * + * Based on a simple test, WIMGAPI seems to handle this + * as follows: + * + * if (size_in_wim > uncompressed_size) { + * Ignore uncompressed_size; use size_in_wim + * instead. + * } else { + * Honor uncompressed_size, but treat the part of + * the file data above size_in_wim as all zeros. + * } + * + * So we will do the same. */ + if (unlikely(!(reshdr.flags & + WIM_RESHDR_FLAG_COMPRESSED) && + (reshdr.size_in_wim > + reshdr.uncompressed_size))) + { + reshdr.uncompressed_size = reshdr.size_in_wim; + } - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* Continuing the pack with another stream. */ - DEBUG("Continuing pack with stream: " - "%"PRIu64" uncompressed bytes @ " - "resource offset %"PRIu64")", - reshdr.size_in_wim, reshdr.offset_in_wim); - } + /* Set up a resource specification for this stream. */ + + rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (!rspec) + goto oom; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); - lte_bind_wim_resource_spec(cur_entry, cur_rspec); - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* In packed runs, the offset field is used for - * in-resource offset, not the in-WIM offset, and the - * size field is used for the uncompressed size, not the - * compressed size. */ - cur_entry->offset_in_res = reshdr.offset_in_wim; - cur_entry->size = reshdr.size_in_wim; - cur_entry->flags = reshdr.flags; - } else { - /* Normal case: The stream corresponds one-to-one with - * the resource entry. 
*/ cur_entry->offset_in_res = 0; cur_entry->size = reshdr.uncompressed_size; cur_entry->flags = reshdr.flags; - cur_rspec = NULL; + + lte_bind_wim_resource_spec(cur_entry, rspec); } - if (cur_entry->flags & WIM_RESHDR_FLAG_METADATA) { - /* Lookup table entry for a metadata resource */ + /* cur_entry is now a stream bound to a resource. */ - /* Metadata entries with no references must be ignored; - * see for example the WinPE WIMs from the WAIK v2.1. - * */ - if (cur_entry->refcnt == 0) { - free_lookup_table_entry(cur_entry); - continue; - } + /* Ignore entries with all zeroes in the hash field. */ + if (is_zero_hash(cur_entry->hash)) + goto free_cur_entry_and_continue; + + /* Verify that the part number matches that of the underlying + * WIM file. */ + if (part_number != wim->hdr.part_number) { + num_wrong_part_entries++; + goto free_cur_entry_and_continue; + } + + if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { + + /* Lookup table entry for a metadata resource. */ + + /* Metadata entries with no references must be ignored. + * See, for example, the WinPE WIMs from the WAIK v2.1. + */ + if (cur_entry->refcnt == 0) + goto free_cur_entry_and_continue; if (cur_entry->refcnt != 1) { - if (wimlib_print_errors) { - ERROR("Found metadata resource with refcnt != 1:"); - print_lookup_table_entry(cur_entry, stderr); - } + /* We don't currently support this case due to + * the complications of multiple images sharing + * the same metadata resource or a metadata + * resource also being referenced by files. 
*/ + ERROR("Found metadata resource with refcnt != 1"); ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto err; + goto out; } if (wim->hdr.part_number != 1) { WARNING("Ignoring metadata resource found in a " "non-first part of the split WIM"); - free_lookup_table_entry(cur_entry); - continue; + goto free_cur_entry_and_continue; } - if (wim->current_image == wim->hdr.image_count) { - WARNING("The WIM header says there are %u images " - "in the WIM, but we found more metadata " - "resources than this (ignoring the extra)", - wim->hdr.image_count); - free_lookup_table_entry(cur_entry); - continue; + + /* The number of entries in the lookup table with + * WIM_RESHDR_FLAG_METADATA set should be the same as + * the image_count field in the WIM header. */ + if (image_index == wim->hdr.image_count) { + WARNING("Found more metadata resources than images"); + goto free_cur_entry_and_continue; } /* Notice very carefully: We are assigning the metadata - * resources in the exact order mirrored by their lookup - * table entries on disk, which is the behavior of - * Microsoft's software. In particular, this overrides - * the actual locations of the metadata resources - * themselves in the WIM file as well as any information - * written in the XML data. */ - DEBUG("Found metadata resource for image %u at " + * resources to images in the same order in which their + * lookup table entries occur on disk. (This is also + * the behavior of Microsoft's software.) In + * particular, this overrides the actual locations of + * the metadata resources themselves in the WIM file as + * well as any information written in the XML data. 
*/ + DEBUG("Found metadata resource for image %"PRIu32" at " "offset %"PRIu64".", - wim->current_image + 1, - cur_entry->rspec->offset_in_wim); - wim->image_metadata[ - wim->current_image++]->metadata_lte = cur_entry; - continue; - } + image_index + 1, + reshdr.offset_in_wim); + + wim->image_metadata[image_index++]->metadata_lte = cur_entry; + } else { + /* Lookup table entry for a non-metadata stream. */ - /* Lookup table entry for a stream that is not a metadata - * resource. */ - duplicate_entry = lookup_stream(table, cur_entry->hash); - if (duplicate_entry) { - if (wimlib_print_errors) { - WARNING("The WIM lookup table contains two entries with the " - "same SHA1 message digest!"); - WARNING("The first entry is:"); - print_lookup_table_entry(duplicate_entry, stderr); - WARNING("The second entry is:"); - print_lookup_table_entry(cur_entry, stderr); + /* Ignore this stream if it's a duplicate. */ + if (lookup_stream(table, cur_entry->hash)) { + num_duplicate_entries++; + goto free_cur_entry_and_continue; } - free_lookup_table_entry(cur_entry); - continue; + + /* Insert the stream into the in-memory lookup table, + * keyed by its SHA1 message digest. */ + lookup_table_insert(table, cur_entry); } - /* Finally, insert the stream into the lookup table, keyed by - * its SHA1 message digest. */ - lookup_table_insert(table, cur_entry); + continue; + + free_cur_entry_and_continue: + if (cur_subpacks && + cur_entry->resource_location == RESOURCE_IN_WIM) + lte_unbind_wim_resource_spec(cur_entry); + free_lookup_table_entry(cur_entry); } cur_entry = NULL; - /* Validate the last resource. */ - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (cur_subpacks) { + /* End of lookup table terminated a packed run. 
*/ + ret = finish_subpacks(cur_subpacks, cur_num_subpacks); + cur_subpacks = NULL; if (ret) - goto err; + goto out; } - if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { - WARNING("The header of \"%"TS"\" says there are %u images in\n" - " the WIM, but we only found %d metadata resources! Acting as if\n" - " the header specified only %d images instead.", - wim->filename, wim->hdr.image_count, - wim->current_image, wim->current_image); - for (int i = wim->current_image; i < wim->hdr.image_count; i++) + if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { + WARNING("Could not find metadata resources for all images"); + for (u32 i = image_index; i < wim->hdr.image_count; i++) put_image_metadata(wim->image_metadata[i], NULL); - wim->hdr.image_count = wim->current_image; + wim->hdr.image_count = image_index; + } + + if (num_duplicate_entries > 0) { + WARNING("Ignoring %zu duplicate streams in the WIM lookup table", + num_duplicate_entries); } + + if (num_wrong_part_entries > 0) { + WARNING("Ignoring %zu streams with wrong part number", + num_wrong_part_entries); + } + DEBUG("Done reading lookup table."); wim->lookup_table = table; ret = 0; goto out_free_buf; -err: - if (cur_rspec && list_empty(&cur_rspec->stream_list)) - FREE(cur_rspec); +oom: + ERROR("Not enough memory to read lookup table!"); + ret = WIMLIB_ERR_NOMEM; +out: + free_subpack_info(cur_subpacks, cur_num_subpacks); free_lookup_table_entry(cur_entry); free_lookup_table(table); out_free_buf: FREE(buf); -out: - wim->current_image = 0; return ret; } @@ -933,6 +1166,10 @@ put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry, copy_hash(disk_entry->hash, hash); } +/* Note: the list of stream entries must be sorted so that all entries for the + * same packed resource are consecutive. In addition, entries with + * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the + * underlying images. 
*/ int write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, @@ -946,6 +1183,8 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct wim_lookup_table_entry_disk *table_buf_ptr; int ret; u64 prev_res_offset_in_wim = ~0ULL; + u64 prev_uncompressed_size; + u64 logical_offset; table_size = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { @@ -971,38 +1210,46 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, table_buf_ptr = table_buf; prev_res_offset_in_wim = ~0ULL; + prev_uncompressed_size = 0; + logical_offset = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + struct wim_reshdr tmp_reshdr; - put_wim_lookup_table_entry(table_buf_ptr++, - &lte->out_reshdr, - part_number, - lte->out_refcnt, - lte->hash); - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && - lte->out_res_offset_in_wim != prev_res_offset_in_wim) - { - /* Put the main resource entry for the pack. */ + /* Eww. When WIMGAPI sees multiple resource packs, it + * expects the offsets to be adjusted as if there were + * really only one pack. 
*/ - struct wim_reshdr reshdr; + if (lte->out_res_offset_in_wim != prev_res_offset_in_wim) { + /* Put the resource entry for pack */ + tmp_reshdr.offset_in_wim = lte->out_res_offset_in_wim; + tmp_reshdr.size_in_wim = lte->out_res_size_in_wim; + tmp_reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; + tmp_reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; - reshdr.offset_in_wim = lte->out_res_offset_in_wim; - reshdr.size_in_wim = lte->out_res_size_in_wim; - reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; - reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; + put_wim_lookup_table_entry(table_buf_ptr++, + &tmp_reshdr, + part_number, + 1, zero_hash); - DEBUG("Putting main entry for pack: " - "size_in_wim=%"PRIu64", " - "offset_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64, - reshdr.size_in_wim, - reshdr.offset_in_wim, - reshdr.uncompressed_size); + logical_offset += prev_uncompressed_size; + prev_res_offset_in_wim = lte->out_res_offset_in_wim; + prev_uncompressed_size = lte->out_res_uncompressed_size; + } + tmp_reshdr = lte->out_reshdr; + tmp_reshdr.offset_in_wim += logical_offset; put_wim_lookup_table_entry(table_buf_ptr++, - &reshdr, + &tmp_reshdr, part_number, - 1, zero_hash); - prev_res_offset_in_wim = lte->out_res_offset_in_wim; + lte->out_refcnt, + lte->hash); + } else { + put_wim_lookup_table_entry(table_buf_ptr++, + &lte->out_reshdr, + part_number, + lte->out_refcnt, + lte->hash); } } @@ -1024,13 +1271,6 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, return ret; } -int -lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) -{ - lte->real_refcnt = 0; - return 0; -} - int lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) { @@ -1038,16 +1278,6 @@ lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) return 0; } -int -lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore) -{ - if (lte->extracted_file != NULL) { - FREE(lte->extracted_file); - lte->extracted_file = NULL; 
- } - return 0; -} - /* Allocate a stream entry for the contents of the buffer, or re-use an existing * entry in @lookup_table for the same stream. */ struct wim_lookup_table_entry * @@ -1140,87 +1370,6 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, return 0; } -void -print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) -{ - if (lte == NULL) { - tputc(T('\n'), out); - return; - } - - - tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), - lte->size); - if (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - tprintf(T("Offset = %"PRIu64" bytes\n"), - lte->offset_in_res); - - tprintf(T("Raw uncompressed size = %"PRIu64" bytes\n"), - lte->rspec->uncompressed_size); - - tprintf(T("Raw compressed size = %"PRIu64" bytes\n"), - lte->rspec->size_in_wim); - - tprintf(T("Raw offset = %"PRIu64" bytes\n"), - lte->rspec->offset_in_wim); - } else if (lte->resource_location == RESOURCE_IN_WIM) { - tprintf(T("Compressed size = %"PRIu64" bytes\n"), - lte->rspec->size_in_wim); - - tprintf(T("Offset = %"PRIu64" bytes\n"), - lte->rspec->offset_in_wim); - } - - tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); - - if (lte->unhashed) { - tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), - lte->back_inode, lte->back_stream_id); - } else { - tfprintf(out, T("Hash = 0x")); - print_hash(lte->hash, out); - tputc(T('\n'), out); - } - - tfprintf(out, T("Flags = ")); - u8 flags = lte->flags; - if (flags & WIM_RESHDR_FLAG_COMPRESSED) - tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out); - if (flags & WIM_RESHDR_FLAG_FREE) - tfputs(T("WIM_RESHDR_FLAG_FREE, "), out); - if (flags & WIM_RESHDR_FLAG_METADATA) - tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out); - if (flags & WIM_RESHDR_FLAG_SPANNED) - tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out); - if (flags & WIM_RESHDR_FLAG_PACKED_STREAMS) - tfputs(T("WIM_RESHDR_FLAG_PACKED_STREAMS, "), out); - tputc(T('\n'), out); - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - if (lte->rspec->wim->filename) { 
- tfprintf(out, T("WIM file = `%"TS"'\n"), - lte->rspec->wim->filename); - } - break; -#ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: -#endif - case RESOURCE_IN_FILE_ON_DISK: - tfprintf(out, T("File on Disk = `%"TS"'\n"), - lte->file_on_disk); - break; -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - tfprintf(out, T("Staging File = `%"TS"'\n"), - lte->staging_file_name); - break; -#endif - default: - break; - } - tputc(T('\n'), out); -} - void lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, struct wimlib_resource_entry *wentry) @@ -1271,11 +1420,14 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags, wimlib_iterate_lookup_table_callback_t cb, void *user_ctx) { + if (flags != 0) + return WIMLIB_ERR_INVALID_PARAM; + struct iterate_lte_context ctx = { .cb = cb, .user_ctx = user_ctx, }; - if (wim->hdr.part_number == 1) { + if (wim_has_metadata(wim)) { int ret; for (int i = 0; i < wim->hdr.image_count; i++) { ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte,