X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Flookup_table.c;h=26f231e4240a148cd98e18f6d0a860be317eacee;hb=dadccd777e56b473128d87e485fcf8565b8cb93b;hp=4cae97ce0cbdc7a69b1e7e66a22dbd8bf1bb42dc;hpb=5d3d469e410dc5f4a28814ad231336fc174cba56;p=wimlib diff --git a/src/lookup_table.c b/src/lookup_table.c index 4cae97ce..26f231e4 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -6,28 +6,30 @@ */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. 
*/ #ifdef HAVE_CONFIG_H # include "config.h" #endif +#include +#include +#include /* for unlink() */ + #include "wimlib/assert.h" #include "wimlib/endianness.h" #include "wimlib/error.h" @@ -35,13 +37,10 @@ #include "wimlib/metadata.h" #include "wimlib/ntfs_3g.h" #include "wimlib/resource.h" +#include "wimlib/unaligned.h" #include "wimlib/util.h" #include "wimlib/write.h" -#include -#include -#include /* for unlink() */ - /* WIM lookup table: * * This is a logical mapping from SHA1 message digests to the data streams @@ -94,17 +93,11 @@ do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) void free_lookup_table(struct wim_lookup_table *table) { - DEBUG("Freeing lookup table."); - if (table == NULL) - return; - - if (table->array) { - for_lookup_table_entry(table, - do_free_lookup_table_entry, - NULL); + if (table) { + for_lookup_table_entry(table, do_free_lookup_table_entry, NULL); FREE(table->array); + FREE(table); } - FREE(table); } struct wim_lookup_table_entry * @@ -133,7 +126,6 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) if (new == NULL) return NULL; - new->extracted_file = NULL; switch (new->resource_location) { case RESOURCE_IN_WIM: list_add(&new->rspec_node, &new->rspec->stream_list); @@ -141,6 +133,7 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif #ifdef WITH_FUSE @@ -190,54 +183,95 @@ out_free: } void -free_lookup_table_entry(struct wim_lookup_table_entry *lte) +lte_put_resource(struct wim_lookup_table_entry *lte) { - if (lte) { - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - list_del(&lte->rspec_node); - if (list_empty(&lte->rspec->stream_list)) - FREE(lte->rspec); - break; - case RESOURCE_IN_FILE_ON_DISK: - #ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: - #endif - #ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - 
BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->staging_file_name); - #endif - case RESOURCE_IN_ATTACHED_BUFFER: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->attached_buffer); - FREE(lte->file_on_disk); - break; -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - if (lte->ntfs_loc) { - FREE(lte->ntfs_loc->path); - FREE(lte->ntfs_loc->stream_name); - FREE(lte->ntfs_loc); - } - break; + switch (lte->resource_location) { + case RESOURCE_IN_WIM: + list_del(&lte->rspec_node); + if (list_empty(&lte->rspec->stream_list)) + FREE(lte->rspec); + break; + case RESOURCE_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: + case RESOURCE_WIN32_ENCRYPTED: #endif - default: - break; +#ifdef WITH_FUSE + case RESOURCE_IN_STAGING_FILE: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->staging_file_name); +#endif + case RESOURCE_IN_ATTACHED_BUFFER: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->attached_buffer); + FREE(lte->file_on_disk); + break; +#ifdef WITH_NTFS_3G + case RESOURCE_IN_NTFS_VOLUME: + if (lte->ntfs_loc) { + FREE(lte->ntfs_loc->path); + FREE(lte->ntfs_loc->stream_name); + FREE(lte->ntfs_loc); } + break; +#endif + default: + break; + } +} + +void +free_lookup_table_entry(struct wim_lookup_table_entry *lte) +{ + if (lte) { + lte_put_resource(lte); FREE(lte); } } -/* Decrements the reference count for the lookup table entry @lte. If its - * reference count reaches 0, it is unlinked from the lookup table. If, - * furthermore, the entry has no opened file descriptors associated with it, the - * entry is freed. */ +/* Should this stream be retained even if it has no references? 
*/ +static bool +should_retain_lte(const struct wim_lookup_table_entry *lte) +{ + return lte->resource_location == RESOURCE_IN_WIM; +} + +static void +finalize_lte(struct wim_lookup_table_entry *lte) +{ + if (!should_retain_lte(lte)) + free_lookup_table_entry(lte); +} + +/* + * Decrements the reference count of the single-instance stream @lte, which must + * be inserted in the stream lookup table @table. + * + * If the stream's reference count reaches 0, we may unlink it from @table and + * free it. However, we retain streams with 0 reference count that originated + * from WIM files (RESOURCE_IN_WIM). We do this for two reasons: + * + * 1. This prevents information about valid streams in a WIM file --- streams + * which will continue to be present after appending to the WIM file --- from + * being lost merely because we dropped all references to them. + * + * 2. Stream reference counts we read from WIM files can't be trusted. It's + * possible that a WIM has reference counts that are too low; WIMGAPI + * sometimes creates WIMs where this is the case. It's also possible that + * streams have been referenced from an external WIM; those streams can + * potentially have any reference count at all, either lower or higher than + * would be expected for this WIM ("this WIM" meaning the owner of @table) if + * it were a standalone WIM. + * + * So we can't take the reference counts too seriously. But at least, we do + * recalculate by default when writing a new WIM file. + */ void lte_decrement_refcnt(struct wim_lookup_table_entry *lte, struct wim_lookup_table *table) { - wimlib_assert(lte->refcnt != 0); + if (unlikely(lte->refcnt == 0)) /* See comment above */ + return; if (--lte->refcnt == 0) { if (lte->unhashed) { @@ -248,10 +282,12 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, * that there still may be open file descriptors to it.) 
* */ if (lte->resource_location == RESOURCE_IN_STAGING_FILE) - unlink(lte->staging_file_name); + unlinkat(lte->staging_dir_fd, + lte->staging_file_name, 0); #endif } else { - lookup_table_unlink(table, lte); + if (!should_retain_lte(lte)) + lookup_table_unlink(table, lte); } /* If FUSE mounts are enabled, we don't actually free the entry @@ -260,7 +296,7 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, #ifdef WITH_FUSE if (lte->num_opened_fds == 0) #endif - free_lookup_table_entry(lte); + finalize_lte(lte); } } @@ -271,7 +307,7 @@ lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte) wimlib_assert(lte->num_opened_fds != 0); if (--lte->num_opened_fds == 0 && lte->refcnt == 0) - free_lookup_table_entry(lte); + finalize_lte(lte); } #endif @@ -342,7 +378,7 @@ lookup_stream(const struct wim_lookup_table *table, const u8 hash[]) struct wim_lookup_table_entry *lte; struct hlist_node *pos; - i = *(size_t*)hash % table->capacity; + i = load_size_t_unaligned(hash) % table->capacity; hlist_for_each_entry(lte, pos, &table->array[i], hash_list) if (hashes_equal(hash, lte->hash)) return lte; @@ -379,7 +415,7 @@ for_lookup_table_entry(struct wim_lookup_table *table, * per-resource location order. For example, resources in WIM files are sorted * primarily by part number, then secondarily by offset, as to implement optimal * reading of either a standalone or split WIM. */ -static int +int cmp_streams_by_sequential_order(const void *p1, const void *p2) { const struct wim_lookup_table_entry *lte1, *lte2; @@ -402,7 +438,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) /* Different (possibly split) WIMs? 
*/ if (wim1 != wim2) { - v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GID_LEN); + v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN); if (v) return v; } @@ -423,6 +459,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) case RESOURCE_IN_STAGING_FILE: #endif #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif /* Compare files by path: just a heuristic that will place files @@ -541,7 +578,8 @@ struct wim_lookup_table_entry_disk { /* Which part of the split WIM this stream is in; indexed from 1. */ le16 part_number; - /* Reference count of this stream over all WIM images. */ + /* Reference count of this stream over all WIM images. (But see comment + * above lte_decrement_refcnt().) */ le32 refcnt; /* SHA1 message digest of the uncompressed data of this stream, or @@ -551,64 +589,308 @@ struct wim_lookup_table_entry_disk { #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 +/* Given a nonempty run of consecutive lookup table entries with the SOLID flag + * set, count how many specify resources (as opposed to streams within those + * resources). + * + * Returns the resulting count. */ +static size_t +count_solid_resources(const struct wim_lookup_table_entry_disk *entries, size_t max) +{ + size_t count = 0; + do { + struct wim_reshdr reshdr; + + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + + if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) { + /* Run was terminated by a stand-alone stream entry. */ + break; + } + + if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { + /* This is a resource entry. */ + count++; + } + } while (--max); + return count; +} + +/* + * Given a run of consecutive lookup table entries with the SOLID flag set and + * having @num_rspecs resource entries, load resource information from them into + * the resource specifications in the @rspecs array. + * + * Returns 0 on success, or a nonzero error code on failure. 
+ */ +static int +do_load_solid_info(WIMStruct *wim, struct wim_resource_spec **rspecs, + size_t num_rspecs, + const struct wim_lookup_table_entry_disk *entries) +{ + for (size_t i = 0; i < num_rspecs; i++) { + struct wim_reshdr reshdr; + struct alt_chunk_table_header_disk hdr; + struct wim_resource_spec *rspec; + int ret; + + /* Advance to next resource entry. */ + + do { + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER); + + rspec = rspecs[i]; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); + + /* For solid resources, the uncompressed size, compression type, + * and chunk size are stored in the resource itself, not in the + * lookup table. */ + + ret = full_pread(&wim->in_fd, &hdr, + sizeof(hdr), reshdr.offset_in_wim); + if (ret) { + ERROR("Failed to read header of solid resource " + "(offset_in_wim=%"PRIu64")", + reshdr.offset_in_wim); + return ret; + } + + rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); + + /* Compression format numbers must be the same as in + * WIMGAPI to be compatible here. */ + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); + rspec->compression_type = le32_to_cpu(hdr.compression_format); + + rspec->chunk_size = le32_to_cpu(hdr.chunk_size); + + DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" " + "(%"TS"/%"PRIu32") @ +%"PRIu64"", + i + 1, num_rspecs, + rspec->uncompressed_size, + rspec->size_in_wim, + wimlib_get_compression_type_string(rspec->compression_type), + rspec->chunk_size, + rspec->offset_in_wim); + + } + return 0; +} + +/* + * Given a nonempty run of consecutive lookup table entries with the SOLID flag + * set, allocate a 'struct wim_resource_spec' for each resource within that run. + * + * Returns 0 on success, or a nonzero error code on failure. 
+ * Returns the pointers and count in *rspecs_ret and *num_rspecs_ret. + */ +static int +load_solid_info(WIMStruct *wim, + const struct wim_lookup_table_entry_disk *entries, + size_t num_remaining_entries, + struct wim_resource_spec ***rspecs_ret, + size_t *num_rspecs_ret) +{ + size_t num_rspecs; + struct wim_resource_spec **rspecs; + size_t i; + int ret; + + num_rspecs = count_solid_resources(entries, num_remaining_entries); + rspecs = CALLOC(num_rspecs, sizeof(rspecs[0])); + if (!rspecs) + return WIMLIB_ERR_NOMEM; + + for (i = 0; i < num_rspecs; i++) { + rspecs[i] = MALLOC(sizeof(struct wim_resource_spec)); + if (!rspecs[i]) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_rspecs; + } + } + + ret = do_load_solid_info(wim, rspecs, num_rspecs, entries); + if (ret) + goto out_free_rspecs; + + *rspecs_ret = rspecs; + *num_rspecs_ret = num_rspecs; + return 0; + +out_free_rspecs: + for (i = 0; i < num_rspecs; i++) + FREE(rspecs[i]); + FREE(rspecs); + return ret; +} + +/* Given a 'struct wim_lookup_table_entry' allocated for a stream entry with the + * SOLID flag set, try to bind it to resource in the current solid run. */ +static int +bind_stream_to_solid_resource(const struct wim_reshdr *reshdr, + struct wim_lookup_table_entry *stream, + struct wim_resource_spec **rspecs, + size_t num_rspecs) +{ + u64 offset = reshdr->offset_in_wim; + + /* XXX: This linear search will be slow in the degenerate case where the + * number of solid resources in the run is huge. 
*/ + stream->size = reshdr->size_in_wim; + stream->flags = reshdr->flags; + for (size_t i = 0; i < num_rspecs; i++) { + if (offset + stream->size <= rspecs[i]->uncompressed_size) { + stream->offset_in_res = offset; + lte_bind_wim_resource_spec(stream, rspecs[i]); + return 0; + } + offset -= rspecs[i]->uncompressed_size; + } + ERROR("Stream could not be assigned to a solid resource"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; +} + +static void +free_solid_rspecs(struct wim_resource_spec **rspecs, size_t num_rspecs) +{ + if (rspecs) { + for (size_t i = 0; i < num_rspecs; i++) + if (list_empty(&rspecs[i]->stream_list)) + FREE(rspecs[i]); + FREE(rspecs); + } +} + +static int +cmp_streams_by_offset_in_res(const void *p1, const void *p2) +{ + const struct wim_lookup_table_entry *lte1, *lte2; + + lte1 = *(const struct wim_lookup_table_entry**)p1; + lte2 = *(const struct wim_lookup_table_entry**)p2; + + return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); +} + /* Validate the size and location of a WIM resource. */ static int -validate_resource(const struct wim_resource_spec *rspec) +validate_resource(struct wim_resource_spec *rspec) { struct wim_lookup_table_entry *lte; - u64 cur_offset; + bool out_of_order; + u64 expected_next_offset; + int ret; - /* Verify that calculating the offset of the end of the resource doesn't - * overflow. */ + /* Verify that the resource itself has a valid offset and size. */ if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim) - goto invalid; + goto invalid_due_to_overflow; - /* Verify that each stream in the resource has a valid offset and size, - * and that no streams overlap, and that the streams were added in order - * of increasing offset. */ - cur_offset = 0; + /* Verify that each stream in the resource has a valid offset and size. 
+ */ + expected_next_offset = 0; + out_of_order = false; list_for_each_entry(lte, &rspec->stream_list, rspec_node) { if (lte->offset_in_res + lte->size < lte->size || - lte->offset_in_res + lte->size > rspec->uncompressed_size || - lte->offset_in_res < cur_offset) - goto invalid; + lte->offset_in_res + lte->size > rspec->uncompressed_size) + goto invalid_due_to_overflow; - cur_offset = lte->offset_in_res + lte->size; + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + out_of_order = true; } + + /* If the streams were not located at strictly increasing positions (not + * allowing for overlap), sort them. Then make sure that none overlap. + */ + if (out_of_order) { + ret = sort_stream_list(&rspec->stream_list, + offsetof(struct wim_lookup_table_entry, + rspec_node), + cmp_streams_by_offset_in_res); + if (ret) + return ret; + + expected_next_offset = 0; + list_for_each_entry(lte, &rspec->stream_list, rspec_node) { + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + goto invalid_due_to_overlap; + } + } + return 0; -invalid: +invalid_due_to_overflow: + ERROR("Invalid resource entry (offset overflow)"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - ERROR("Invalid resource entry!"); +invalid_due_to_overlap: + ERROR("Invalid resource entry (streams in solid resource overlap)"); return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; } +static int +finish_solid_rspecs(struct wim_resource_spec **rspecs, size_t num_rspecs) +{ + int ret = 0; + for (size_t i = 0; i < num_rspecs; i++) { + ret = validate_resource(rspecs[i]); + if (ret) + break; + } + free_solid_rspecs(rspecs, num_rspecs); + return ret; +} + /* - * Reads the lookup table from a WIM file. Each entry specifies a stream that - * the WIM file contains, along with its location and SHA1 message digest. + * Reads the lookup table from a WIM file. 
Usually, each entry specifies a + * stream that the WIM file contains, along with its location and SHA1 message + * digest. + * + * Saves lookup table entries for non-metadata streams in a hash table (set to + * wim->lookup_table), and saves the metadata entry for each image in a special + * per-image location (the wim->image_metadata array). * - * Saves lookup table entries for non-metadata streams in a hash table, and - * saves the metadata entry for each image in a special per-image location (the - * image_metadata array). + * This works for both version WIM_VERSION_DEFAULT (68864) and version + * WIM_VERSION_SOLID (3584) WIMs. In the latter, a consecutive run of lookup + * table entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid + * run". A solid run logically contains zero or more resources, each of which + * logically contains zero or more streams. Physically, in such a run, a + * "lookup table entry" with uncompressed size SOLID_RESOURCE_MAGIC_NUMBER + * (0x100000000) specifies a resource, whereas any other entry specifies a + * stream. Within such a run, stream entries and resource entries need not be + * in any particular order, except that the order of the resource entries is + * important, as it affects how streams are assigned to resources. See the code + * for details. * - * Return values: + * Possible return values: * WIMLIB_ERR_SUCCESS (0) * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY - * WIMLIB_ERR_RESOURCE_NOT_FOUND + * WIMLIB_ERR_NOMEM * - * Or an error code caused by failure to read the lookup table into memory. + * Or an error code caused by failure to read the lookup table from the WIM + * file. 
*/ int read_wim_lookup_table(WIMStruct *wim) { int ret; - size_t i; size_t num_entries; - struct wim_lookup_table *table; - struct wim_lookup_table_entry *cur_entry, *duplicate_entry; - struct wim_resource_spec *cur_rspec; - void *buf; - bool back_to_back_pack; + void *buf = NULL; + struct wim_lookup_table *table = NULL; + struct wim_lookup_table_entry *cur_entry = NULL; + size_t num_duplicate_entries = 0; + size_t num_wrong_part_entries = 0; + u32 image_index = 0; + struct wim_resource_spec **cur_solid_rspecs = NULL; + size_t cur_num_solid_rspecs = 0; DEBUG("Reading lookup table."); @@ -616,7 +898,7 @@ read_wim_lookup_table(WIMStruct *wim) BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); - /* Calculate number of entries in the lookup table. */ + /* Calculate the number of entries in the lookup table. */ num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size / sizeof(struct wim_lookup_table_entry_disk); @@ -628,22 +910,22 @@ read_wim_lookup_table(WIMStruct *wim) /* Allocate a hash table to map SHA1 message digests into stream * specifications. This is the in-memory "lookup table". */ table = new_lookup_table(num_entries * 2 + 1); - if (table == NULL) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_buf; - } + if (!table) + goto oom; - /* Allocate and initalize stream entries from the raw lookup table - * buffer. */ - wim->current_image = 0; - cur_rspec = NULL; - for (i = 0; i < num_entries; i++) { + /* Allocate and initalize stream entries ('struct + * wim_lookup_table_entry's) from the raw lookup table buffer. Each of + * these entries will point to a 'struct wim_resource_spec' that + * describes the underlying resource. In WIMs with version number + * WIM_VERSION_SOLID, a resource may contain multiple streams. 
+ */ + for (size_t i = 0; i < num_entries; i++) { const struct wim_lookup_table_entry_disk *disk_entry = &((const struct wim_lookup_table_entry_disk*)buf)[i]; - u16 part_number; struct wim_reshdr reshdr; + u16 part_number; + /* Get the resource header */ get_wim_reshdr(&disk_entry->reshdr, &reshdr); DEBUG("reshdr: size_in_wim=%"PRIu64", " @@ -653,259 +935,224 @@ read_wim_lookup_table(WIMStruct *wim) reshdr.size_in_wim, reshdr.uncompressed_size, reshdr.offset_in_wim, reshdr.flags); + /* Ignore SOLID flag if it isn't supposed to be used in this WIM + * version. */ if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) - reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS; + reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID; + /* Allocate a new 'struct wim_lookup_table_entry'. */ cur_entry = new_lookup_table_entry(); - if (cur_entry == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; - } + if (!cur_entry) + goto oom; + /* Get the part number, reference count, and hash. 
*/ part_number = le16_to_cpu(disk_entry->part_number); cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); copy_hash(cur_entry->hash, disk_entry->hash); - if (part_number != wim->hdr.part_number) { - WARNING("A lookup table entry in part %hu of the WIM " - "points to part %hu (ignoring it)", - wim->hdr.part_number, part_number); - free_lookup_table_entry(cur_entry); - continue; - } - - if (!(reshdr.flags & (WIM_RESHDR_FLAG_PACKED_STREAMS | - WIM_RESHDR_FLAG_COMPRESSED))) { - if (reshdr.uncompressed_size != reshdr.size_in_wim) { - ERROR("Invalid resource entry!"); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto err; - } - } + if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) { - back_to_back_pack = false; - if (!(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) || - cur_rspec == NULL || - (back_to_back_pack = - ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER && - cur_rspec != NULL && - cur_rspec->size_in_wim != 0))) - { - /* Starting new run of streams that share the same WIM - * resource. */ - struct wim_lookup_table_entry *prev_entry = NULL; + /* SOLID entry */ - if (back_to_back_pack && - !list_empty(&cur_rspec->stream_list)) - { - prev_entry = list_entry(cur_rspec->stream_list.prev, - struct wim_lookup_table_entry, - rspec_node); - lte_unbind_wim_resource_spec(prev_entry); - } - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (!cur_solid_rspecs) { + /* Starting new run */ + ret = load_solid_info(wim, disk_entry, + num_entries - i, + &cur_solid_rspecs, + &cur_num_solid_rspecs); if (ret) - goto err; + goto out; } - /* Allocate the resource specification and initialize it - * with values from the current stream entry. 
*/ - cur_rspec = MALLOC(sizeof(*cur_rspec)); - if (cur_rspec == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; + if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { + /* Resource entry, not stream entry */ + goto free_cur_entry_and_continue; } - wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); - if (prev_entry) - lte_bind_wim_resource_spec(prev_entry, cur_rspec); - } + /* Stream entry */ - if ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) - { - /* Found the specification for the packed resource. - * Transfer the values to the `struct - * wim_resource_spec', and discard the current stream - * since this lookup table entry did not, in fact, - * correspond to a "stream". - */ + ret = bind_stream_to_solid_resource(&reshdr, + cur_entry, + cur_solid_rspecs, + cur_num_solid_rspecs); + if (ret) + goto out; - /* Uncompressed size of the resource pack is actually - * stored in the header of the resource itself. Read - * it, and also grab the chunk size and compression type - * (which are not necessarily the defaults from the WIM - * header). */ - struct alt_chunk_table_header_disk hdr; + } else { + /* Normal stream/resource entry; SOLID not set. */ - ret = full_pread(&wim->in_fd, &hdr, - sizeof(hdr), reshdr.offset_in_wim); - if (ret) - goto err; - - cur_rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); - cur_rspec->offset_in_wim = reshdr.offset_in_wim; - cur_rspec->size_in_wim = reshdr.size_in_wim; - cur_rspec->flags = reshdr.flags; - - /* Compression format numbers must be the same as in - * WIMGAPI to be compatible here. 
*/ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - cur_rspec->compression_type = le32_to_cpu(hdr.compression_format); - - cur_rspec->chunk_size = le32_to_cpu(hdr.chunk_size); - - DEBUG("Full pack is %"PRIu64" compressed bytes " - "at file offset %"PRIu64" (flags 0x%02x)", - cur_rspec->size_in_wim, - cur_rspec->offset_in_wim, - cur_rspec->flags); - free_lookup_table_entry(cur_entry); - continue; - } + struct wim_resource_spec *rspec; - if (is_zero_hash(cur_entry->hash)) { - free_lookup_table_entry(cur_entry); - continue; - } + if (unlikely(cur_solid_rspecs)) { + /* This entry terminated a solid run. */ + ret = finish_solid_rspecs(cur_solid_rspecs, + cur_num_solid_rspecs); + cur_solid_rspecs = NULL; + if (ret) + goto out; + } - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* Continuing the pack with another stream. */ - DEBUG("Continuing pack with stream: " - "%"PRIu64" uncompressed bytes @ " - "resource offset %"PRIu64")", - reshdr.size_in_wim, reshdr.offset_in_wim); - } + /* How to handle an uncompressed resource with its + * uncompressed size different from its compressed size? + * + * Based on a simple test, WIMGAPI seems to handle this + * as follows: + * + * if (size_in_wim > uncompressed_size) { + * Ignore uncompressed_size; use size_in_wim + * instead. + * } else { + * Honor uncompressed_size, but treat the part of + * the file data above size_in_wim as all zeros. + * } + * + * So we will do the same. */ + if (unlikely(!(reshdr.flags & + WIM_RESHDR_FLAG_COMPRESSED) && + (reshdr.size_in_wim > + reshdr.uncompressed_size))) + { + reshdr.uncompressed_size = reshdr.size_in_wim; + } + + /* Set up a resource specification for this stream. 
*/ + + rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (!rspec) + goto oom; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); - lte_bind_wim_resource_spec(cur_entry, cur_rspec); - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* In packed runs, the offset field is used for - * in-resource offset, not the in-WIM offset, and the - * size field is used for the uncompressed size, not the - * compressed size. */ - cur_entry->offset_in_res = reshdr.offset_in_wim; - cur_entry->size = reshdr.size_in_wim; - cur_entry->flags = reshdr.flags; - } else { - /* Normal case: The stream corresponds one-to-one with - * the resource entry. */ cur_entry->offset_in_res = 0; cur_entry->size = reshdr.uncompressed_size; cur_entry->flags = reshdr.flags; - cur_rspec = NULL; + + lte_bind_wim_resource_spec(cur_entry, rspec); } - if (cur_entry->flags & WIM_RESHDR_FLAG_METADATA) { - /* Lookup table entry for a metadata resource */ + /* cur_entry is now a stream bound to a resource. */ - /* Metadata entries with no references must be ignored; - * see for example the WinPE WIMs from the WAIK v2.1. - * */ - if (cur_entry->refcnt == 0) { - free_lookup_table_entry(cur_entry); - continue; - } + /* Ignore entries with all zeroes in the hash field. */ + if (is_zero_hash(cur_entry->hash)) + goto free_cur_entry_and_continue; + + /* Verify that the part number matches that of the underlying + * WIM file. */ + if (part_number != wim->hdr.part_number) { + num_wrong_part_entries++; + goto free_cur_entry_and_continue; + } + + if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { + + /* Lookup table entry for a metadata resource. */ + + /* Metadata entries with no references must be ignored. + * See, for example, the WinPE WIMs from the WAIK v2.1. 
+ */ + if (cur_entry->refcnt == 0) + goto free_cur_entry_and_continue; if (cur_entry->refcnt != 1) { - if (wimlib_print_errors) { - ERROR("Found metadata resource with refcnt != 1:"); - print_lookup_table_entry(cur_entry, stderr); - } + /* We don't currently support this case due to + * the complications of multiple images sharing + * the same metadata resource or a metadata + * resource also being referenced by files. */ + ERROR("Found metadata resource with refcnt != 1"); ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto err; + goto out; } if (wim->hdr.part_number != 1) { WARNING("Ignoring metadata resource found in a " "non-first part of the split WIM"); - free_lookup_table_entry(cur_entry); - continue; + goto free_cur_entry_and_continue; } - if (wim->current_image == wim->hdr.image_count) { - WARNING("The WIM header says there are %u images " - "in the WIM, but we found more metadata " - "resources than this (ignoring the extra)", - wim->hdr.image_count); - free_lookup_table_entry(cur_entry); - continue; + + /* The number of entries in the lookup table with + * WIM_RESHDR_FLAG_METADATA set should be the same as + * the image_count field in the WIM header. */ + if (image_index == wim->hdr.image_count) { + WARNING("Found more metadata resources than images"); + goto free_cur_entry_and_continue; } /* Notice very carefully: We are assigning the metadata - * resources in the exact order mirrored by their lookup - * table entries on disk, which is the behavior of - * Microsoft's software. In particular, this overrides - * the actual locations of the metadata resources - * themselves in the WIM file as well as any information - * written in the XML data. */ - DEBUG("Found metadata resource for image %u at " + * resources to images in the same order in which their + * lookup table entries occur on disk. (This is also + * the behavior of Microsoft's software.) 
In + * particular, this overrides the actual locations of + * the metadata resources themselves in the WIM file as + * well as any information written in the XML data. */ + DEBUG("Found metadata resource for image %"PRIu32" at " "offset %"PRIu64".", - wim->current_image + 1, - cur_entry->rspec->offset_in_wim); - wim->image_metadata[ - wim->current_image++]->metadata_lte = cur_entry; - continue; - } + image_index + 1, + reshdr.offset_in_wim); + + wim->image_metadata[image_index++]->metadata_lte = cur_entry; + } else { + /* Lookup table entry for a non-metadata stream. */ - /* Lookup table entry for a stream that is not a metadata - * resource. */ - duplicate_entry = lookup_stream(table, cur_entry->hash); - if (duplicate_entry) { - if (wimlib_print_errors) { - WARNING("The WIM lookup table contains two entries with the " - "same SHA1 message digest!"); - WARNING("The first entry is:"); - print_lookup_table_entry(duplicate_entry, stderr); - WARNING("The second entry is:"); - print_lookup_table_entry(cur_entry, stderr); + /* Ignore this stream if it's a duplicate. */ + if (lookup_stream(table, cur_entry->hash)) { + num_duplicate_entries++; + goto free_cur_entry_and_continue; } - free_lookup_table_entry(cur_entry); - continue; + + /* Insert the stream into the in-memory lookup table, + * keyed by its SHA1 message digest. */ + lookup_table_insert(table, cur_entry); } - /* Finally, insert the stream into the lookup table, keyed by - * its SHA1 message digest. */ - lookup_table_insert(table, cur_entry); + continue; + + free_cur_entry_and_continue: + if (cur_solid_rspecs && + cur_entry->resource_location == RESOURCE_IN_WIM) + lte_unbind_wim_resource_spec(cur_entry); + free_lookup_table_entry(cur_entry); } cur_entry = NULL; - /* Validate the last resource. */ - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (cur_solid_rspecs) { + /* End of lookup table terminated a solid run. 
*/ + ret = finish_solid_rspecs(cur_solid_rspecs, cur_num_solid_rspecs); + cur_solid_rspecs = NULL; if (ret) - goto err; + goto out; } - if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { - WARNING("The header of \"%"TS"\" says there are %u images in\n" - " the WIM, but we only found %d metadata resources! Acting as if\n" - " the header specified only %d images instead.", - wim->filename, wim->hdr.image_count, - wim->current_image, wim->current_image); - for (int i = wim->current_image; i < wim->hdr.image_count; i++) + if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { + WARNING("Could not find metadata resources for all images"); + for (u32 i = image_index; i < wim->hdr.image_count; i++) put_image_metadata(wim->image_metadata[i], NULL); - wim->hdr.image_count = wim->current_image; + wim->hdr.image_count = image_index; } + + if (num_duplicate_entries > 0) { + WARNING("Ignoring %zu duplicate streams in the WIM lookup table", + num_duplicate_entries); + } + + if (num_wrong_part_entries > 0) { + WARNING("Ignoring %zu streams with wrong part number", + num_wrong_part_entries); + } + DEBUG("Done reading lookup table."); wim->lookup_table = table; ret = 0; goto out_free_buf; -err: - if (cur_rspec && list_empty(&cur_rspec->stream_list)) - FREE(cur_rspec); +oom: + ERROR("Not enough memory to read lookup table!"); + ret = WIMLIB_ERR_NOMEM; +out: + free_solid_rspecs(cur_solid_rspecs, cur_num_solid_rspecs); free_lookup_table_entry(cur_entry); free_lookup_table(table); out_free_buf: FREE(buf); -out: - wim->current_image = 0; return ret; } @@ -920,6 +1167,10 @@ put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry, copy_hash(disk_entry->hash, hash); } +/* Note: the list of stream entries must be sorted so that all entries for the + * same solid resource are consecutive. In addition, entries with + * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the + * underlying images. 
*/ int write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, @@ -933,12 +1184,14 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct wim_lookup_table_entry_disk *table_buf_ptr; int ret; u64 prev_res_offset_in_wim = ~0ULL; + u64 prev_uncompressed_size; + u64 logical_offset; table_size = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { table_size += sizeof(struct wim_lookup_table_entry_disk); - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID && lte->out_res_offset_in_wim != prev_res_offset_in_wim) { table_size += sizeof(struct wim_lookup_table_entry_disk); @@ -958,38 +1211,46 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, table_buf_ptr = table_buf; prev_res_offset_in_wim = ~0ULL; + prev_uncompressed_size = 0; + logical_offset = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + struct wim_reshdr tmp_reshdr; - put_wim_lookup_table_entry(table_buf_ptr++, - &lte->out_reshdr, - part_number, - lte->out_refcnt, - lte->hash); - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && - lte->out_res_offset_in_wim != prev_res_offset_in_wim) - { - /* Put the main resource entry for the pack. */ + /* Eww. When WIMGAPI sees multiple solid resources, it + * expects the offsets to be adjusted as if there were + * really only one solid resource. 
*/ - struct wim_reshdr reshdr; + if (lte->out_res_offset_in_wim != prev_res_offset_in_wim) { + /* Put the resource entry for solid resource */ + tmp_reshdr.offset_in_wim = lte->out_res_offset_in_wim; + tmp_reshdr.size_in_wim = lte->out_res_size_in_wim; + tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER; + tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID; - reshdr.offset_in_wim = lte->out_res_offset_in_wim; - reshdr.size_in_wim = lte->out_res_size_in_wim; - reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; - reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; + put_wim_lookup_table_entry(table_buf_ptr++, + &tmp_reshdr, + part_number, + 1, zero_hash); - DEBUG("Putting main entry for pack: " - "size_in_wim=%"PRIu64", " - "offset_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64, - reshdr.size_in_wim, - reshdr.offset_in_wim, - reshdr.uncompressed_size); + logical_offset += prev_uncompressed_size; + prev_res_offset_in_wim = lte->out_res_offset_in_wim; + prev_uncompressed_size = lte->out_res_uncompressed_size; + } + tmp_reshdr = lte->out_reshdr; + tmp_reshdr.offset_in_wim += logical_offset; put_wim_lookup_table_entry(table_buf_ptr++, - &reshdr, + &tmp_reshdr, part_number, - 1, zero_hash); - prev_res_offset_in_wim = lte->out_res_offset_in_wim; + lte->out_refcnt, + lte->hash); + } else { + put_wim_lookup_table_entry(table_buf_ptr++, + &lte->out_reshdr, + part_number, + lte->out_refcnt, + lte->hash); } } @@ -1011,30 +1272,6 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, return ret; } -int -lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) -{ - lte->real_refcnt = 0; - return 0; -} - -int -lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) -{ - lte->out_refcnt = 0; - return 0; -} - -int -lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore) -{ - if (lte->extracted_file != NULL) { - FREE(lte->extracted_file); - lte->extracted_file = NULL; - } - return 0; -} - /* Allocate a stream entry for the 
contents of the buffer, or re-use an existing * entry in @lookup_table for the same stream. */ struct wim_lookup_table_entry * @@ -1127,87 +1364,6 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, return 0; } -void -print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) -{ - if (lte == NULL) { - tputc(T('\n'), out); - return; - } - - - tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), - lte->size); - if (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - tprintf(T("Offset = %"PRIu64" bytes\n"), - lte->offset_in_res); - - tprintf(T("Raw uncompressed size = %"PRIu64" bytes\n"), - lte->rspec->uncompressed_size); - - tprintf(T("Raw compressed size = %"PRIu64" bytes\n"), - lte->rspec->size_in_wim); - - tprintf(T("Raw offset = %"PRIu64" bytes\n"), - lte->rspec->offset_in_wim); - } else if (lte->resource_location == RESOURCE_IN_WIM) { - tprintf(T("Compressed size = %"PRIu64" bytes\n"), - lte->rspec->size_in_wim); - - tprintf(T("Offset = %"PRIu64" bytes\n"), - lte->rspec->offset_in_wim); - } - - tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); - - if (lte->unhashed) { - tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), - lte->back_inode, lte->back_stream_id); - } else { - tfprintf(out, T("Hash = 0x")); - print_hash(lte->hash, out); - tputc(T('\n'), out); - } - - tfprintf(out, T("Flags = ")); - u8 flags = lte->flags; - if (flags & WIM_RESHDR_FLAG_COMPRESSED) - tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out); - if (flags & WIM_RESHDR_FLAG_FREE) - tfputs(T("WIM_RESHDR_FLAG_FREE, "), out); - if (flags & WIM_RESHDR_FLAG_METADATA) - tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out); - if (flags & WIM_RESHDR_FLAG_SPANNED) - tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out); - if (flags & WIM_RESHDR_FLAG_PACKED_STREAMS) - tfputs(T("WIM_RESHDR_FLAG_PACKED_STREAMS, "), out); - tputc(T('\n'), out); - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - if (lte->rspec->wim->filename) { - tfprintf(out, T("WIM file = `%"TS"'\n"), - 
lte->rspec->wim->filename); - } - break; -#ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: -#endif - case RESOURCE_IN_FILE_ON_DISK: - tfprintf(out, T("File on Disk = `%"TS"'\n"), - lte->file_on_disk); - break; -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - tfprintf(out, T("Staging File = `%"TS"'\n"), - lte->staging_file_name); - break; -#endif - default: - break; - } - tputc(T('\n'), out); -} - void lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, struct wimlib_resource_entry *wentry) @@ -1217,7 +1373,7 @@ lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, wentry->uncompressed_size = lte->size; if (lte->resource_location == RESOURCE_IN_WIM) { wentry->part_number = lte->rspec->wim->hdr.part_number; - if (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + if (lte->flags & WIM_RESHDR_FLAG_SOLID) { wentry->compressed_size = 0; wentry->offset = lte->offset_in_res; } else { @@ -1234,7 +1390,7 @@ lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, wentry->is_metadata = (lte->flags & WIM_RESHDR_FLAG_METADATA) != 0; wentry->is_free = (lte->flags & WIM_RESHDR_FLAG_FREE) != 0; wentry->is_spanned = (lte->flags & WIM_RESHDR_FLAG_SPANNED) != 0; - wentry->packed = (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) != 0; + wentry->packed = (lte->flags & WIM_RESHDR_FLAG_SOLID) != 0; } struct iterate_lte_context { @@ -1258,11 +1414,14 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags, wimlib_iterate_lookup_table_callback_t cb, void *user_ctx) { + if (flags != 0) + return WIMLIB_ERR_INVALID_PARAM; + struct iterate_lte_context ctx = { .cb = cb, .user_ctx = user_ctx, }; - if (wim->hdr.part_number == 1) { + if (wim_has_metadata(wim)) { int ret; for (int i = 0; i < wim->hdr.image_count; i++) { ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte,