X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flookup_table.c;h=0e4f96a3244a0c917097830afeab1972e8a72dd8;hp=1058bf2620d1fad913740a8a8d76ec0dcbdbed91;hb=41c221c509deed7dc9c2bd8eb8c7e93563b21199;hpb=b24275706ca80ffd423fbb91557199f7289191ac diff --git a/src/lookup_table.c b/src/lookup_table.c index 1058bf26..0e4f96a3 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -94,17 +94,11 @@ do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) void free_lookup_table(struct wim_lookup_table *table) { - DEBUG("Freeing lookup table."); - if (table == NULL) - return; - - if (table->array) { - for_lookup_table_entry(table, - do_free_lookup_table_entry, - NULL); + if (table) { + for_lookup_table_entry(table, do_free_lookup_table_entry, NULL); FREE(table->array); + FREE(table); } - FREE(table); } struct wim_lookup_table_entry * @@ -133,7 +127,6 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) if (new == NULL) return NULL; - new->extracted_file = NULL; switch (new->resource_location) { case RESOURCE_IN_WIM: list_add(&new->rspec_node, &new->rspec->stream_list); @@ -141,6 +134,7 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif #ifdef WITH_FUSE @@ -200,6 +194,7 @@ lte_put_resource(struct wim_lookup_table_entry *lte) break; case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif #ifdef WITH_FUSE @@ -235,10 +230,30 @@ free_lookup_table_entry(struct wim_lookup_table_entry *lte) } } -/* Decrements the reference count for the lookup table entry @lte. If its - * reference count reaches 0, it is unlinked from the lookup table. If, - * furthermore, the entry has no opened file descriptors associated with it, the - * entry is freed. */ +/* Should this stream be retained even if it has no references? */ +static bool +should_retain_lte(const struct wim_lookup_table_entry *lte) +{ + return lte->resource_location == RESOURCE_IN_WIM; +} + +static void +finalize_lte(struct wim_lookup_table_entry *lte) +{ + if (!should_retain_lte(lte)) + free_lookup_table_entry(lte); +} + +/* + * Decrements the reference count for the lookup table entry @lte, which must be + * inserted in the stream lookup table @table. + * + * If the reference count reaches 0, this may cause @lte to be destroyed. + * However, we may retain entries with 0 reference count. This does not affect + * correctness, but it prevents the entries for valid streams in a WIM archive, + * which will continue to be present after appending to the file, from being + * lost merely because we dropped all references to them. + */ void lte_decrement_refcnt(struct wim_lookup_table_entry *lte, struct wim_lookup_table *table) @@ -254,10 +269,12 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, * that there still may be open file descriptors to it.) * */ if (lte->resource_location == RESOURCE_IN_STAGING_FILE) - unlink(lte->staging_file_name); + unlinkat(lte->staging_dir_fd, + lte->staging_file_name, 0); #endif } else { - lookup_table_unlink(table, lte); + if (!should_retain_lte(lte)) + lookup_table_unlink(table, lte); } /* If FUSE mounts are enabled, we don't actually free the entry @@ -266,7 +283,7 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, #ifdef WITH_FUSE if (lte->num_opened_fds == 0) #endif - free_lookup_table_entry(lte); + finalize_lte(lte); } } @@ -277,7 +294,7 @@ lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte) wimlib_assert(lte->num_opened_fds != 0); if (--lte->num_opened_fds == 0 && lte->refcnt == 0) - free_lookup_table_entry(lte); + finalize_lte(lte); } #endif @@ -408,7 +425,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) /* Different (possibly split) WIMs? */ if (wim1 != wim2) { - v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GID_LEN); + v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN); if (v) return v; } @@ -429,6 +446,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) case RESOURCE_IN_STAGING_FILE: #endif #ifdef __WIN32__ + case RESOURCE_IN_WINNT_FILE_ON_DISK: case RESOURCE_WIN32_ENCRYPTED: #endif /* Compare files by path: just a heuristic that will place files @@ -557,64 +575,307 @@ struct wim_lookup_table_entry_disk { #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 +/* Given a nonempty run of consecutive lookup table entries with the + * PACKED_STREAMS flag set, count how many specify resources (as opposed to + * streams within those resources). + * + * Returns the resulting count. */ +static size_t +count_subpacks(const struct wim_lookup_table_entry_disk *entries, size_t max) +{ + size_t count = 0; + do { + struct wim_reshdr reshdr; + + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + + if (!(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) { + /* Run was terminated by a stand-alone stream entry. */ + break; + } + + if (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) { + /* This is a resource entry. */ + count++; + } + } while (--max); + return count; +} + +/* Given a run of consecutive lookup table entries with the PACKED_STREAMS flag + * set and having @num_subpacks resource entries, load resource information from + * them into the resource specifications in the @subpacks array. + * + * Returns 0 on success, or a nonzero error code on failure. */ +static int +do_load_subpack_info(WIMStruct *wim, struct wim_resource_spec **subpacks, + size_t num_subpacks, + const struct wim_lookup_table_entry_disk *entries) +{ + for (size_t i = 0; i < num_subpacks; i++) { + struct wim_reshdr reshdr; + struct alt_chunk_table_header_disk hdr; + struct wim_resource_spec *rspec; + int ret; + + /* Advance to next resource entry. */ + + do { + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + } while (reshdr.uncompressed_size != WIM_PACK_MAGIC_NUMBER); + + rspec = subpacks[i]; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); + + /* For packed resources, the uncompressed size, compression + * type, and chunk size are stored in the resource itself, not + * in the lookup table. */ + + ret = full_pread(&wim->in_fd, &hdr, + sizeof(hdr), reshdr.offset_in_wim); + if (ret) { + ERROR("Failed to read header of packed resource " + "(offset_in_wim=%"PRIu64")", + reshdr.offset_in_wim); + return ret; + } + + rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); + + /* Compression format numbers must be the same as in + * WIMGAPI to be compatible here. */ + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); + rspec->compression_type = le32_to_cpu(hdr.compression_format); + + rspec->chunk_size = le32_to_cpu(hdr.chunk_size); + + DEBUG("Subpack %zu/%zu: %"PRIu64" => %"PRIu64" " + "(%"TS"/%"PRIu32") @ +%"PRIu64"", + i + 1, num_subpacks, + rspec->uncompressed_size, + rspec->size_in_wim, + wimlib_get_compression_type_string(rspec->compression_type), + rspec->chunk_size, + rspec->offset_in_wim); + + } + return 0; +} + +/* Given a nonempty run of consecutive lookup table entries with the + * PACKED_STREAMS flag set, allocate a 'struct wim_resource_spec' for each + * resource within that run. + * + * Returns 0 on success, or a nonzero error code on failure. + * Returns the pointers and count in *subpacks_ret and *num_subpacks_ret. + */ +static int +load_subpack_info(WIMStruct *wim, + const struct wim_lookup_table_entry_disk *entries, + size_t num_remaining_entries, + struct wim_resource_spec ***subpacks_ret, + size_t *num_subpacks_ret) +{ + size_t num_subpacks; + struct wim_resource_spec **subpacks; + size_t i; + int ret; + + num_subpacks = count_subpacks(entries, num_remaining_entries); + subpacks = CALLOC(num_subpacks, sizeof(subpacks[0])); + if (!subpacks) + return WIMLIB_ERR_NOMEM; + + for (i = 0; i < num_subpacks; i++) { + subpacks[i] = MALLOC(sizeof(struct wim_resource_spec)); + if (!subpacks[i]) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_subpacks; + } + } + + ret = do_load_subpack_info(wim, subpacks, num_subpacks, entries); + if (ret) + goto out_free_subpacks; + + *subpacks_ret = subpacks; + *num_subpacks_ret = num_subpacks; + return 0; + +out_free_subpacks: + for (i = 0; i < num_subpacks; i++) + FREE(subpacks[i]); + FREE(subpacks); + return ret; +} + +/* Given a 'struct wim_lookup_table_entry' allocated for a stream entry with + * PACKED_STREAMS set, try to bind it to a subpack of the current PACKED_STREAMS + * run. */ +static int +bind_stream_to_subpack(const struct wim_reshdr *reshdr, + struct wim_lookup_table_entry *stream, + struct wim_resource_spec **subpacks, + size_t num_subpacks) +{ + u64 offset = reshdr->offset_in_wim; + + /* XXX: This linear search will be slow in the degenerate case where the + * number of subpacks is huge. */ + stream->size = reshdr->size_in_wim; + stream->flags = reshdr->flags; + for (size_t i = 0; i < num_subpacks; i++) { + if (offset + stream->size <= subpacks[i]->uncompressed_size) { + stream->offset_in_res = offset; + lte_bind_wim_resource_spec(stream, subpacks[i]); + return 0; + } + offset -= subpacks[i]->uncompressed_size; + } + ERROR("Packed stream could not be assigned to any resource"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; +} + +static void +free_subpack_info(struct wim_resource_spec **subpacks, size_t num_subpacks) +{ + if (subpacks) { + for (size_t i = 0; i < num_subpacks; i++) + if (list_empty(&subpacks[i]->stream_list)) + FREE(subpacks[i]); + FREE(subpacks); + } +} + +static int +cmp_streams_by_offset_in_res(const void *p1, const void *p2) +{ + const struct wim_lookup_table_entry *lte1, *lte2; + + lte1 = *(const struct wim_lookup_table_entry**)p1; + lte2 = *(const struct wim_lookup_table_entry**)p2; + + return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); +} + /* Validate the size and location of a WIM resource. */ static int -validate_resource(const struct wim_resource_spec *rspec) +validate_resource(struct wim_resource_spec *rspec) { struct wim_lookup_table_entry *lte; - u64 cur_offset; + bool out_of_order; + u64 expected_next_offset; + int ret; - /* Verify that calculating the offset of the end of the resource doesn't - * overflow. */ + /* Verify that the resource itself has a valid offset and size. */ if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim) - goto invalid; + goto invalid_due_to_overflow; - /* Verify that each stream in the resource has a valid offset and size, - * and that no streams overlap, and that the streams were added in order - * of increasing offset. */ - cur_offset = 0; + /* Verify that each stream in the resource has a valid offset and size. + */ + expected_next_offset = 0; + out_of_order = false; list_for_each_entry(lte, &rspec->stream_list, rspec_node) { if (lte->offset_in_res + lte->size < lte->size || - lte->offset_in_res + lte->size > rspec->uncompressed_size || - lte->offset_in_res < cur_offset) - goto invalid; + lte->offset_in_res + lte->size > rspec->uncompressed_size) + goto invalid_due_to_overflow; - cur_offset = lte->offset_in_res + lte->size; + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + out_of_order = true; } + + /* If the streams were not located at strictly increasing positions (not + * allowing for overlap), sort them. Then make sure that none overlap. + */ + if (out_of_order) { + ret = sort_stream_list(&rspec->stream_list, + offsetof(struct wim_lookup_table_entry, + rspec_node), + cmp_streams_by_offset_in_res); + if (ret) + return ret; + + expected_next_offset = 0; + list_for_each_entry(lte, &rspec->stream_list, rspec_node) { + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + goto invalid_due_to_overlap; + } + } + return 0; -invalid: +invalid_due_to_overflow: + ERROR("Invalid resource entry (offset overflow)"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - ERROR("Invalid resource entry!"); +invalid_due_to_overlap: + ERROR("Invalid resource entry (streams in packed resource overlap)"); return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; } +static int +finish_subpacks(struct wim_resource_spec **subpacks, size_t num_subpacks) +{ + int ret = 0; + for (size_t i = 0; i < num_subpacks; i++) { + ret = validate_resource(subpacks[i]); + if (ret) + break; + } + free_subpack_info(subpacks, num_subpacks); + return ret; +} + /* - * Reads the lookup table from a WIM file. Each entry specifies a stream that - * the WIM file contains, along with its location and SHA1 message digest. + * Reads the lookup table from a WIM file. Usually, each entry specifies a + * stream that the WIM file contains, along with its location and SHA1 message + * digest. + * + * Saves lookup table entries for non-metadata streams in a hash table (set to + * wim->lookup_table), and saves the metadata entry for each image in a special + * per-image location (the wim->image_metadata array). * - * Saves lookup table entries for non-metadata streams in a hash table, and - * saves the metadata entry for each image in a special per-image location (the - * image_metadata array). + * This works for both version WIM_VERSION_DEFAULT (68864) and version + * WIM_VERSION_PACKED_STREAMS (3584) WIMs. In the latter, a consecutive run of + * lookup table entries that all have flag WIM_RESHDR_FLAG_PACKED_STREAMS (0x10) + * set is a "packed run". A packed run logically contains zero or more + * resources, each of which logically contains zero or more streams. + * Physically, in such a run, a "lookup table entry" with uncompressed size + * WIM_PACK_MAGIC_NUMBER (0x100000000) specifies a resource, whereas any other + * entry specifies a stream. Within such a run, stream entries and resource + * entries need not be in any particular order, except that the order of the + * resource entries is important, as it affects how streams are assigned to + * resources. See the code for details. * - * Return values: + * Possible return values: * WIMLIB_ERR_SUCCESS (0) * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY - * WIMLIB_ERR_RESOURCE_NOT_FOUND + * WIMLIB_ERR_NOMEM * - * Or an error code caused by failure to read the lookup table into memory. + * Or an error code caused by failure to read the lookup table from the WIM + * file. */ int read_wim_lookup_table(WIMStruct *wim) { int ret; - size_t i; size_t num_entries; - struct wim_lookup_table *table; - struct wim_lookup_table_entry *cur_entry, *duplicate_entry; - struct wim_resource_spec *cur_rspec; - void *buf; - bool back_to_back_pack; + void *buf = NULL; + struct wim_lookup_table *table = NULL; + struct wim_lookup_table_entry *cur_entry = NULL; + size_t num_duplicate_entries = 0; + size_t num_wrong_part_entries = 0; + u32 image_index = 0; + struct wim_resource_spec **cur_subpacks = NULL; + size_t cur_num_subpacks = 0; DEBUG("Reading lookup table."); @@ -622,7 +883,7 @@ read_wim_lookup_table(WIMStruct *wim) BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); - /* Calculate number of entries in the lookup table. */ + /* Calculate the number of entries in the lookup table. */ num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size / sizeof(struct wim_lookup_table_entry_disk); @@ -634,22 +895,22 @@ read_wim_lookup_table(WIMStruct *wim) /* Allocate a hash table to map SHA1 message digests into stream * specifications. This is the in-memory "lookup table". */ table = new_lookup_table(num_entries * 2 + 1); - if (table == NULL) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_buf; - } + if (!table) + goto oom; - /* Allocate and initalize stream entries from the raw lookup table - * buffer. */ - wim->current_image = 0; - cur_rspec = NULL; - for (i = 0; i < num_entries; i++) { + /* Allocate and initalize stream entries ('struct + * wim_lookup_table_entry's) from the raw lookup table buffer. Each of + * these entries will point to a 'struct wim_resource_spec' that + * describes the underlying resource. In WIMs with version number + * WIM_VERSION_PACKED_STREAMS, a resource may contain multiple streams. + */ + for (size_t i = 0; i < num_entries; i++) { const struct wim_lookup_table_entry_disk *disk_entry = &((const struct wim_lookup_table_entry_disk*)buf)[i]; - u16 part_number; struct wim_reshdr reshdr; + u16 part_number; + /* Get the resource header */ get_wim_reshdr(&disk_entry->reshdr, &reshdr); DEBUG("reshdr: size_in_wim=%"PRIu64", " @@ -659,281 +920,225 @@ read_wim_lookup_table(WIMStruct *wim) reshdr.size_in_wim, reshdr.uncompressed_size, reshdr.offset_in_wim, reshdr.flags); + /* Ignore PACKED_STREAMS flag if it isn't supposed to be used in + * this WIM version. */ if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS; + /* Allocate a new 'struct wim_lookup_table_entry'. */ cur_entry = new_lookup_table_entry(); - if (cur_entry == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; - } + if (!cur_entry) + goto oom; + /* Get the part number, reference count, and hash. */ part_number = le16_to_cpu(disk_entry->part_number); cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); copy_hash(cur_entry->hash, disk_entry->hash); - if (part_number != wim->hdr.part_number) { - WARNING("A lookup table entry in part %hu of the WIM " - "points to part %hu (ignoring it)", - wim->hdr.part_number, part_number); - free_lookup_table_entry(cur_entry); - continue; - } - - if (!(reshdr.flags & (WIM_RESHDR_FLAG_PACKED_STREAMS | - WIM_RESHDR_FLAG_COMPRESSED))) { - if (reshdr.uncompressed_size != reshdr.size_in_wim) { - /* So ... This is an uncompressed resource, but - * its uncompressed size is NOT the same as its - * "compressed" size (size_in_wim). What to do - * with it? - * - * Based on a simple test, WIMGAPI seems to - * handle this as follows: - * - * if (size_in_wim > uncompressed_size) { - * Ignore uncompressed_size; use - * size_in_wim instead. - * } else { - * Honor uncompressed_size, but treat the - * part of the file data above size_in_wim - * as all zeros. - * } - * - * So we will do the same. - */ - if (reshdr.size_in_wim > reshdr.uncompressed_size) - reshdr.uncompressed_size = reshdr.size_in_wim; - } - } + if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - back_to_back_pack = false; - if (!(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) || - cur_rspec == NULL || - (back_to_back_pack = - ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER && - cur_rspec != NULL && - cur_rspec->size_in_wim != 0))) - { - /* Starting new run of streams that share the same WIM - * resource. */ - struct wim_lookup_table_entry *prev_entry = NULL; + /* PACKED_STREAMS entry */ - if (back_to_back_pack && - !list_empty(&cur_rspec->stream_list)) - { - prev_entry = list_entry(cur_rspec->stream_list.prev, - struct wim_lookup_table_entry, - rspec_node); - lte_unbind_wim_resource_spec(prev_entry); - } - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (!cur_subpacks) { + /* Starting new run */ + ret = load_subpack_info(wim, disk_entry, + num_entries - i, + &cur_subpacks, + &cur_num_subpacks); if (ret) - goto err; + goto out; } - /* Allocate the resource specification and initialize it - * with values from the current stream entry. */ - cur_rspec = MALLOC(sizeof(*cur_rspec)); - if (cur_rspec == NULL) { - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; - goto err; - } - wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); - - /* If this is a packed run, the current stream entry may - * specify a stream within the resource, and not the - * resource itself. Zero possibly irrelevant data until - * it is read for certain. (Note that the computation - * of 'back_to_back_pack' tests if 'size_in_wim' is - * nonzero to see if the resource info has been read; - * hence we need to set it to 0 here.) */ - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - cur_rspec->size_in_wim = 0; - cur_rspec->uncompressed_size = 0; - cur_rspec->offset_in_wim = 0; + if (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) { + /* Resource entry, not stream entry */ + goto free_cur_entry_and_continue; } - if (prev_entry) - lte_bind_wim_resource_spec(prev_entry, cur_rspec); - } + /* Stream entry */ - if ((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) - { - /* Found the specification for the packed resource. - * Transfer the values to the `struct - * wim_resource_spec', and discard the current stream - * since this lookup table entry did not, in fact, - * correspond to a "stream". + ret = bind_stream_to_subpack(&reshdr, + cur_entry, + cur_subpacks, + cur_num_subpacks); + if (ret) + goto out; + + } else { + /* Normal stream/resource entry; PACKED_STREAMS not set. */ - /* Uncompressed size of the resource pack is actually - * stored in the header of the resource itself. Read - * it, and also grab the chunk size and compression type - * (which are not necessarily the defaults from the WIM - * header). */ - struct alt_chunk_table_header_disk hdr; + struct wim_resource_spec *rspec; - ret = full_pread(&wim->in_fd, &hdr, - sizeof(hdr), reshdr.offset_in_wim); - if (ret) - goto err; - - cur_rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); - cur_rspec->offset_in_wim = reshdr.offset_in_wim; - cur_rspec->size_in_wim = reshdr.size_in_wim; - cur_rspec->flags = reshdr.flags; - - /* Compression format numbers must be the same as in - * WIMGAPI to be compatible here. */ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - cur_rspec->compression_type = le32_to_cpu(hdr.compression_format); - - cur_rspec->chunk_size = le32_to_cpu(hdr.chunk_size); - - DEBUG("Full pack is %"PRIu64" compressed bytes " - "at file offset %"PRIu64" (flags 0x%02x)", - cur_rspec->size_in_wim, - cur_rspec->offset_in_wim, - cur_rspec->flags); - free_lookup_table_entry(cur_entry); - continue; - } + if (unlikely(cur_subpacks)) { + /* This entry terminated a packed run. */ + ret = finish_subpacks(cur_subpacks, + cur_num_subpacks); + cur_subpacks = NULL; + if (ret) + goto out; + } - if (is_zero_hash(cur_entry->hash)) { - free_lookup_table_entry(cur_entry); - continue; - } + /* How to handle an uncompressed resource with its + * uncompressed size different from its compressed size? + * + * Based on a simple test, WIMGAPI seems to handle this + * as follows: + * + * if (size_in_wim > uncompressed_size) { + * Ignore uncompressed_size; use size_in_wim + * instead. + * } else { + * Honor uncompressed_size, but treat the part of + * the file data above size_in_wim as all zeros. + * } + * + * So we will do the same. */ + if (unlikely(!(reshdr.flags & + WIM_RESHDR_FLAG_COMPRESSED) && + (reshdr.size_in_wim > + reshdr.uncompressed_size))) + { + reshdr.uncompressed_size = reshdr.size_in_wim; + } - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* Continuing the pack with another stream. */ - DEBUG("Continuing pack with stream: " - "%"PRIu64" uncompressed bytes @ " - "resource offset %"PRIu64")", - reshdr.size_in_wim, reshdr.offset_in_wim); - } + /* Set up a resource specification for this stream. */ + + rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (!rspec) + goto oom; + + wim_res_hdr_to_spec(&reshdr, wim, rspec); - lte_bind_wim_resource_spec(cur_entry, cur_rspec); - if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - /* In packed runs, the offset field is used for - * in-resource offset, not the in-WIM offset, and the - * size field is used for the uncompressed size, not the - * compressed size. */ - cur_entry->offset_in_res = reshdr.offset_in_wim; - cur_entry->size = reshdr.size_in_wim; - cur_entry->flags = reshdr.flags; - } else { - /* Normal case: The stream corresponds one-to-one with - * the resource entry. */ cur_entry->offset_in_res = 0; cur_entry->size = reshdr.uncompressed_size; cur_entry->flags = reshdr.flags; - cur_rspec = NULL; + + lte_bind_wim_resource_spec(cur_entry, rspec); } - if (cur_entry->flags & WIM_RESHDR_FLAG_METADATA) { - /* Lookup table entry for a metadata resource */ + /* cur_entry is now a stream bound to a resource. */ - /* Metadata entries with no references must be ignored; - * see for example the WinPE WIMs from the WAIK v2.1. - * */ - if (cur_entry->refcnt == 0) { - free_lookup_table_entry(cur_entry); - continue; - } + /* Ignore entries with all zeroes in the hash field. */ + if (is_zero_hash(cur_entry->hash)) + goto free_cur_entry_and_continue; + + /* Verify that the part number matches that of the underlying + * WIM file. */ + if (part_number != wim->hdr.part_number) { + num_wrong_part_entries++; + goto free_cur_entry_and_continue; + } + + if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { + + /* Lookup table entry for a metadata resource. */ + + /* Metadata entries with no references must be ignored. + * See, for example, the WinPE WIMs from the WAIK v2.1. + */ + if (cur_entry->refcnt == 0) + goto free_cur_entry_and_continue; if (cur_entry->refcnt != 1) { + /* We don't currently support this case due to + * the complications of multiple images sharing + * the same metadata resource or a metadata + * resource also being referenced by files. */ ERROR("Found metadata resource with refcnt != 1"); ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto err; + goto out; } if (wim->hdr.part_number != 1) { WARNING("Ignoring metadata resource found in a " "non-first part of the split WIM"); - free_lookup_table_entry(cur_entry); - continue; + goto free_cur_entry_and_continue; } - if (wim->current_image == wim->hdr.image_count) { - WARNING("The WIM header says there are %u images " - "in the WIM, but we found more metadata " - "resources than this (ignoring the extra)", - wim->hdr.image_count); - free_lookup_table_entry(cur_entry); - continue; + + /* The number of entries in the lookup table with + * WIM_RESHDR_FLAG_METADATA set should be the same as + * the image_count field in the WIM header. */ + if (image_index == wim->hdr.image_count) { + WARNING("Found more metadata resources than images"); + goto free_cur_entry_and_continue; } /* Notice very carefully: We are assigning the metadata - * resources in the exact order mirrored by their lookup - * table entries on disk, which is the behavior of - * Microsoft's software. In particular, this overrides - * the actual locations of the metadata resources - * themselves in the WIM file as well as any information - * written in the XML data. */ - DEBUG("Found metadata resource for image %u at " + * resources to images in the same order in which their + * lookup table entries occur on disk. (This is also + * the behavior of Microsoft's software.) In + * particular, this overrides the actual locations of + * the metadata resources themselves in the WIM file as + * well as any information written in the XML data. */ + DEBUG("Found metadata resource for image %"PRIu32" at " "offset %"PRIu64".", - wim->current_image + 1, - cur_entry->rspec->offset_in_wim); - wim->image_metadata[ - wim->current_image++]->metadata_lte = cur_entry; - continue; - } + image_index + 1, + reshdr.offset_in_wim); - /* Lookup table entry for a stream that is not a metadata - * resource. */ - duplicate_entry = lookup_stream(table, cur_entry->hash); - if (duplicate_entry) { - WARNING("The WIM lookup table contains two entries " - "with the same SHA1 message digest!"); - free_lookup_table_entry(cur_entry); - continue; + wim->image_metadata[image_index++]->metadata_lte = cur_entry; + } else { + /* Lookup table entry for a non-metadata stream. */ + + /* Ignore this stream if it's a duplicate. */ + if (lookup_stream(table, cur_entry->hash)) { + num_duplicate_entries++; + goto free_cur_entry_and_continue; + } + + /* Insert the stream into the in-memory lookup table, + * keyed by its SHA1 message digest. */ + lookup_table_insert(table, cur_entry); } - /* Finally, insert the stream into the lookup table, keyed by - * its SHA1 message digest. */ - lookup_table_insert(table, cur_entry); + continue; + + free_cur_entry_and_continue: + if (cur_subpacks && + cur_entry->resource_location == RESOURCE_IN_WIM) + lte_unbind_wim_resource_spec(cur_entry); + free_lookup_table_entry(cur_entry); } cur_entry = NULL; - /* Validate the last resource. */ - if (cur_rspec != NULL) { - ret = validate_resource(cur_rspec); + if (cur_subpacks) { + /* End of lookup table terminated a packed run. */ + ret = finish_subpacks(cur_subpacks, cur_num_subpacks); + cur_subpacks = NULL; if (ret) - goto err; + goto out; } - if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { - WARNING("The header of \"%"TS"\" says there are %u images in\n" - " the WIM, but we only found %d metadata resources! Acting as if\n" - " the header specified only %d images instead.", - wim->filename, wim->hdr.image_count, - wim->current_image, wim->current_image); - for (int i = wim->current_image; i < wim->hdr.image_count; i++) + if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { + WARNING("Could not find metadata resources for all images"); + for (u32 i = image_index; i < wim->hdr.image_count; i++) put_image_metadata(wim->image_metadata[i], NULL); - wim->hdr.image_count = wim->current_image; + wim->hdr.image_count = image_index; } + + if (num_duplicate_entries > 0) { + WARNING("Ignoring %zu duplicate streams in the WIM lookup table", + num_duplicate_entries); + } + + if (num_wrong_part_entries > 0) { + WARNING("Ignoring %zu streams with wrong part number", + num_wrong_part_entries); + } + DEBUG("Done reading lookup table."); wim->lookup_table = table; ret = 0; goto out_free_buf; -err: - if (cur_rspec && list_empty(&cur_rspec->stream_list)) - FREE(cur_rspec); +oom: + ERROR("Not enough memory to read lookup table!"); + ret = WIMLIB_ERR_NOMEM; +out: + free_subpack_info(cur_subpacks, cur_num_subpacks); free_lookup_table_entry(cur_entry); free_lookup_table(table); out_free_buf: FREE(buf); -out: - wim->current_image = 0; return ret; } @@ -948,6 +1153,10 @@ put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry, copy_hash(disk_entry->hash, hash); } +/* Note: the list of stream entries must be sorted so that all entries for the + * same packed resource are consecutive. In addition, entries with + * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the + * underlying images. */ int write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, @@ -961,6 +1170,8 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct wim_lookup_table_entry_disk *table_buf_ptr; int ret; u64 prev_res_offset_in_wim = ~0ULL; + u64 prev_uncompressed_size; + u64 logical_offset; table_size = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { @@ -986,38 +1197,46 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, table_buf_ptr = table_buf; prev_res_offset_in_wim = ~0ULL; + prev_uncompressed_size = 0; + logical_offset = 0; list_for_each_entry(lte, stream_list, lookup_table_list) { + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + struct wim_reshdr tmp_reshdr; - put_wim_lookup_table_entry(table_buf_ptr++, - <e->out_reshdr, - part_number, - lte->out_refcnt, - lte->hash); - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && - lte->out_res_offset_in_wim != prev_res_offset_in_wim) - { - /* Put the main resource entry for the pack. */ + /* Eww. When WIMGAPI sees multiple resource packs, it + * expects the offsets to be adjusted as if there were + * really only one pack. */ - struct wim_reshdr reshdr; + if (lte->out_res_offset_in_wim != prev_res_offset_in_wim) { + /* Put the resource entry for pack */ + tmp_reshdr.offset_in_wim = lte->out_res_offset_in_wim; + tmp_reshdr.size_in_wim = lte->out_res_size_in_wim; + tmp_reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; + tmp_reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; - reshdr.offset_in_wim = lte->out_res_offset_in_wim; - reshdr.size_in_wim = lte->out_res_size_in_wim; - reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; - reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; + put_wim_lookup_table_entry(table_buf_ptr++, + &tmp_reshdr, + part_number, + 1, zero_hash); - DEBUG("Putting main entry for pack: " - "size_in_wim=%"PRIu64", " - "offset_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64, - reshdr.size_in_wim, - reshdr.offset_in_wim, - reshdr.uncompressed_size); + logical_offset += prev_uncompressed_size; + prev_res_offset_in_wim = lte->out_res_offset_in_wim; + prev_uncompressed_size = lte->out_res_uncompressed_size; + } + tmp_reshdr = lte->out_reshdr; + tmp_reshdr.offset_in_wim += logical_offset; put_wim_lookup_table_entry(table_buf_ptr++, - &reshdr, + &tmp_reshdr, part_number, - 1, zero_hash); - prev_res_offset_in_wim = lte->out_res_offset_in_wim; + lte->out_refcnt, + lte->hash); + } else { + put_wim_lookup_table_entry(table_buf_ptr++, + <e->out_reshdr, + part_number, + lte->out_refcnt, + lte->hash); } } @@ -1053,16 +1272,6 @@ lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) return 0; } -int -lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore) -{ - if (lte->extracted_file != NULL) { - FREE(lte->extracted_file); - lte->extracted_file = NULL; - } - return 0; -} - /* Allocate a stream entry for the contents of the buffer, or re-use an existing * entry in @lookup_table for the same stream. */ struct wim_lookup_table_entry * @@ -1212,7 +1421,7 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags, .cb = cb, .user_ctx = user_ctx, }; - if (wim->hdr.part_number == 1) { + if (wim_has_metadata(wim)) { int ret; for (int i = 0; i < wim->hdr.image_count; i++) { ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte,