+/*
+ * Release the temporary array of resource specifications built for a packed
+ * run.
+ *
+ * Only resource specifications whose stream list is empty are freed here;
+ * ones that still have streams on their list are left allocated (presumably
+ * they are now owned by those streams -- confirm against the binding code).
+ * The array itself is always freed.  A NULL @subpacks is a no-op.
+ */
+static void
+free_subpack_info(struct wim_resource_spec **subpacks, size_t num_subpacks)
+{
+	size_t idx;
+
+	if (!subpacks)
+		return;
+
+	for (idx = 0; idx != num_subpacks; idx++) {
+		struct wim_resource_spec *rspec = subpacks[idx];
+
+		/* Nothing was bound to this resource, so nobody else will
+		 * release it.  */
+		if (list_empty(&rspec->stream_list))
+			FREE(rspec);
+	}
+
+	FREE(subpacks);
+}
+
+/*
+ * Comparison callback that orders two streams by their offset within the
+ * containing resource.
+ *
+ * @p1 and @p2 each point to a 'struct wim_lookup_table_entry *'.  The result
+ * is whatever cmp_u64() returns for the two 'offset_in_res' values.
+ */
+static int
+cmp_streams_by_offset_in_res(const void *p1, const void *p2)
+{
+	const struct wim_lookup_table_entry * const *first = p1;
+	const struct wim_lookup_table_entry * const *second = p2;
+
+	return cmp_u64((*first)->offset_in_res, (*second)->offset_in_res);
+}
+
+/*
+ * Validate the size and location of a WIM resource, and of every stream on
+ * its stream list.
+ *
+ * Checks performed:
+ *   - offset_in_wim + size_in_wim of the resource does not wrap around;
+ *   - for each stream, offset_in_res + size does not wrap around and does not
+ *     exceed the resource's uncompressed size;
+ *   - no two streams overlap.  If the streams are not already listed at
+ *     non-decreasing, non-overlapping positions, the list is sorted by offset
+ *     first and then re-checked.
+ *
+ * Returns 0 on success, WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY if a check
+ * fails, or the error code from sort_stream_list() if sorting fails.
+ */
+static int
+validate_resource(struct wim_resource_spec *rspec)
+{
+	struct wim_lookup_table_entry *stream;
+	u64 next_free = 0;
+	bool need_sort = false;
+	int err;
+
+	/* The resource itself must have a valid offset and size. */
+	if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim)
+		goto invalid_due_to_overflow;
+
+	/* Each stream must lie entirely inside the uncompressed resource.
+	 * While walking the list, also note whether any stream starts before
+	 * the end of the furthest-reaching stream accepted so far.  */
+	list_for_each_entry(stream, &rspec->stream_list, rspec_node) {
+		u64 end = stream->offset_in_res + stream->size;
+
+		if (end < stream->size || end > rspec->uncompressed_size)
+			goto invalid_due_to_overflow;
+
+		if (stream->offset_in_res < next_free)
+			need_sort = true;
+		else
+			next_free = end;
+	}
+
+	/* Already in order with no overlap: nothing more to check. */
+	if (!need_sort)
+		return 0;
+
+	/* Out of order (or overlapping).  Sort by offset; after that, any
+	 * stream starting before the previous one ends is a real overlap.  */
+	err = sort_stream_list(&rspec->stream_list,
+			       offsetof(struct wim_lookup_table_entry,
+					rspec_node),
+			       cmp_streams_by_offset_in_res);
+	if (err)
+		return err;
+
+	next_free = 0;
+	list_for_each_entry(stream, &rspec->stream_list, rspec_node) {
+		if (stream->offset_in_res < next_free)
+			goto invalid_due_to_overlap;
+		next_free = stream->offset_in_res + stream->size;
+	}
+
+	return 0;
+
+invalid_due_to_overflow:
+	ERROR("Invalid resource entry (offset overflow)");
+	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+
+invalid_due_to_overlap:
+	ERROR("Invalid resource entry (streams in packed resource overlap)");
+	return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+}
+
+/*
+ * Finish a packed run: validate each resource in @subpacks, stopping at the
+ * first one that fails, then release the run's bookkeeping with
+ * free_subpack_info() regardless of the outcome.
+ *
+ * Returns 0 if every resource validated, otherwise the error code from the
+ * first validate_resource() call that failed.
+ */
+static int
+finish_subpacks(struct wim_resource_spec **subpacks, size_t num_subpacks)
+{
+	int status = 0;
+	size_t idx = 0;
+
+	while (idx < num_subpacks && status == 0)
+		status = validate_resource(subpacks[idx++]);
+
+	free_subpack_info(subpacks, num_subpacks);
+	return status;
+}
+
+/*
+ * Reads the lookup table from a WIM file. Usually, each entry specifies a
+ * stream that the WIM file contains, along with its location and SHA1 message
+ * digest.
+ *
+ * Saves lookup table entries for non-metadata streams in a hash table (set to
+ * wim->lookup_table), and saves the metadata entry for each image in a special
+ * per-image location (the wim->image_metadata array).
+ *
+ * This works for both version WIM_VERSION_DEFAULT (68864) and version
+ * WIM_VERSION_PACKED_STREAMS (3584) WIMs. In the latter, a consecutive run of
+ * lookup table entries that all have flag WIM_RESHDR_FLAG_PACKED_STREAMS (0x10)
+ * set is a "packed run". A packed run logically contains zero or more
+ * resources, each of which logically contains zero or more streams.
+ * Physically, in such a run, a "lookup table entry" with uncompressed size
+ * WIM_PACK_MAGIC_NUMBER (0x100000000) specifies a resource, whereas any other
+ * entry specifies a stream. Within such a run, stream entries and resource
+ * entries need not be in any particular order, except that the order of the
+ * resource entries is important, as it affects how streams are assigned to
+ * resources. See the code for details.
+ *
+ * Possible return values:
+ * WIMLIB_ERR_SUCCESS (0)
+ * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
+ * WIMLIB_ERR_NOMEM
+ *
+ * Or an error code caused by failure to read the lookup table from the WIM
+ * file.
+ */
+int
+read_wim_lookup_table(WIMStruct *wim)
+{
+ int ret;
+ size_t num_entries;
+ void *buf = NULL;
+ struct wim_lookup_table *table = NULL;
+ struct wim_lookup_table_entry *cur_entry = NULL;
+ size_t num_duplicate_entries = 0;
+ size_t num_wrong_part_entries = 0;
+ u32 image_index = 0;
+ struct wim_resource_spec **cur_subpacks = NULL;
+ size_t cur_num_subpacks = 0;
+
+ DEBUG("Reading lookup table.");
+
+ /* Sanity check: lookup table entries are 50 bytes each. */
+ BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) !=
+ WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE);
+
+ /* Calculate the number of entries in the lookup table. */
+ num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size /
+ sizeof(struct wim_lookup_table_entry_disk);
+
+ /* Read the lookup table into a buffer. */
+ ret = wim_reshdr_to_data(&wim->hdr.lookup_table_reshdr, wim, &buf);
+ if (ret)
+ goto out;
+
+ /* Allocate a hash table to map SHA1 message digests into stream
+ * specifications. This is the in-memory "lookup table". */
+ table = new_lookup_table(num_entries * 2 + 1);
+ if (!table)
+ goto oom;
+
+ /* Allocate and initialize stream entries ('struct
+ * wim_lookup_table_entry's) from the raw lookup table buffer. Each of
+ * these entries will point to a 'struct wim_resource_spec' that
+ * describes the underlying resource. In WIMs with version number
+ * WIM_VERSION_PACKED_STREAMS, a resource may contain multiple streams.
+ */
+ for (size_t i = 0; i < num_entries; i++) {
+ const struct wim_lookup_table_entry_disk *disk_entry =
+ &((const struct wim_lookup_table_entry_disk*)buf)[i];
+ struct wim_reshdr reshdr;
+ u16 part_number;
+
+ /* Get the resource header */
+ get_wim_reshdr(&disk_entry->reshdr, &reshdr);
+
+ DEBUG("reshdr: size_in_wim=%"PRIu64", "
+ "uncompressed_size=%"PRIu64", "
+ "offset_in_wim=%"PRIu64", "
+ "flags=0x%02x",
+ reshdr.size_in_wim, reshdr.uncompressed_size,
+ reshdr.offset_in_wim, reshdr.flags);
+
+ /* Ignore PACKED_STREAMS flag if it isn't supposed to be used in
+ * this WIM version. */
+ if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
+ reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS;
+
+ /* Allocate a new 'struct wim_lookup_table_entry'. */
+ cur_entry = new_lookup_table_entry();
+ if (!cur_entry)
+ goto oom;
+
+ /* Get the part number, reference count, and hash. */
+ part_number = le16_to_cpu(disk_entry->part_number);
+ cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt);
+ copy_hash(cur_entry->hash, disk_entry->hash);
+
+ if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) {
+
+ /* PACKED_STREAMS entry */
+
+ if (!cur_subpacks) {
+ /* Starting new run */
+ ret = load_subpack_info(wim, disk_entry,
+ num_entries - i,
+ &cur_subpacks,
+ &cur_num_subpacks);
+ if (ret)
+ goto out;
+ }
+
+ if (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER) {
+ /* Resource entry, not stream entry */
+ goto free_cur_entry_and_continue;
+ }
+
+ /* Stream entry */
+
+ ret = bind_stream_to_subpack(&reshdr,
+ cur_entry,
+ cur_subpacks,
+ cur_num_subpacks);
+ if (ret)
+ goto out;
+
+ } else {
+ /* Normal stream/resource entry; PACKED_STREAMS not set.
+ */
+
+ struct wim_resource_spec *rspec;
+
+ if (unlikely(cur_subpacks)) {
+ /* This entry terminated a packed run. */
+ ret = finish_subpacks(cur_subpacks,
+ cur_num_subpacks);
+ cur_subpacks = NULL;
+ if (ret)
+ goto out;
+ }
+
+ /* How to handle an uncompressed resource with its
+ * uncompressed size different from its compressed size?
+ *
+ * Based on a simple test, WIMGAPI seems to handle this
+ * as follows:
+ *
+ * if (size_in_wim > uncompressed_size) {
+ * Ignore uncompressed_size; use size_in_wim
+ * instead.
+ * } else {
+ * Honor uncompressed_size, but treat the part of
+ * the file data above size_in_wim as all zeros.
+ * }
+ *
+ * So we will do the same. */
+ if (unlikely(!(reshdr.flags &
+ WIM_RESHDR_FLAG_COMPRESSED) &&
+ (reshdr.size_in_wim >
+ reshdr.uncompressed_size)))
+ {
+ reshdr.uncompressed_size = reshdr.size_in_wim;
+ }
+
+ /* Set up a resource specification for this stream. */
+
+ rspec = MALLOC(sizeof(struct wim_resource_spec));
+ if (!rspec)
+ goto oom;
+
+ wim_res_hdr_to_spec(&reshdr, wim, rspec);
+
+ cur_entry->offset_in_res = 0;
+ cur_entry->size = reshdr.uncompressed_size;
+ cur_entry->flags = reshdr.flags;
+
+ lte_bind_wim_resource_spec(cur_entry, rspec);
+ }
+
+ /* cur_entry is now a stream bound to a resource. */
+
+ /* Ignore entries with all zeroes in the hash field. */
+ if (is_zero_hash(cur_entry->hash))
+ goto free_cur_entry_and_continue;
+
+ /* Verify that the part number matches that of the underlying
+ * WIM file. */
+ if (part_number != wim->hdr.part_number) {
+ num_wrong_part_entries++;
+ goto free_cur_entry_and_continue;
+ }
+
+ if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
+
+ /* Lookup table entry for a metadata resource. */
+
+ /* Metadata entries with no references must be ignored.
+ * See, for example, the WinPE WIMs from the WAIK v2.1.
+ */
+ if (cur_entry->refcnt == 0)
+ goto free_cur_entry_and_continue;
+
+ if (cur_entry->refcnt != 1) {
+ /* We don't currently support this case due to
+ * the complications of multiple images sharing
+ * the same metadata resource or a metadata
+ * resource also being referenced by files. */
+ ERROR("Found metadata resource with refcnt != 1");
+ ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+ goto out;
+ }