+int
+read_wim_lookup_table(WIMStruct *wim)
+{
+ int ret;
+ size_t num_entries;
+ void *buf = NULL;
+ struct wim_lookup_table *table = NULL;
+ struct wim_lookup_table_entry *cur_entry = NULL;
+ struct wim_resource_spec *cur_rspec = NULL;
+ size_t num_duplicate_entries = 0;
+ size_t num_wrong_part_entries = 0;
+ u32 image_index = 0;
+ /*
+ * Overview: read the on-disk lookup table described by
+ * wim->hdr.lookup_table_reshdr and build the in-memory lookup table
+ * from it.
+ *
+ * Entries flagged WIM_RESHDR_FLAG_METADATA are attached to
+ * wim->image_metadata[] in on-disk order; all other accepted entries
+ * are inserted into a newly allocated table keyed by their hash. On
+ * success that table is stored in wim->lookup_table.
+ *
+ * Returns 0 on success. On failure returns WIMLIB_ERR_NOMEM (an
+ * allocation failed), WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY (a metadata
+ * resource has a reference count other than 0 or 1), or the nonzero
+ * code returned by wim_reshdr_to_data(), finish_resource(), or
+ * full_pread(). On failure wim->lookup_table is not assigned.
+ *
+ * NOTE(review): metadata_lte pointers already stored in
+ * wim->image_metadata[] are not undone by the cleanup code in this
+ * function -- presumably the caller deals with that; confirm.
+ */
+ DEBUG("Reading lookup table.");
+
+ /* Sanity check: lookup table entries are 50 bytes each. */
+ BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) !=
+ WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE);
+
+ /* Calculate the number of entries in the lookup table. The division
+ * truncates, so any trailing partial entry is not counted. */
+ num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size /
+ sizeof(struct wim_lookup_table_entry_disk);
+
+ /* Read the lookup table into a buffer. */
+ ret = wim_reshdr_to_data(&wim->hdr.lookup_table_reshdr, wim, &buf);
+ if (ret)
+ goto out;
+
+ /* Allocate a hash table to map SHA1 message digests into stream
+ * specifications. This is the in-memory "lookup table". The argument
+ * is presumably the initial capacity (about twice the entry count);
+ * confirm against new_lookup_table(). */
+ table = new_lookup_table(num_entries * 2 + 1);
+ if (!table)
+ goto oom;
+
+ /* Allocate and initialize stream entries ('struct
+ * wim_lookup_table_entry's) from the raw lookup table buffer. Each of
+ * these entries will point to a 'struct wim_resource_spec' that
+ * describes the underlying resource. In WIMs with version number
+ * WIM_VERSION_PACKED_STREAMS, a resource may contain multiple streams.
+ */
+ for (size_t i = 0; i < num_entries; i++) {
+ const struct wim_lookup_table_entry_disk *disk_entry =
+ &((const struct wim_lookup_table_entry_disk*)buf)[i];
+ struct wim_reshdr reshdr;
+ u16 part_number;
+
+ /* Get the resource header */
+ get_wim_reshdr(&disk_entry->reshdr, &reshdr);
+
+ DEBUG("reshdr: size_in_wim=%"PRIu64", "
+ "uncompressed_size=%"PRIu64", "
+ "offset_in_wim=%"PRIu64", "
+ "flags=0x%02x\n",
+ reshdr.size_in_wim, reshdr.uncompressed_size,
+ reshdr.offset_in_wim, reshdr.flags);
+
+ /* Ignore PACKED_STREAMS flag if it isn't supposed to be used in
+ * this WIM version */
+ if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
+ reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS;
+
+ /* Allocate a 'struct wim_lookup_table_entry' */
+ cur_entry = new_lookup_table_entry();
+ if (!cur_entry)
+ goto oom;
+
+ /* Get the part number, reference count, and hash. The integer
+ * fields are converted from their little-endian on-disk form. */
+ part_number = le16_to_cpu(disk_entry->part_number);
+ cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt);
+ copy_hash(cur_entry->hash, disk_entry->hash);
+
+ /* Verify that the part number matches that of the underlying
+ * WIM file. A mismatching entry is counted and skipped; it is
+ * not treated as an error. */
+ if (part_number != wim->hdr.part_number) {
+ num_wrong_part_entries++;
+ goto free_cur_entry_and_continue;
+ }
+
+ /* If resource is uncompressed, check for (unexpected) size
+ * mismatch. */
+ if (!(reshdr.flags & (WIM_RESHDR_FLAG_PACKED_STREAMS |
+ WIM_RESHDR_FLAG_COMPRESSED))) {
+ if (reshdr.uncompressed_size != reshdr.size_in_wim) {
+ /* So ... This is an uncompressed resource, but
+ * its uncompressed size is NOT the same as its
+ * "compressed" size (size_in_wim). What to do
+ * with it?
+ *
+ * Based on a simple test, WIMGAPI seems to
+ * handle this as follows:
+ *
+ * if (size_in_wim > uncompressed_size) {
+ * Ignore uncompressed_size; use
+ * size_in_wim instead.
+ * } else {
+ * Honor uncompressed_size, but treat the
+ * part of the file data above size_in_wim
+ * as all zeros.
+ * }
+ *
+ * So we will do the same. Only the first case
+ * is adjusted here; in the other case the
+ * header values are left as they are at this
+ * point.
+ */
+ if (reshdr.size_in_wim > reshdr.uncompressed_size)
+ reshdr.uncompressed_size = reshdr.size_in_wim;
+ }
+ }
+
+ /*
+ * Possibly start a new resource.
+ *
+ * We need to start a new resource if:
+ *
+ * - There is no previous resource (cur_rspec).
+ *
+ * OR
+ *
+ * - The resource header did not have PACKED_STREAMS set, so it
+ * specifies a new, single-stream resource.
+ *
+ * OR
+ *
+ * - The resource header had PACKED_STREAMS set, and it's a
+ * special entry that specifies the resource itself as opposed
+ * to a stream, and we already encountered one such entry in
+ * the current resource. We will interpret this as the
+ * beginning of a new packed resource. (However, note that
+ * wimlib does not currently allow creating WIMs with multiple
+ * packed resources, so as to remain compatible with WIMGAPI.)
+ */
+ if (likely(!cur_rspec) ||
+ !(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) ||
+ (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER &&
+ cur_rspec->size_in_wim != 0))
+ {
+ /* Finish previous resource (if existent). cur_rspec is
+ * cleared before the result is checked, so the cleanup
+ * code at 'out' never sees the finished resource. */
+ if (cur_rspec) {
+ ret = finish_resource(cur_rspec);
+ cur_rspec = NULL;
+ if (ret)
+ goto out;
+ }
+
+ /* Allocate the resource specification and initialize it
+ * with values from the current stream entry. */
+ cur_rspec = MALLOC(sizeof(*cur_rspec));
+ if (!cur_rspec)
+ goto oom;
+
+ wim_res_hdr_to_spec(&reshdr, wim, cur_rspec);
+
+ /* If this is a packed run, the current stream entry may
+ * specify a stream within the resource, and not the
+ * resource itself. Zero possibly irrelevant data until
+ * it is read for certain. */
+ if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) {
+ cur_rspec->size_in_wim = 0;
+ cur_rspec->uncompressed_size = 0;
+ cur_rspec->offset_in_wim = 0;
+ }
+ }
+
+ /* Now cur_rspec != NULL. */
+
+ /* Check for packed resource specification. */
+ if (unlikely((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) &&
+ reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER))
+ {
+ /* Found the specification for the packed resource.
+ * Transfer the values to the `struct
+ * wim_resource_spec', and discard the current stream
+ * since this lookup table entry did not, in fact,
+ * correspond to a "stream". */
+
+ /* The uncompressed size of the packed resource is
+ * actually stored in the header of the resource itself.
+ * Read it, and also grab the chunk size and compression
+ * type (which are not necessarily the defaults from the
+ * WIM header). */
+ struct alt_chunk_table_header_disk hdr;
+
+ ret = full_pread(&wim->in_fd, &hdr,
+ sizeof(hdr), reshdr.offset_in_wim);
+ if (ret)
+ goto out;
+
+ cur_rspec->uncompressed_size = le64_to_cpu(hdr.res_usize);
+ cur_rspec->offset_in_wim = reshdr.offset_in_wim;
+ cur_rspec->size_in_wim = reshdr.size_in_wim;
+ cur_rspec->flags = reshdr.flags;
+
+ /* Compression format numbers must be the same as in
+ * WIMGAPI to be compatible here. */
+ BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
+ BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
+ BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
+ BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
+ cur_rspec->compression_type = le32_to_cpu(hdr.compression_format);
+
+ cur_rspec->chunk_size = le32_to_cpu(hdr.chunk_size);
+
+ DEBUG("Full pack is %"PRIu64" compressed bytes "
+ "at file offset %"PRIu64" (flags 0x%02x)",
+ cur_rspec->size_in_wim,
+ cur_rspec->offset_in_wim,
+ cur_rspec->flags);
+ goto free_cur_entry_and_continue;
+ }
+
+ /* Ignore entries with all zeroes in the hash field. */
+ if (is_zero_hash(cur_entry->hash))
+ goto free_cur_entry_and_continue;
+
+ if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
+
+ /* Lookup table entry for a metadata resource. */
+
+ /* Metadata entries with no references must be ignored.
+ * See, for example, the WinPE WIMs from the WAIK v2.1.
+ */
+ if (cur_entry->refcnt == 0)
+ goto free_cur_entry_and_continue;
+
+ if (cur_entry->refcnt != 1) {
+ /* We don't currently support this case due to
+ * the complications of multiple images sharing
+ * the same metadata resource or a metadata
+ * resource also being referenced by files.
+ */
+ ERROR("Found metadata resource with refcnt != 1");
+ ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+ goto out;
+ }
+
+ if (wim->hdr.part_number != 1) {
+ WARNING("Ignoring metadata resource found in a "
+ "non-first part of the split WIM");
+ goto free_cur_entry_and_continue;
+ }
+
+ /* The number of entries in the lookup table with
+ * WIM_RESHDR_FLAG_METADATA set should be the same as
+ * the image_count field in the WIM header. This check
+ * also keeps image_index within wim->image_metadata[]
+ * for the assignment below. */
+ if (image_index == wim->hdr.image_count) {
+ WARNING("Found more metadata resources than images");
+ goto free_cur_entry_and_continue;
+ }
+
+ /* Notice very carefully: We are assigning the metadata
+ * resources to images in the same order in which their
+ * lookup table entries occur on disk. (This is also
+ * the behavior of Microsoft's software.) In
+ * particular, this overrides the actual locations of
+ * the metadata resources themselves in the WIM file as
+ * well as any information written in the XML data. */
+ DEBUG("Found metadata resource for image %"PRIu32" at "
+ "offset %"PRIu64".",
+ image_index + 1,
+ reshdr.offset_in_wim);
+
+ wim->image_metadata[image_index++]->metadata_lte = cur_entry;
+ } else {
+ /* Lookup table entry for a non-metadata stream. */
+
+ /* Ignore this stream if it's a duplicate. */
+ if (lookup_stream(table, cur_entry->hash)) {
+ num_duplicate_entries++;
+ goto free_cur_entry_and_continue;
+ }
+
+ /* Insert the stream into the in-memory lookup table,
+ * keyed by its SHA1 message digest. */
+ lookup_table_insert(table, cur_entry);
+ }
+
+ /* Add the stream to the current resource specification. */
+ lte_bind_wim_resource_spec(cur_entry, cur_rspec);
+ if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) {
+ /* In packed runs, the offset field is used for
+ * in-resource offset, not the in-WIM offset, and the
+ * size field is used for the uncompressed size, not the
+ * compressed size. */
+ cur_entry->offset_in_res = reshdr.offset_in_wim;
+ cur_entry->size = reshdr.size_in_wim;
+ cur_entry->flags = reshdr.flags;
+ /* cur_rspec stays the same */
+
+ } else {
+ /* Normal case: The stream corresponds one-to-one with
+ * the resource entry. cur_rspec is cleared (without a
+ * finish_resource() call) so that the next entry
+ * starts a new resource. */
+ cur_entry->offset_in_res = 0;
+ cur_entry->size = reshdr.uncompressed_size;
+ cur_entry->flags = reshdr.flags;
+ cur_rspec = NULL;
+ }
+ continue;
+ /* Shared exit for entries that are skipped: free the entry
+ * allocated for this iteration, then go on to the next one. */
+ free_cur_entry_and_continue:
+ free_lookup_table_entry(cur_entry);
+ }
+ cur_entry = NULL; /* freed or handed off above; don't free again at 'out' */
+
+ /* Validate the last resource. */
+ if (cur_rspec) {
+ ret = finish_resource(cur_rspec);
+ cur_rspec = NULL;
+ if (ret)
+ goto out;
+ }
+ /* On the first part, if fewer metadata entries than hdr.image_count
+ * were found: pass the unfilled image slots to put_image_metadata()
+ * and reduce image_count to the number actually found. */
+ if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
+ WARNING("Could not find metadata resources for all images");
+ for (u32 i = image_index; i < wim->hdr.image_count; i++)
+ put_image_metadata(wim->image_metadata[i], NULL);
+ wim->hdr.image_count = image_index;
+ }
+
+ if (num_duplicate_entries > 0) {
+ WARNING("Ignoring %zu duplicate streams in the WIM lookup table",
+ num_duplicate_entries);
+ }
+
+ if (num_wrong_part_entries > 0) {
+ WARNING("Ignoring %zu streams with wrong part number",
+ num_wrong_part_entries);
+ }
+ /* Success: hand the table to the WIMStruct and clear the local pointer
+ * so that free_lookup_table() at 'out' is not called on it. */
+ DEBUG("Done reading lookup table.");
+ wim->lookup_table = table;
+ table = NULL;
+ ret = 0;
+ goto out;
+oom:
+ ERROR("Not enough memory to read lookup table!");
+ ret = WIMLIB_ERR_NOMEM;
+out: /* Shared cleanup, reached on success and on every failure path.
+ * cur_rspec is freed here only while its stream_list is still empty. */
+ if (cur_rspec && list_empty(&cur_rspec->stream_list))
+ FREE(cur_rspec);
+ free_lookup_table_entry(cur_entry);
+ free_lookup_table(table);
+ FREE(buf);
+ return ret;
+}
+
+static void
+put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry,
+ const struct wim_reshdr *out_reshdr,
+ u16 part_number, u32 refcnt, const u8 *hash)