X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flookup_table.c;h=59d7c7cfdd55547ef3b4d55a61c1deadf666743c;hp=4ad48bebb1ea207b5e077db079c1fa6c16d6228d;hb=f27fbe82ec560f3f1ac0464483297406f6bc508a;hpb=69a6de2b04308332d9e5e8bc61f34e4b9460c12f diff --git a/src/lookup_table.c b/src/lookup_table.c index 4ad48beb..59d7c7cf 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -28,22 +28,34 @@ # include "config.h" #endif +#include "wimlib/assert.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/file_io.h" -#include "wimlib/glob.h" #include "wimlib/lookup_table.h" #include "wimlib/metadata.h" -#include "wimlib/paths.h" +#include "wimlib/ntfs_3g.h" #include "wimlib/resource.h" #include "wimlib/util.h" #include "wimlib/write.h" -#include #include -#ifdef WITH_FUSE -# include /* for unlink() */ -#endif +#include +#include /* for unlink() */ + +/* WIM lookup table: + * + * This is a logical mapping from SHA1 message digests to the data streams + * contained in a WIM. + * + * Here it is implemented as a hash table. + * + * Note: Everything will break horribly if there is a SHA1 collision. 
+ */ +struct wim_lookup_table { + struct hlist_head *array; + size_t num_entries; + size_t capacity; +}; struct wim_lookup_table * new_lookup_table(size_t capacity) @@ -51,21 +63,48 @@ new_lookup_table(size_t capacity) struct wim_lookup_table *table; struct hlist_head *array; - table = CALLOC(1, sizeof(struct wim_lookup_table)); - if (table) { - array = CALLOC(capacity, sizeof(array[0])); - if (array) { - table->num_entries = 0; - table->capacity = capacity; - table->array = array; - } else { - FREE(table); - table = NULL; - ERROR("Failed to allocate memory for lookup table " - "with capacity %zu", capacity); - } + table = MALLOC(sizeof(struct wim_lookup_table)); + if (table == NULL) + goto oom; + + array = CALLOC(capacity, sizeof(array[0])); + if (array == NULL) { + FREE(table); + goto oom; } + + table->num_entries = 0; + table->capacity = capacity; + table->array = array; return table; + +oom: + ERROR("Failed to allocate memory for lookup table " + "with capacity %zu", capacity); + return NULL; +} + +static int +do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) +{ + free_lookup_table_entry(entry); + return 0; +} + +void +free_lookup_table(struct wim_lookup_table *table) +{ + DEBUG("Freeing lookup table."); + if (table == NULL) + return; + + if (table->array) { + for_lookup_table_entry(table, + do_free_lookup_table_entry, + NULL); + FREE(table->array); + } + FREE(table); } struct wim_lookup_table_entry * @@ -74,14 +113,14 @@ new_lookup_table_entry(void) struct wim_lookup_table_entry *lte; lte = CALLOC(1, sizeof(struct wim_lookup_table_entry)); - if (lte) { - lte->part_number = 1; - lte->refcnt = 1; - } else { - ERROR("Out of memory (tried to allocate %zu bytes for " - "lookup table entry)", - sizeof(struct wim_lookup_table_entry)); - } + if (lte == NULL) + return NULL; + + lte->refcnt = 1; + + /* lte->resource_location = RESOURCE_NONEXISTENT */ + BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0); + return lte; } @@ -91,11 +130,15 @@ 
clone_lookup_table_entry(const struct wim_lookup_table_entry *old) struct wim_lookup_table_entry *new; new = memdup(old, sizeof(struct wim_lookup_table_entry)); - if (!new) + if (new == NULL) return NULL; new->extracted_file = NULL; switch (new->resource_location) { + case RESOURCE_IN_WIM: + list_add(&new->rspec_node, &new->rspec->stream_list); + break; + case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ case RESOURCE_WIN32_ENCRYPTED: @@ -106,13 +149,12 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) (void*)&old->staging_file_name); #endif new->file_on_disk = TSTRDUP(old->file_on_disk); - if (!new->file_on_disk) + if (new->file_on_disk == NULL) goto out_free; break; case RESOURCE_IN_ATTACHED_BUFFER: - new->attached_buffer = memdup(old->attached_buffer, - wim_resource_size(old)); - if (!new->attached_buffer) + new->attached_buffer = memdup(old->attached_buffer, old->size); + if (new->attached_buffer == NULL) goto out_free; break; #ifdef WITH_NTFS_3G @@ -120,18 +162,18 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) if (old->ntfs_loc) { struct ntfs_location *loc; loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location)); - if (!loc) + if (loc == NULL) goto out_free; loc->path = NULL; loc->stream_name = NULL; new->ntfs_loc = loc; loc->path = STRDUP(old->ntfs_loc->path); - if (!loc->path) + if (loc->path == NULL) goto out_free; - if (loc->stream_name_nchars) { + if (loc->stream_name_nchars != 0) { loc->stream_name = memdup(old->ntfs_loc->stream_name, loc->stream_name_nchars * 2); - if (!loc->stream_name) + if (loc->stream_name == NULL) goto out_free; } } @@ -141,116 +183,110 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) break; } return new; + out_free: free_lookup_table_entry(new); return NULL; } void -free_lookup_table_entry(struct wim_lookup_table_entry *lte) +lte_put_resource(struct wim_lookup_table_entry *lte) { - if (lte) { - switch (lte->resource_location) { - case RESOURCE_IN_FILE_ON_DISK: - 
#ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: - #endif - #ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->staging_file_name); - #endif - case RESOURCE_IN_ATTACHED_BUFFER: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->attached_buffer); - FREE(lte->file_on_disk); - break; -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - if (lte->ntfs_loc) { - FREE(lte->ntfs_loc->path); - FREE(lte->ntfs_loc->stream_name); - FREE(lte->ntfs_loc); - } - break; + switch (lte->resource_location) { + case RESOURCE_IN_WIM: + list_del(&lte->rspec_node); + if (list_empty(&lte->rspec->stream_list)) + FREE(lte->rspec); + break; + case RESOURCE_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case RESOURCE_WIN32_ENCRYPTED: #endif - default: - break; +#ifdef WITH_FUSE + case RESOURCE_IN_STAGING_FILE: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->staging_file_name); +#endif + case RESOURCE_IN_ATTACHED_BUFFER: + BUILD_BUG_ON((void*)&lte->file_on_disk != + (void*)&lte->attached_buffer); + FREE(lte->file_on_disk); + break; +#ifdef WITH_NTFS_3G + case RESOURCE_IN_NTFS_VOLUME: + if (lte->ntfs_loc) { + FREE(lte->ntfs_loc->path); + FREE(lte->ntfs_loc->stream_name); + FREE(lte->ntfs_loc); } - FREE(lte); + break; +#endif + default: + break; } } -static int -do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) -{ - free_lookup_table_entry(entry); - return 0; -} - - void -free_lookup_table(struct wim_lookup_table *table) +free_lookup_table_entry(struct wim_lookup_table_entry *lte) { - DEBUG2("Freeing lookup table"); - if (table) { - if (table->array) { - for_lookup_table_entry(table, - do_free_lookup_table_entry, - NULL); - FREE(table->array); - } - FREE(table); + if (lte) { + lte_put_resource(lte); + FREE(lte); } } -/* - * Inserts an entry into the lookup table. - * - * @table: A pointer to the lookup table. - * @lte: A pointer to the entry to insert. 
- */ -void -lookup_table_insert(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte) +/* Should this stream be retained even if it has no references? */ +static bool +should_retain_lte(const struct wim_lookup_table_entry *lte) { - size_t i = lte->hash_short % table->capacity; - hlist_add_head(&lte->hash_list, &table->array[i]); - - /* XXX Make the table grow when too many entries have been inserted. */ - table->num_entries++; + return lte->resource_location == RESOURCE_IN_WIM; } static void finalize_lte(struct wim_lookup_table_entry *lte) { - #ifdef WITH_FUSE - if (lte->resource_location == RESOURCE_IN_STAGING_FILE) { - unlink(lte->staging_file_name); - list_del(&lte->unhashed_list); - } - #endif - free_lookup_table_entry(lte); + if (!should_retain_lte(lte)) + free_lookup_table_entry(lte); } -/* Decrements the reference count for the lookup table entry @lte. If its - * reference count reaches 0, it is unlinked from the lookup table. If, - * furthermore, the entry has no opened file descriptors associated with it, the - * entry is freed. */ +/* + * Decrements the reference count for the lookup table entry @lte, which must be + * inserted in the stream lookup table @table. + * + * If the reference count reaches 0, this may cause @lte to be destroyed. + * However, we may retain entries with 0 reference count. This does not affect + * correctness, but it prevents the entries for valid streams in a WIM archive, + * which will continue to be present after appending to the file, from being + * lost merely because we dropped all references to them. 
+ */ void lte_decrement_refcnt(struct wim_lookup_table_entry *lte, struct wim_lookup_table *table) { - wimlib_assert(lte != NULL); wimlib_assert(lte->refcnt != 0); + if (--lte->refcnt == 0) { - if (lte->unhashed) + if (lte->unhashed) { list_del(&lte->unhashed_list); - else - lookup_table_unlink(table, lte); - #ifdef WITH_FUSE + #ifdef WITH_FUSE + /* If the stream has been extracted to a staging file + * for a FUSE mount, unlink the staging file. (Note + * that there still may be open file descriptors to it.) + * */ + if (lte->resource_location == RESOURCE_IN_STAGING_FILE) + unlink(lte->staging_file_name); + #endif + } else { + if (!should_retain_lte(lte)) + lookup_table_unlink(table, lte); + } + + /* If FUSE mounts are enabled, we don't actually free the entry + * until the last file descriptor has been closed by + * lte_decrement_num_opened_fds(). */ +#ifdef WITH_FUSE if (lte->num_opened_fds == 0) - #endif +#endif finalize_lte(lte); } } @@ -259,12 +295,87 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, void lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte) { - if (lte->num_opened_fds != 0) - if (--lte->num_opened_fds == 0 && lte->refcnt == 0) - finalize_lte(lte); + wimlib_assert(lte->num_opened_fds != 0); + + if (--lte->num_opened_fds == 0 && lte->refcnt == 0) + finalize_lte(lte); } #endif +static void +lookup_table_insert_raw(struct wim_lookup_table *table, + struct wim_lookup_table_entry *lte) +{ + size_t i = lte->hash_short % table->capacity; + + hlist_add_head(&lte->hash_list, &table->array[i]); +} + +static void +enlarge_lookup_table(struct wim_lookup_table *table) +{ + size_t old_capacity, new_capacity; + struct hlist_head *old_array, *new_array; + struct wim_lookup_table_entry *lte; + struct hlist_node *cur, *tmp; + size_t i; + + old_capacity = table->capacity; + new_capacity = old_capacity * 2; + new_array = CALLOC(new_capacity, sizeof(struct hlist_head)); + if (new_array == NULL) + return; + old_array = table->array; + table->array 
= new_array; + table->capacity = new_capacity; + + for (i = 0; i < old_capacity; i++) { + hlist_for_each_entry_safe(lte, cur, tmp, &old_array[i], hash_list) { + hlist_del(&lte->hash_list); + lookup_table_insert_raw(table, lte); + } + } + FREE(old_array); +} + +/* Inserts an entry into the lookup table. */ +void +lookup_table_insert(struct wim_lookup_table *table, + struct wim_lookup_table_entry *lte) +{ + lookup_table_insert_raw(table, lte); + if (++table->num_entries > table->capacity) + enlarge_lookup_table(table); +} + +/* Unlinks a lookup table entry from the table; does not free it. */ +void +lookup_table_unlink(struct wim_lookup_table *table, + struct wim_lookup_table_entry *lte) +{ + wimlib_assert(!lte->unhashed); + wimlib_assert(table->num_entries != 0); + + hlist_del(&lte->hash_list); + table->num_entries--; +} + +/* Given a SHA1 message digest, return the corresponding entry in the WIM's + * lookup table, or NULL if there is none. */ +struct wim_lookup_table_entry * +lookup_stream(const struct wim_lookup_table *table, const u8 hash[]) +{ + size_t i; + struct wim_lookup_table_entry *lte; + struct hlist_node *pos; + + i = *(size_t*)hash % table->capacity; + hlist_for_each_entry(lte, pos, &table->array[i], hash_list) + if (hashes_equal(hash, lte->hash)) + return lte; + return NULL; +} + /* Calls a function on all the entries in the WIM lookup table. Stop early and * return nonzero if any call to the function returns nonzero. */ int @@ -280,7 +391,6 @@ for_lookup_table_entry(struct wim_lookup_table *table, hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i], hash_list) { - wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)); ret = visitor(lte, arg); if (ret) return ret; @@ -290,7 +400,7 @@ for_lookup_table_entry(struct wim_lookup_table *table, } /* qsort() callback that sorts streams (represented by `struct - * wim_lookup_table_entry's) into an order optimized for reading and writing. 
+ * wim_lookup_table_entry's) into an order optimized for reading. * * Sorting is done primarily by resource location, then secondarily by a * per-resource location order. For example, resources in WIM files are sorted @@ -301,6 +411,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) { const struct wim_lookup_table_entry *lte1, *lte2; int v; + WIMStruct *wim1, *wim2; lte1 = *(const struct wim_lookup_table_entry**)p1; lte2 = *(const struct wim_lookup_table_entry**)p2; @@ -313,27 +424,31 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) switch (lte1->resource_location) { case RESOURCE_IN_WIM: + wim1 = lte1->rspec->wim; + wim2 = lte2->rspec->wim; /* Different (possibly split) WIMs? */ - if (lte1->wim != lte2->wim) { - v = memcmp(lte1->wim->hdr.guid, lte2->wim->hdr.guid, - WIM_GID_LEN); + if (wim1 != wim2) { + v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GID_LEN); if (v) return v; } /* Different part numbers in the same WIM? */ - v = (int)lte1->wim->hdr.part_number - (int)lte2->wim->hdr.part_number; + v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number; if (v) return v; - /* Compare by offset. 
*/ - if (lte1->resource_entry.offset < lte2->resource_entry.offset) - return -1; - else if (lte1->resource_entry.offset > lte2->resource_entry.offset) - return 1; - return 0; + if (lte1->rspec->offset_in_wim != lte2->rspec->offset_in_wim) + return cmp_u64(lte1->rspec->offset_in_wim, + lte2->rspec->offset_in_wim); + + return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); + case RESOURCE_IN_FILE_ON_DISK: +#ifdef WITH_FUSE + case RESOURCE_IN_STAGING_FILE: +#endif #ifdef __WIN32__ case RESOURCE_WIN32_ENCRYPTED: #endif @@ -353,8 +468,9 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) } int -sort_stream_list_by_sequential_order(struct list_head *stream_list, - size_t list_head_offset) +sort_stream_list(struct list_head *stream_list, + size_t list_head_offset, + int (*compar)(const void *, const void*)) { struct list_head *cur; struct wim_lookup_table_entry **array; @@ -365,10 +481,14 @@ sort_stream_list_by_sequential_order(struct list_head *stream_list, list_for_each(cur, stream_list) num_streams++; + if (num_streams <= 1) + return 0; + array_size = num_streams * sizeof(array[0]); array = MALLOC(array_size); - if (!array) + if (array == NULL) return WIMLIB_ERR_NOMEM; + cur = stream_list->next; for (i = 0; i < num_streams; i++) { array[i] = (struct wim_lookup_table_entry*)((u8*)cur - @@ -376,8 +496,7 @@ sort_stream_list_by_sequential_order(struct list_head *stream_list, cur = cur->next; } - qsort(array, num_streams, sizeof(array[0]), - cmp_streams_by_sequential_order); + qsort(array, num_streams, sizeof(array[0]), compar); INIT_LIST_HEAD(stream_list); for (i = 0; i < num_streams; i++) { @@ -389,6 +508,15 @@ sort_stream_list_by_sequential_order(struct list_head *stream_list, return 0; } +/* Sort the specified list of streams in an order optimized for reading. 
*/ +int +sort_stream_list_by_sequential_order(struct list_head *stream_list, + size_t list_head_offset) +{ + return sort_stream_list(stream_list, list_head_offset, + cmp_streams_by_sequential_order); +} + static int add_lte_to_array(struct wim_lookup_table_entry *lte, @@ -434,9 +562,8 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, /* On-disk format of a WIM lookup table entry (stream entry). */ struct wim_lookup_table_entry_disk { - /* Location, offset, compression status, and metadata status of the - * stream. */ - struct resource_entry_disk resource_entry; + /* Size, offset, and flags of the stream. */ + struct wim_reshdr_disk reshdr; /* Which part of the split WIM this stream is in; indexed from 1. */ le16 part_number; @@ -451,233 +578,472 @@ struct wim_lookup_table_entry_disk { #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 -void -lte_init_wim(struct wim_lookup_table_entry *lte, WIMStruct *wim) +static int +cmp_streams_by_offset_in_res(const void *p1, const void *p2) +{ + const struct wim_lookup_table_entry *lte1, *lte2; + + lte1 = *(const struct wim_lookup_table_entry**)p1; + lte2 = *(const struct wim_lookup_table_entry**)p2; + + return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); +} + +/* Validate the size and location of a WIM resource. */ +static int +validate_resource(struct wim_resource_spec *rspec) +{ + struct wim_lookup_table_entry *lte; + bool out_of_order; + u64 expected_next_offset; + int ret; + + /* Verify that the resource itself has a valid offset and size. */ + if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim) + goto invalid_due_to_overflow; + + /* Verify that each stream in the resource has a valid offset and size. 
+ */ + expected_next_offset = 0; + out_of_order = false; + list_for_each_entry(lte, &rspec->stream_list, rspec_node) { + if (lte->offset_in_res + lte->size < lte->size || + lte->offset_in_res + lte->size > rspec->uncompressed_size) + goto invalid_due_to_overflow; + + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + out_of_order = true; + } + + /* If the streams were not located at strictly increasing positions (not + * allowing for overlap), sort them. Then make sure that none overlap. + */ + if (out_of_order) { + ret = sort_stream_list(&rspec->stream_list, + offsetof(struct wim_lookup_table_entry, + rspec_node), + cmp_streams_by_offset_in_res); + if (ret) + return ret; + + expected_next_offset = 0; + list_for_each_entry(lte, &rspec->stream_list, rspec_node) { + if (lte->offset_in_res >= expected_next_offset) + expected_next_offset = lte->offset_in_res + lte->size; + else + goto invalid_due_to_overlap; + } + } + + return 0; + +invalid_due_to_overflow: + ERROR("Invalid resource entry (offset overflow)"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + +invalid_due_to_overlap: + ERROR("Invalid resource entry (streams in packed resource overlap)"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; +} + +/* Validate the resource, or free it if unused. */ +static int +finish_resource(struct wim_resource_spec *rspec) { - lte->resource_location = RESOURCE_IN_WIM; - lte->wim = wim; - if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) - lte->compression_type = wim->compression_type; - else - lte->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; - - if (wim_is_pipable(wim)) - lte->is_pipable = 1; + if (!list_empty(&rspec->stream_list)) { + /* This resource contains at least one stream. */ + return validate_resource(rspec); + } else { + /* No streams are in this resource. Get rid of it. */ + FREE(rspec); + return 0; + } } /* - * Reads the lookup table from a WIM file. 
+ * Reads the lookup table from a WIM file. Usually, each entry specifies a + * stream that the WIM file contains, along with its location and SHA1 message + * digest. + * + * Saves lookup table entries for non-metadata streams in a hash table (set to + * wim->lookup_table), and saves the metadata entry for each image in a special + * per-image location (the wim->image_metadata array). * - * Saves lookup table entries for non-metadata streams in a hash table, and - * saves the metadata entry for each image in a special per-image location (the - * image_metadata array). + * This works for both version WIM_VERSION_DEFAULT (68864) and version + * WIM_VERSION_PACKED_STREAMS (3584) WIMs. * - * Return values: + * Possible return values: * WIMLIB_ERR_SUCCESS (0) * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY - * WIMLIB_ERR_RESOURCE_NOT_FOUND + * WIMLIB_ERR_NOMEM + * + * Or an error code caused by failure to read the lookup table from the WIM + * file. */ int read_wim_lookup_table(WIMStruct *wim) { int ret; - size_t i; size_t num_entries; - struct wim_lookup_table *table; - struct wim_lookup_table_entry *cur_entry, *duplicate_entry; - void *buf; + void *buf = NULL; + struct wim_lookup_table *table = NULL; + struct wim_lookup_table_entry *cur_entry = NULL; + struct wim_resource_spec *cur_rspec = NULL; + size_t num_duplicate_entries = 0; + size_t num_wrong_part_entries = 0; + u32 image_index = 0; + + DEBUG("Reading lookup table."); + /* Sanity check: lookup table entries are 50 bytes each. */ BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); - DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"", - wim->hdr.lookup_table_res_entry.offset, - wim->hdr.lookup_table_res_entry.size); - - /* Calculate number of entries in the lookup table. */ - num_entries = wim->hdr.lookup_table_res_entry.size / + /* Calculate the number of entries in the lookup table. 
*/ + num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size / sizeof(struct wim_lookup_table_entry_disk); - /* Read the lookup table into a buffer. */ - ret = res_entry_to_data(&wim->hdr.lookup_table_res_entry, wim, &buf); + ret = wim_reshdr_to_data(&wim->hdr.lookup_table_reshdr, wim, &buf); if (ret) goto out; - /* Allocate hash table. */ + /* Allocate a hash table to map SHA1 message digests into stream + * specifications. This is the in-memory "lookup table". */ table = new_lookup_table(num_entries * 2 + 1); - if (!table) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_buf; - } - - /* Allocate and initalize `struct wim_lookup_table_entry's from the - * on-disk lookup table. */ - wim->current_image = 0; - for (i = 0; i < num_entries; i++) { + if (!table) + goto oom; + + /* Allocate and initalize stream entries ('struct + * wim_lookup_table_entry's) from the raw lookup table buffer. Each of + * these entries will point to a 'struct wim_resource_spec' that + * describes the underlying resource. In WIMs with version number + * WIM_VERSION_PACKED_STREAMS, a resource may contain multiple streams. 
+ */ + for (size_t i = 0; i < num_entries; i++) { const struct wim_lookup_table_entry_disk *disk_entry = &((const struct wim_lookup_table_entry_disk*)buf)[i]; + struct wim_reshdr reshdr; + u16 part_number; + + /* Get the resource header */ + get_wim_reshdr(&disk_entry->reshdr, &reshdr); + + DEBUG("reshdr: size_in_wim=%"PRIu64", " + "uncompressed_size=%"PRIu64", " + "offset_in_wim=%"PRIu64", " + "flags=0x%02x\n", + reshdr.size_in_wim, reshdr.uncompressed_size, + reshdr.offset_in_wim, reshdr.flags); + + /* Ignore PACKED_STREAMS flag if it isn't supposed to be used in + * this WIM version */ + if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) + reshdr.flags &= ~WIM_RESHDR_FLAG_PACKED_STREAMS; + /* Allocate a 'struct wim_lookup_table_entry' */ cur_entry = new_lookup_table_entry(); - if (!cur_entry) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_lookup_table; - } + if (!cur_entry) + goto oom; - cur_entry->wim = wim; - cur_entry->resource_location = RESOURCE_IN_WIM; - get_resource_entry(&disk_entry->resource_entry, &cur_entry->resource_entry); - cur_entry->part_number = le16_to_cpu(disk_entry->part_number); + /* Get the part number, reference count, and hash. */ + part_number = le16_to_cpu(disk_entry->part_number); cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); copy_hash(cur_entry->hash, disk_entry->hash); - lte_init_wim(cur_entry, wim); - - if (cur_entry->part_number != wim->hdr.part_number) { - WARNING("A lookup table entry in part %hu of the WIM " - "points to part %hu (ignoring it)", - wim->hdr.part_number, cur_entry->part_number); - free_lookup_table_entry(cur_entry); - continue; + + /* Verify that the part number matches that of the underlying + * WIM file. 
*/ + if (part_number != wim->hdr.part_number) { + num_wrong_part_entries++; + goto free_cur_entry_and_continue; } - if (is_zero_hash(cur_entry->hash)) { - WARNING("The WIM lookup table contains an entry with a " - "SHA1 message digest of all 0's (ignoring it)"); - free_lookup_table_entry(cur_entry); - continue; + /* If resource is uncompressed, check for (unexpected) size + * mismatch. */ + if (!(reshdr.flags & (WIM_RESHDR_FLAG_PACKED_STREAMS | + WIM_RESHDR_FLAG_COMPRESSED))) { + if (reshdr.uncompressed_size != reshdr.size_in_wim) { + /* So ... This is an uncompressed resource, but + * its uncompressed size is NOT the same as its + * "compressed" size (size_in_wim). What to do + * with it? + * + * Based on a simple test, WIMGAPI seems to + * handle this as follows: + * + * if (size_in_wim > uncompressed_size) { + * Ignore uncompressed_size; use + * size_in_wim instead. + * } else { + * Honor uncompressed_size, but treat the + * part of the file data above size_in_wim + * as all zeros. + * } + * + * So we will do the same. + */ + if (reshdr.size_in_wim > reshdr.uncompressed_size) + reshdr.uncompressed_size = reshdr.size_in_wim; + } } - if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) - && (cur_entry->resource_entry.size != - cur_entry->resource_entry.original_size)) + /* + * Possibly start a new resource. + * + * We need to start a new resource if: + * + * - There is no previous resource (cur_rspec). + * + * OR + * + * - The resource header did not have PACKED_STREAMS set, so it + * specifies a new, single-stream resource. + * + * OR + * + * - The resource header had PACKED_STREAMS set, and it's a + * special entry that specifies the resource itself as opposed + * to a stream, and we already encountered one such entry in + * the current resource. We will interpret this as the + * beginning of a new packed resource. 
(However, note that + * wimlib does not currently allow create WIMs with multiple + * packed resources, as to remain compatible with WIMGAPI.) + */ + if (likely(!cur_rspec) || + !(reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) || + (reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER && + cur_rspec->size_in_wim != 0)) { - if (wimlib_print_errors) { - WARNING("Found uncompressed resource with " - "original size (%"PRIu64") not the same " - "as compressed size (%"PRIu64")", - cur_entry->resource_entry.original_size, - cur_entry->resource_entry.size); - if (cur_entry->resource_entry.original_size) { - WARNING("Overriding compressed size with original size."); - cur_entry->resource_entry.size = - cur_entry->resource_entry.original_size; - } else { - WARNING("Overriding original size with compressed size"); - cur_entry->resource_entry.original_size = - cur_entry->resource_entry.size; - } + /* Finish previous resource (if existent) */ + if (cur_rspec) { + ret = finish_resource(cur_rspec); + cur_rspec = NULL; + if (ret) + goto out; } + + /* Allocate the resource specification and initialize it + * with values from the current stream entry. */ + cur_rspec = MALLOC(sizeof(*cur_rspec)); + if (!cur_rspec) + goto oom; + + wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); + + /* If this is a packed run, the current stream entry may + * specify a stream within the resource, and not the + * resource itself. Zero possibly irrelevant data until + * it is read for certain. */ + if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + cur_rspec->size_in_wim = 0; + cur_rspec->uncompressed_size = 0; + cur_rspec->offset_in_wim = 0; + } + } + + /* Now cur_rspec != NULL. */ + + /* Checked for packed resource specification. */ + if (unlikely((reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && + reshdr.uncompressed_size == WIM_PACK_MAGIC_NUMBER)) + { + /* Found the specification for the packed resource. 
+ * Transfer the values to the `struct + * wim_resource_spec', and discard the current stream + * since this lookup table entry did not, in fact, + * correspond to a "stream". */ + + /* The uncompressed size of the packed resource is + * actually stored in the header of the resource itself. + * Read it, and also grab the chunk size and compression + * type (which are not necessarily the defaults from the + * WIM header). */ + struct alt_chunk_table_header_disk hdr; + + ret = full_pread(&wim->in_fd, &hdr, + sizeof(hdr), reshdr.offset_in_wim); + if (ret) + goto out; + + cur_rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); + cur_rspec->offset_in_wim = reshdr.offset_in_wim; + cur_rspec->size_in_wim = reshdr.size_in_wim; + cur_rspec->flags = reshdr.flags; + + /* Compression format numbers must be the same as in + * WIMGAPI to be compatible here. */ + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); + cur_rspec->compression_type = le32_to_cpu(hdr.compression_format); + + cur_rspec->chunk_size = le32_to_cpu(hdr.chunk_size); + + DEBUG("Full pack is %"PRIu64" compressed bytes " + "at file offset %"PRIu64" (flags 0x%02x)", + cur_rspec->size_in_wim, + cur_rspec->offset_in_wim, + cur_rspec->flags); + goto free_cur_entry_and_continue; } - if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) { - /* Lookup table entry for a metadata resource */ + /* Ignore entries with all zeroes in the hash field. */ + if (is_zero_hash(cur_entry->hash)) + goto free_cur_entry_and_continue; + + if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { + + /* Lookup table entry for a metadata resource. */ + + /* Metadata entries with no references must be ignored. + * See, for example, the WinPE WIMs from the WAIK v2.1. 
+ */ + if (cur_entry->refcnt == 0) + goto free_cur_entry_and_continue; + if (cur_entry->refcnt != 1) { - /* Metadata entries with no references must be - * ignored. See for example the WinPE WIMs from - * WAIK v2.1. */ - if (cur_entry->refcnt == 0) { - free_lookup_table_entry(cur_entry); - continue; - } - if (wimlib_print_errors) { - ERROR("Found metadata resource with refcnt != 1:"); - print_lookup_table_entry(cur_entry, stderr); - } + /* We don't currently support this case due to + * the complications of multiple images sharing + * the same metadata resource or a metadata + * resource also being referenced by files. + */ + ERROR("Found metadata resource with refcnt != 1"); ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + goto out; } if (wim->hdr.part_number != 1) { WARNING("Ignoring metadata resource found in a " "non-first part of the split WIM"); - free_lookup_table_entry(cur_entry); - continue; + goto free_cur_entry_and_continue; } - if (wim->current_image == wim->hdr.image_count) { - WARNING("The WIM header says there are %u images " - "in the WIM, but we found more metadata " - "resources than this (ignoring the extra)", - wim->hdr.image_count); - free_lookup_table_entry(cur_entry); - continue; + + /* The number of entries in the lookup table with + * WIM_RESHDR_FLAG_METADATA set should be the same as + * the image_count field in the WIM header. */ + if (image_index == wim->hdr.image_count) { + WARNING("Found more metadata resources than images"); + goto free_cur_entry_and_continue; } /* Notice very carefully: We are assigning the metadata - * resources in the exact order mirrored by their lookup - * table entries on disk, which is the behavior of - * Microsoft's software. In particular, this overrides - * the actual locations of the metadata resources - * themselves in the WIM file as well as any information - * written in the XML data. 
*/ - DEBUG("Found metadata resource for image %u at " + * resources to images in the same order in which their + * lookup table entries occur on disk. (This is also + * the behavior of Microsoft's software.) In + * particular, this overrides the actual locations of + * the metadata resources themselves in the WIM file as + * well as any information written in the XML data. */ + DEBUG("Found metadata resource for image %"PRIu32" at " "offset %"PRIu64".", - wim->current_image + 1, - cur_entry->resource_entry.offset); - wim->image_metadata[ - wim->current_image++]->metadata_lte = cur_entry; + image_index + 1, + reshdr.offset_in_wim); + + wim->image_metadata[image_index++]->metadata_lte = cur_entry; } else { - /* Lookup table entry for a stream that is not a - * metadata resource */ - duplicate_entry = lookup_resource(table, cur_entry->hash); - if (duplicate_entry) { - if (wimlib_print_errors) { - WARNING("The WIM lookup table contains two entries with the " - "same SHA1 message digest!"); - WARNING("The first entry is:"); - print_lookup_table_entry(duplicate_entry, stderr); - WARNING("The second entry is:"); - print_lookup_table_entry(cur_entry, stderr); - } - free_lookup_table_entry(cur_entry); - continue; - } else { - lookup_table_insert(table, cur_entry); + /* Lookup table entry for a non-metadata stream. */ + + /* Ignore this stream if it's a duplicate. */ + if (lookup_stream(table, cur_entry->hash)) { + num_duplicate_entries++; + goto free_cur_entry_and_continue; } + + /* Insert the stream into the in-memory lookup table, + * keyed by its SHA1 message digest. */ + lookup_table_insert(table, cur_entry); + } + + /* Add the stream to the current resource specification. */ + lte_bind_wim_resource_spec(cur_entry, cur_rspec); + if (reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + /* In packed runs, the offset field is used for + * in-resource offset, not the in-WIM offset, and the + * size field is used for the uncompressed size, not the + * compressed size. 
*/ + cur_entry->offset_in_res = reshdr.offset_in_wim; + cur_entry->size = reshdr.size_in_wim; + cur_entry->flags = reshdr.flags; + /* cur_rspec stays the same */ + + } else { + /* Normal case: The stream corresponds one-to-one with + * the resource entry. */ + cur_entry->offset_in_res = 0; + cur_entry->size = reshdr.uncompressed_size; + cur_entry->flags = reshdr.flags; + ret = validate_resource(cur_rspec); + cur_rspec = NULL; + if (ret) + goto out; } + continue; + + free_cur_entry_and_continue: + free_lookup_table_entry(cur_entry); + } + cur_entry = NULL; + + /* Validate the last resource. */ + if (cur_rspec) { + ret = finish_resource(cur_rspec); + cur_rspec = NULL; + if (ret) + goto out; } - if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { - WARNING("The header of \"%"TS"\" says there are %u images in\n" - " the WIM, but we only found %d metadata resources! Acting as if\n" - " the header specified only %d images instead.", - wim->filename, wim->hdr.image_count, - wim->current_image, wim->current_image); - for (int i = wim->current_image; i < wim->hdr.image_count; i++) + if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { + WARNING("Could not find metadata resources for all images"); + for (u32 i = image_index; i < wim->hdr.image_count; i++) put_image_metadata(wim->image_metadata[i], NULL); - wim->hdr.image_count = wim->current_image; + wim->hdr.image_count = image_index; + } + + if (num_duplicate_entries > 0) { + WARNING("Ignoring %zu duplicate streams in the WIM lookup table", + num_duplicate_entries); + } + + if (num_wrong_part_entries > 0) { + WARNING("Ignoring %zu streams with wrong part number", + num_wrong_part_entries); } + DEBUG("Done reading lookup table."); wim->lookup_table = table; + table = NULL; ret = 0; - goto out_free_buf; -out_free_cur_entry: - FREE(cur_entry); -out_free_lookup_table: + goto out; +oom: + ERROR("Not enough memory to read lookup table!"); + ret = WIMLIB_ERR_NOMEM; +out: + if 
(cur_rspec && list_empty(&cur_rspec->stream_list)) + FREE(cur_rspec); + free_lookup_table_entry(cur_entry); free_lookup_table(table); -out_free_buf: FREE(buf); -out: - wim->current_image = 0; return ret; } - static void -write_wim_lookup_table_entry(const struct wim_lookup_table_entry *lte, - struct wim_lookup_table_entry_disk *disk_entry) +put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry, + const struct wim_reshdr *out_reshdr, + u16 part_number, u32 refcnt, const u8 *hash) { - put_resource_entry(<e->output_resource_entry, &disk_entry->resource_entry); - disk_entry->part_number = cpu_to_le16(lte->part_number); - disk_entry->refcnt = cpu_to_le32(lte->out_refcnt); - copy_hash(disk_entry->hash, lte->hash); + put_wim_reshdr(out_reshdr, &disk_entry->reshdr); + disk_entry->part_number = cpu_to_le16(part_number); + disk_entry->refcnt = cpu_to_le32(refcnt); + copy_hash(disk_entry->hash, hash); } -static int +int write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct filedes *out_fd, - struct resource_entry *out_res_entry, + u16 part_number, + struct wim_reshdr *out_reshdr, int write_resource_flags) { size_t table_size; @@ -685,23 +1051,68 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, struct wim_lookup_table_entry_disk *table_buf; struct wim_lookup_table_entry_disk *table_buf_ptr; int ret; + u64 prev_res_offset_in_wim = ~0ULL; table_size = 0; - list_for_each_entry(lte, stream_list, lookup_table_list) + list_for_each_entry(lte, stream_list, lookup_table_list) { table_size += sizeof(struct wim_lookup_table_entry_disk); - DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")", + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && + lte->out_res_offset_in_wim != prev_res_offset_in_wim) + { + table_size += sizeof(struct wim_lookup_table_entry_disk); + prev_res_offset_in_wim = lte->out_res_offset_in_wim; + } + } + + DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")", 
table_size, out_fd->offset); table_buf = MALLOC(table_size); - if (!table_buf) { + if (table_buf == NULL) { ERROR("Failed to allocate %zu bytes for temporary lookup table", table_size); return WIMLIB_ERR_NOMEM; } table_buf_ptr = table_buf; - list_for_each_entry(lte, stream_list, lookup_table_list) - write_wim_lookup_table_entry(lte, table_buf_ptr++); + + prev_res_offset_in_wim = ~0ULL; + list_for_each_entry(lte, stream_list, lookup_table_list) { + + put_wim_lookup_table_entry(table_buf_ptr++, + <e->out_reshdr, + part_number, + lte->out_refcnt, + lte->hash); + if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS && + lte->out_res_offset_in_wim != prev_res_offset_in_wim) + { + /* Put the main resource entry for the pack. */ + + struct wim_reshdr reshdr; + + reshdr.offset_in_wim = lte->out_res_offset_in_wim; + reshdr.size_in_wim = lte->out_res_size_in_wim; + reshdr.uncompressed_size = WIM_PACK_MAGIC_NUMBER; + reshdr.flags = WIM_RESHDR_FLAG_PACKED_STREAMS; + + DEBUG("Putting main entry for pack: " + "size_in_wim=%"PRIu64", " + "offset_in_wim=%"PRIu64", " + "uncompressed_size=%"PRIu64, + reshdr.size_in_wim, + reshdr.offset_in_wim, + reshdr.uncompressed_size); + + put_wim_lookup_table_entry(table_buf_ptr++, + &reshdr, + part_number, + 1, zero_hash); + prev_res_offset_in_wim = lte->out_res_offset_in_wim; + } + + } + wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size); /* Write the lookup table uncompressed. Although wimlib can handle a * compressed lookup table, MS software cannot. 
*/ @@ -710,103 +1121,15 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, WIM_RESHDR_FLAG_METADATA, out_fd, WIMLIB_COMPRESSION_TYPE_NONE, - out_res_entry, + 0, + out_reshdr, NULL, write_resource_flags); FREE(table_buf); + DEBUG("ret=%d", ret); return ret; } -static int -append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list) -{ - /* Lookup table entries with 'out_refcnt' == 0 correspond to streams not - * written and not present in the resulting WIM file, and should not be - * included in the lookup table. - * - * Lookup table entries marked as filtered (EXTERNAL_WIM) with - * 'out_refcnt != 0' were referenced as part of the logical write but - * correspond to streams that were not in fact written, and should not - * be included in the lookup table. - * - * Lookup table entries marked as filtered (SAME_WIM) with 'out_refcnt - * != 0' were referenced as part of the logical write but correspond to - * streams that were not in fact written, but nevertheless were already - * present in the WIM being overwritten in-place. These entries must be - * included in the lookup table, and the resource information to write - * needs to be copied from the resource information read originally. 
- */ - if (lte->out_refcnt != 0 && !(lte->filtered & FILTERED_EXTERNAL_WIM)) { - if (lte->filtered & FILTERED_SAME_WIM) { - copy_resource_entry(<e->output_resource_entry, - <e->resource_entry); - } - list_add_tail(<e->lookup_table_list, (struct list_head*)_list); - } - return 0; -} - -int -write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, - struct resource_entry *out_res_entry, - struct list_head *stream_list_override) -{ - int write_resource_flags; - struct list_head _stream_list; - struct list_head *stream_list; - - if (stream_list_override) { - stream_list = stream_list_override; - } else { - stream_list = &_stream_list; - INIT_LIST_HEAD(stream_list); - } - - if (!(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)) { - int start_image; - int end_image; - - if (image == WIMLIB_ALL_IMAGES) { - start_image = 1; - end_image = wim->hdr.image_count; - } else { - start_image = image; - end_image = image; - } - - /* Push metadata resource lookup table entries onto the front of - * the list in reverse order, so that they're written in order. - */ - for (int i = end_image; i >= start_image; i--) { - struct wim_lookup_table_entry *metadata_lte; - - metadata_lte = wim->image_metadata[i - 1]->metadata_lte; - metadata_lte->out_refcnt = 1; - metadata_lte->part_number = wim->hdr.part_number; - metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA; - - list_add(&metadata_lte->lookup_table_list, stream_list); - } - } - - /* Append additional lookup table entries that need to be written, with - * some special handling for streams that have been marked as filtered. 
- */ - if (!stream_list_override) { - for_lookup_table_entry(wim->lookup_table, - append_lookup_table_entry, stream_list); - } - - write_resource_flags = 0; - if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE; - return write_wim_lookup_table_from_stream_list(stream_list, - &wim->out_fd, - out_res_entry, - write_resource_flags); -} - - int lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) { @@ -831,444 +1154,38 @@ lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore) return 0; } -void -print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) -{ - if (!lte) { - tputc(T('\n'), out); - return; - } - tfprintf(out, T("Offset = %"PRIu64" bytes\n"), - lte->resource_entry.offset); - - tfprintf(out, T("Size = %"PRIu64" bytes\n"), - (u64)lte->resource_entry.size); - - tfprintf(out, T("Original size = %"PRIu64" bytes\n"), - lte->resource_entry.original_size); - - tfprintf(out, T("Part Number = %hu\n"), lte->part_number); - tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); - - if (lte->unhashed) { - tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), - lte->back_inode, lte->back_stream_id); - } else { - tfprintf(out, T("Hash = 0x")); - print_hash(lte->hash, out); - tputc(T('\n'), out); - } - - tfprintf(out, T("Flags = ")); - u8 flags = lte->resource_entry.flags; - if (flags & WIM_RESHDR_FLAG_COMPRESSED) - tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out); - if (flags & WIM_RESHDR_FLAG_FREE) - tfputs(T("WIM_RESHDR_FLAG_FREE, "), out); - if (flags & WIM_RESHDR_FLAG_METADATA) - tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out); - if (flags & WIM_RESHDR_FLAG_SPANNED) - tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out); - tputc(T('\n'), out); - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - if (lte->wim->filename) { - tfprintf(out, T("WIM file = `%"TS"'\n"), - lte->wim->filename); - } - break; -#ifdef __WIN32__ - case RESOURCE_WIN32_ENCRYPTED: -#endif 
- case RESOURCE_IN_FILE_ON_DISK: - tfprintf(out, T("File on Disk = `%"TS"'\n"), - lte->file_on_disk); - break; -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - tfprintf(out, T("Staging File = `%"TS"'\n"), - lte->staging_file_name); - break; -#endif - default: - break; - } - tputc(T('\n'), out); -} - -void -lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, - struct wimlib_resource_entry *wentry) -{ - wentry->uncompressed_size = lte->resource_entry.original_size; - wentry->compressed_size = lte->resource_entry.size; - wentry->offset = lte->resource_entry.offset; - copy_hash(wentry->sha1_hash, lte->hash); - wentry->part_number = lte->part_number; - wentry->reference_count = lte->refcnt; - wentry->is_compressed = (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) != 0; - wentry->is_metadata = (lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) != 0; - wentry->is_free = (lte->resource_entry.flags & WIM_RESHDR_FLAG_FREE) != 0; - wentry->is_spanned = (lte->resource_entry.flags & WIM_RESHDR_FLAG_SPANNED) != 0; -} - -struct iterate_lte_context { - wimlib_iterate_lookup_table_callback_t cb; - void *user_ctx; -}; - -static int -do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx) -{ - struct iterate_lte_context *ctx = _ctx; - struct wimlib_resource_entry entry; - - lte_to_wimlib_resource_entry(lte, &entry); - return (*ctx->cb)(&entry, ctx->user_ctx); -} - -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_iterate_lookup_table(WIMStruct *wim, int flags, - wimlib_iterate_lookup_table_callback_t cb, - void *user_ctx) -{ - struct iterate_lte_context ctx = { - .cb = cb, - .user_ctx = user_ctx, - }; - if (wim->hdr.part_number == 1) { - int ret; - for (int i = 0; i < wim->hdr.image_count; i++) { - ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte, - &ctx); - if (ret) - return ret; - } - } - return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx); -} - -/* Given a SHA1 message digest, return the 
corresponding entry in the WIM's - * lookup table, or NULL if there is none. */ +/* Allocate a stream entry for the contents of the buffer, or re-use an existing + * entry in @lookup_table for the same stream. */ struct wim_lookup_table_entry * -lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) +new_stream_from_data_buffer(const void *buffer, size_t size, + struct wim_lookup_table *lookup_table) { - size_t i; - struct wim_lookup_table_entry *lte; - struct hlist_node *pos; - - wimlib_assert(table != NULL); - wimlib_assert(hash != NULL); - - i = *(size_t*)hash % table->capacity; - hlist_for_each_entry(lte, pos, &table->array[i], hash_list) - if (hashes_equal(hash, lte->hash)) - return lte; - return NULL; -} - -#ifdef WITH_FUSE -/* - * Finds the dentry, lookup table entry, and stream index for a WIM file stream, - * given a path name. - * - * This is only for pre-resolved inodes. - */ -int -wim_pathname_to_stream(WIMStruct *wim, - const tchar *path, - int lookup_flags, - struct wim_dentry **dentry_ret, - struct wim_lookup_table_entry **lte_ret, - u16 *stream_idx_ret) -{ - struct wim_dentry *dentry; - struct wim_lookup_table_entry *lte; - u16 stream_idx; - const tchar *stream_name = NULL; - struct wim_inode *inode; - tchar *p = NULL; - - if (lookup_flags & LOOKUP_FLAG_ADS_OK) { - stream_name = path_stream_name(path); - if (stream_name) { - p = (tchar*)stream_name - 1; - *p = T('\0'); - } - } - - dentry = get_dentry(wim, path); - if (p) - *p = T(':'); - if (!dentry) - return -errno; - - inode = dentry->d_inode; - - if (!inode->i_resolved) - if (inode_resolve_ltes(inode, wim->lookup_table, false)) - return -EIO; - - if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK) - && inode_is_directory(inode)) - return -EISDIR; - - if (stream_name) { - struct wim_ads_entry *ads_entry; - u16 ads_idx; - ads_entry = inode_get_ads_entry(inode, stream_name, - &ads_idx); - if (ads_entry) { - stream_idx = ads_idx + 1; - lte = ads_entry->lte; - goto out; - } else { - return 
-ENOENT; - } + u8 hash[SHA1_HASH_SIZE]; + struct wim_lookup_table_entry *lte, *existing_lte; + + sha1_buffer(buffer, size, hash); + existing_lte = lookup_stream(lookup_table, hash); + if (existing_lte) { + wimlib_assert(existing_lte->size == size); + lte = existing_lte; + lte->refcnt++; } else { - lte = inode->i_lte; - stream_idx = 0; - } -out: - if (dentry_ret) - *dentry_ret = dentry; - if (lte_ret) - *lte_ret = lte; - if (stream_idx_ret) - *stream_idx_ret = stream_idx; - return 0; -} -#endif - -int -resource_not_found_error(const struct wim_inode *inode, const u8 *hash) -{ - if (wimlib_print_errors) { - ERROR("\"%"TS"\": resource not found", inode_first_full_path(inode)); - tfprintf(stderr, T(" SHA-1 message digest of missing resource:\n ")); - print_hash(hash, stderr); - tputc(T('\n'), stderr); - } - return WIMLIB_ERR_RESOURCE_NOT_FOUND; -} - -/* - * Resolve an inode's lookup table entries. - * - * This replaces the SHA1 hash fields (which are used to lookup an entry in the - * lookup table) with pointers directly to the lookup table entries. - * - * If @force is %false: - * If any needed SHA1 message digests are not found in the lookup table, - * WIMLIB_ERR_RESOURCE_NOT_FOUND is returned and the inode is left - * unmodified. - * If @force is %true: - * If any needed SHA1 message digests are not found in the lookup table, - * new entries are allocated and inserted into the lookup table. 
- */ -int -inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table, - bool force) -{ - const u8 *hash; - - if (!inode->i_resolved) { - struct wim_lookup_table_entry *lte, *ads_lte; - - /* Resolve the default file stream */ - lte = NULL; - hash = inode->i_hash; - if (!is_zero_hash(hash)) { - lte = lookup_resource(table, hash); - if (!lte) { - if (force) { - lte = new_lookup_table_entry(); - if (!lte) - return WIMLIB_ERR_NOMEM; - copy_hash(lte->hash, hash); - lookup_table_insert(table, lte); - } else { - goto resource_not_found; - } - } - } - - /* Resolve the alternate data streams */ - struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads]; - for (u16 i = 0; i < inode->i_num_ads; i++) { - struct wim_ads_entry *cur_entry; - - ads_lte = NULL; - cur_entry = &inode->i_ads_entries[i]; - hash = cur_entry->hash; - if (!is_zero_hash(hash)) { - ads_lte = lookup_resource(table, hash); - if (!ads_lte) { - if (force) { - ads_lte = new_lookup_table_entry(); - if (!ads_lte) - return WIMLIB_ERR_NOMEM; - copy_hash(ads_lte->hash, hash); - lookup_table_insert(table, ads_lte); - } else { - goto resource_not_found; - } - } - } - ads_ltes[i] = ads_lte; - } - inode->i_lte = lte; - for (u16 i = 0; i < inode->i_num_ads; i++) - inode->i_ads_entries[i].lte = ads_ltes[i]; - inode->i_resolved = 1; - } - return 0; - -resource_not_found: - return resource_not_found_error(inode, hash); -} - -void -inode_unresolve_ltes(struct wim_inode *inode) -{ - if (inode->i_resolved) { - if (inode->i_lte) - copy_hash(inode->i_hash, inode->i_lte->hash); - else - zero_out_hash(inode->i_hash); - - for (u16 i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].lte) - copy_hash(inode->i_ads_entries[i].hash, - inode->i_ads_entries[i].lte->hash); - else - zero_out_hash(inode->i_ads_entries[i].hash); - } - inode->i_resolved = 0; - } -} - -/* - * Returns the lookup table entry for stream @stream_idx of the inode, where - * stream_idx = 0 means the default un-named file stream, and 
stream_idx >= 1 - * corresponds to an alternate data stream. - * - * This works for both resolved and un-resolved inodes. - */ -struct wim_lookup_table_entry * -inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx, - const struct wim_lookup_table *table) -{ - if (inode->i_resolved) - return inode_stream_lte_resolved(inode, stream_idx); - else - return inode_stream_lte_unresolved(inode, stream_idx, table); -} - -struct wim_lookup_table_entry * -inode_unnamed_lte_resolved(const struct wim_inode *inode) -{ - wimlib_assert(inode->i_resolved); - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - if (inode_stream_name_nbytes(inode, i) == 0 && - !is_zero_hash(inode_stream_hash_resolved(inode, i))) - { - return inode_stream_lte_resolved(inode, i); - } - } - return NULL; -} - -struct wim_lookup_table_entry * -inode_unnamed_lte_unresolved(const struct wim_inode *inode, - const struct wim_lookup_table *table) -{ - wimlib_assert(!inode->i_resolved); - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - if (inode_stream_name_nbytes(inode, i) == 0 && - !is_zero_hash(inode_stream_hash_unresolved(inode, i))) - { - return inode_stream_lte_unresolved(inode, i, table); - } - } - return NULL; -} - -/* Return the lookup table entry for the unnamed data stream of an inode, or - * NULL if there is none. - * - * You'd think this would be easier than it actually is, since the unnamed data - * stream should be the one referenced from the inode itself. Alas, if there - * are named data streams, Microsoft's "imagex.exe" program will put the unnamed - * data stream in one of the alternate data streams instead of inside the WIM - * dentry itself. So we need to check the alternate data streams too. 
- * - * Also, note that a dentry may appear to have more than one unnamed stream, but - * if the SHA1 message digest is all 0's then the corresponding stream does not - * really "count" (this is the case for the inode's own file stream when the - * file stream that should be there is actually in one of the alternate stream - * entries.). This is despite the fact that we may need to extract such a - * missing entry as an empty file or empty named data stream. - */ -struct wim_lookup_table_entry * -inode_unnamed_lte(const struct wim_inode *inode, - const struct wim_lookup_table *table) -{ - if (inode->i_resolved) - return inode_unnamed_lte_resolved(inode); - else - return inode_unnamed_lte_unresolved(inode, table); -} - -/* Returns the SHA1 message digest of the unnamed data stream of a WIM inode, or - * 'zero_hash' if the unnamed data stream is missing has all zeroes in its SHA1 - * message digest field. */ -const u8 * -inode_unnamed_stream_hash(const struct wim_inode *inode) -{ - const u8 *hash; - - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - if (inode_stream_name_nbytes(inode, i) == 0) { - hash = inode_stream_hash(inode, i); - if (!is_zero_hash(hash)) - return hash; + void *buffer_copy; + lte = new_lookup_table_entry(); + if (lte == NULL) + return NULL; + buffer_copy = memdup(buffer, size); + if (buffer_copy == NULL) { + free_lookup_table_entry(lte); + return NULL; } + lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; + lte->attached_buffer = buffer_copy; + lte->size = size; + copy_hash(lte->hash, hash); + lookup_table_insert(lookup_table, lte); } - return zero_hash; -} - - -static int -lte_add_stream_size(struct wim_lookup_table_entry *lte, void *total_bytes_p) -{ - *(u64*)total_bytes_p += lte->resource_entry.size; - return 0; -} - -u64 -lookup_table_total_stream_size(struct wim_lookup_table *table) -{ - u64 total_size = 0; - for_lookup_table_entry(table, lte_add_stream_size, &total_size); - return total_size; -} - -struct wim_lookup_table_entry ** 
-retrieve_lte_pointer(struct wim_lookup_table_entry *lte) -{ - wimlib_assert(lte->unhashed); - struct wim_inode *inode = lte->back_inode; - u32 stream_id = lte->back_stream_id; - if (stream_id == 0) - return &inode->i_lte; - else - for (u16 i = 0; i < inode->i_num_ads; i++) - if (inode->i_ads_entries[i].stream_id == stream_id) - return &inode->i_ads_entries[i].lte; - wimlib_assert(0); - return NULL; + return lte; } /* Calculate the SHA1 message digest of a stream and move it from the list of @@ -1300,212 +1217,100 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, * the SHA1 has been calculated. */ back_ptr = retrieve_lte_pointer(lte); - ret = sha1_resource(lte); + ret = sha1_stream(lte); if (ret) return ret; /* Look for a duplicate stream */ - duplicate_lte = lookup_resource(lookup_table, lte->hash); + duplicate_lte = lookup_stream(lookup_table, lte->hash); list_del(<e->unhashed_list); if (duplicate_lte) { /* We have a duplicate stream. Transfer the reference counts - * from this stream to the duplicate, update the reference to + * from this stream to the duplicate and update the reference to * this stream (in an inode or ads_entry) to point to the - * duplicate, then free this stream. */ + * duplicate. The caller is responsible for freeing @lte if + * needed. */ wimlib_assert(!(duplicate_lte->unhashed)); + wimlib_assert(duplicate_lte->size == lte->size); duplicate_lte->refcnt += lte->refcnt; - duplicate_lte->out_refcnt += lte->out_refcnt; + lte->refcnt = 0; *back_ptr = duplicate_lte; - free_lookup_table_entry(lte); lte = duplicate_lte; } else { - /* No duplicate stream, so we need to insert - * this stream into the lookup table and treat - * it as a hashed stream. */ + /* No duplicate stream, so we need to insert this stream into + * the lookup table and treat it as a hashed stream. 
*/ lookup_table_insert(lookup_table, lte); lte->unhashed = 0; } - if (lte_ret) - *lte_ret = lte; + *lte_ret = lte; return 0; } -static int -lte_clone_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) -{ - struct wim_lookup_table *lookup_table = _lookup_table; - - if (lookup_resource(lookup_table, lte->hash)) - return 0; /* Resource already present. */ - - lte = clone_lookup_table_entry(lte); - if (!lte) - return WIMLIB_ERR_NOMEM; - lte->out_refcnt = 1; - lookup_table_insert(lookup_table, lte); - return 0; -} - -static int -lte_delete_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) -{ - struct wim_lookup_table *lookup_table = _lookup_table; - - if (lte->out_refcnt) { - lookup_table_unlink(lookup_table, lte); - free_lookup_table_entry(lte); - } - return 0; -} - -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_reference_resources(WIMStruct *wim, - WIMStruct **resource_wims, unsigned num_resource_wims, - int ref_flags) +void +lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, + struct wimlib_resource_entry *wentry) { - int ret; - unsigned i; - - if (wim == NULL) - return WIMLIB_ERR_INVALID_PARAM; - - if (num_resource_wims != 0 && resource_wims == NULL) - return WIMLIB_ERR_INVALID_PARAM; - - for (i = 0; i < num_resource_wims; i++) - if (resource_wims[i] == NULL) - return WIMLIB_ERR_INVALID_PARAM; - - for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); - - for (i = 0; i < num_resource_wims; i++) { - ret = for_lookup_table_entry(resource_wims[i]->lookup_table, - lte_clone_if_new, - wim->lookup_table); - if (ret) - goto out_rollback; + memset(wentry, 0, sizeof(*wentry)); + + wentry->uncompressed_size = lte->size; + if (lte->resource_location == RESOURCE_IN_WIM) { + wentry->part_number = lte->rspec->wim->hdr.part_number; + if (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + wentry->compressed_size = 0; + wentry->offset = lte->offset_in_res; + } else { + wentry->compressed_size = 
lte->rspec->size_in_wim; + wentry->offset = lte->rspec->offset_in_wim; + } + wentry->raw_resource_offset_in_wim = lte->rspec->offset_in_wim; + /*wentry->raw_resource_uncompressed_size = lte->rspec->uncompressed_size;*/ + wentry->raw_resource_compressed_size = lte->rspec->size_in_wim; } - return 0; - -out_rollback: - for_lookup_table_entry(wim->lookup_table, lte_delete_if_new, - wim->lookup_table); - return ret; + copy_hash(wentry->sha1_hash, lte->hash); + wentry->reference_count = lte->refcnt; + wentry->is_compressed = (lte->flags & WIM_RESHDR_FLAG_COMPRESSED) != 0; + wentry->is_metadata = (lte->flags & WIM_RESHDR_FLAG_METADATA) != 0; + wentry->is_free = (lte->flags & WIM_RESHDR_FLAG_FREE) != 0; + wentry->is_spanned = (lte->flags & WIM_RESHDR_FLAG_SPANNED) != 0; + wentry->packed = (lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) != 0; } -static int -reference_resource_paths(WIMStruct *wim, - const tchar * const *resource_wimfiles, - unsigned num_resource_wimfiles, - int ref_flags, - int open_flags, - wimlib_progress_func_t progress_func) -{ - WIMStruct **resource_wims; - unsigned i; - int ret; - - resource_wims = CALLOC(num_resource_wimfiles, sizeof(resource_wims[0])); - if (!resource_wims) - return WIMLIB_ERR_NOMEM; - - for (i = 0; i < num_resource_wimfiles; i++) { - DEBUG("Referencing resources from path \"%"TS"\"", - resource_wimfiles[i]); - ret = wimlib_open_wim(resource_wimfiles[i], open_flags, - &resource_wims[i], progress_func); - if (ret) - goto out_free_resource_wims; - } - - ret = wimlib_reference_resources(wim, resource_wims, - num_resource_wimfiles, ref_flags); - if (ret) - goto out_free_resource_wims; - - for (i = 0; i < num_resource_wimfiles; i++) - list_add_tail(&resource_wims[i]->subwim_node, &wim->subwims); - - ret = 0; - goto out_free_array; - -out_free_resource_wims: - for (i = 0; i < num_resource_wimfiles; i++) - wimlib_free(resource_wims[i]); -out_free_array: - FREE(resource_wims); - return ret; -} +struct iterate_lte_context { + 
wimlib_iterate_lookup_table_callback_t cb; + void *user_ctx; +}; static int -reference_resource_glob(WIMStruct *wim, const tchar *refglob, - int ref_flags, int open_flags, - wimlib_progress_func_t progress_func) +do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx) { - glob_t globbuf; - int ret; - - /* Note: glob() is replaced in Windows native builds. */ - ret = tglob(refglob, GLOB_ERR | GLOB_NOSORT, NULL, &globbuf); - if (ret) { - if (ret == GLOB_NOMATCH) { - if (ref_flags & WIMLIB_REF_FLAG_GLOB_ERR_ON_NOMATCH) { - ERROR("Found no files for glob \"%"TS"\"", refglob); - return WIMLIB_ERR_GLOB_HAD_NO_MATCHES; - } else { - return reference_resource_paths(wim, - &refglob, - 1, - ref_flags, - open_flags, - progress_func); - } - } else { - ERROR_WITH_ERRNO("Failed to process glob \"%"TS"\"", refglob); - if (ret == GLOB_NOSPACE) - return WIMLIB_ERR_NOMEM; - else - return WIMLIB_ERR_READ; - } - } + struct iterate_lte_context *ctx = _ctx; + struct wimlib_resource_entry entry; - ret = reference_resource_paths(wim, - (const tchar * const *)globbuf.gl_pathv, - globbuf.gl_pathc, - ref_flags, - open_flags, - progress_func); - globfree(&globbuf); - return ret; + lte_to_wimlib_resource_entry(lte, &entry); + return (*ctx->cb)(&entry, ctx->user_ctx); } /* API function documented in wimlib.h */ WIMLIBAPI int -wimlib_reference_resource_files(WIMStruct *wim, - const tchar * const * resource_wimfiles_or_globs, - unsigned count, - int ref_flags, - int open_flags, - wimlib_progress_func_t progress_func) +wimlib_iterate_lookup_table(WIMStruct *wim, int flags, + wimlib_iterate_lookup_table_callback_t cb, + void *user_ctx) { - unsigned i; - int ret; + if (flags != 0) + return WIMLIB_ERR_INVALID_PARAM; - if (ref_flags & WIMLIB_REF_FLAG_GLOB_ENABLE) { - for (i = 0; i < count; i++) { - ret = reference_resource_glob(wim, - resource_wimfiles_or_globs[i], - ref_flags, - open_flags, - progress_func); + struct iterate_lte_context ctx = { + .cb = cb, + .user_ctx = user_ctx, + }; + if 
(wim->hdr.part_number == 1) { + int ret; + for (int i = 0; i < wim->hdr.image_count; i++) { + ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte, + &ctx); if (ret) return ret; } - return 0; - } else { - return reference_resource_paths(wim, resource_wimfiles_or_globs, - count, ref_flags, - open_flags, progress_func); } + return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx); }