#include "lookup_table.h"
#include "buffer_io.h"
#include <errno.h>
+#include <stdlib.h>
#ifdef WITH_FUSE
#include <unistd.h>
struct wim_lookup_table *table;
struct hlist_head *array;
- table = MALLOC(sizeof(struct wim_lookup_table));
+ table = CALLOC(1, sizeof(struct wim_lookup_table));
if (table) {
array = CALLOC(capacity, sizeof(array[0]));
if (array) {
switch (new->resource_location) {
#ifdef __WIN32__
case RESOURCE_WIN32:
+ case RESOURCE_WIN32_ENCRYPTED:
+#else
+ case RESOURCE_IN_FILE_ON_DISK:
#endif
+#ifdef WITH_FUSE
case RESOURCE_IN_STAGING_FILE:
- case RESOURCE_IN_FILE_ON_DISK:
BUILD_BUG_ON((void*)&old->file_on_disk !=
(void*)&old->staging_file_name);
- new->staging_file_name = TSTRDUP(old->staging_file_name);
- if (!new->staging_file_name)
+#endif
+ new->file_on_disk = TSTRDUP(old->file_on_disk);
+ if (!new->file_on_disk)
goto out_free;
break;
case RESOURCE_IN_ATTACHED_BUFFER:
{
if (lte) {
switch (lte->resource_location) {
- case RESOURCE_IN_STAGING_FILE:
- case RESOURCE_IN_ATTACHED_BUFFER:
- case RESOURCE_IN_FILE_ON_DISK:
-#ifdef __WIN32__
+ #ifdef __WIN32__
case RESOURCE_WIN32:
-#endif
+ case RESOURCE_WIN32_ENCRYPTED:
+ #else
+ case RESOURCE_IN_FILE_ON_DISK:
+ #endif
+ #ifdef WITH_FUSE
+ case RESOURCE_IN_STAGING_FILE:
BUILD_BUG_ON((void*)&lte->file_on_disk !=
(void*)&lte->staging_file_name);
+ #endif
+ case RESOURCE_IN_ATTACHED_BUFFER:
BUILD_BUG_ON((void*)&lte->file_on_disk !=
(void*)&lte->attached_buffer);
FREE(lte->file_on_disk);
#ifdef WITH_FUSE
if (lte->resource_location == RESOURCE_IN_STAGING_FILE) {
unlink(lte->staging_file_name);
- list_del(&lte->staging_list);
+ list_del(&lte->unhashed_list);
}
#endif
free_lookup_table_entry(lte);
wimlib_assert(lte != NULL);
wimlib_assert(lte->refcnt != 0);
if (--lte->refcnt == 0) {
- lookup_table_unlink(table, lte);
+ if (!lte->unhashed)
+ lookup_table_unlink(table, lte);
#ifdef WITH_FUSE
if (lte->num_opened_fds == 0)
#endif
hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
hash_list)
{
+ wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA));
ret = visitor(lte, arg);
- if (ret != 0)
+ if (ret)
return ret;
}
}
return 0;
}
+/* qsort()-style comparator for an array of lookup table entry pointers.
+ *
+ * @p1, @p2:	Each points to an array element, i.e. to a
+ *		`struct wim_lookup_table_entry *'.
+ *
+ * Orders the entries by ascending resource_entry.offset.  Returns -1 if the
+ * first entry has the smaller offset, 1 if it has the larger offset, and 0 if
+ * the offsets are equal. */
+int
+cmp_streams_by_wim_position(const void *p1, const void *p2)
+{
+	const struct wim_lookup_table_entry *lte1, *lte2;
+
+	/* The elements are only read here, so keep the const qualifier on the
+	 * element pointer instead of casting it away. */
+	lte1 = *(const struct wim_lookup_table_entry * const *)p1;
+	lte2 = *(const struct wim_lookup_table_entry * const *)p2;
+
+	if (lte1->resource_entry.offset < lte2->resource_entry.offset)
+		return -1;
+	if (lte1->resource_entry.offset > lte2->resource_entry.offset)
+		return 1;
+	return 0;
+}
+
+
+/* Visitor callback: store @lte in the array slot that the cursor behind @_pp
+ * currently designates, then move that cursor forward by one slot.
+ * Always returns 0. */
+static int
+add_lte_to_array(struct wim_lookup_table_entry *lte,
+		 void *_pp)
+{
+	struct wim_lookup_table_entry ***cursor = _pp;
+	struct wim_lookup_table_entry **slot = *cursor;
+
+	*slot = lte;
+	*cursor = slot + 1;
+	return 0;
+}
+
+/* Iterate through the lookup table entries, but first sort them by stream
+ * offset in the WIM.  Caution: this is intended to be used when the stream
+ * offset field has actually been set.
+ *
+ * @table:	Lookup table whose entries are visited.
+ * @visitor:	Called once per entry, in ascending offset order.  A nonzero
+ *		return value stops the iteration.
+ * @arg:	Passed through unchanged to @visitor.
+ *
+ * Returns 0 if every call to @visitor returned 0 (or the table has no
+ * entries), WIMLIB_ERR_NOMEM if the temporary array could not be allocated,
+ * or otherwise the first nonzero value returned by @visitor. */
+int
+for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
+				  int (*visitor)(struct wim_lookup_table_entry *,
+						 void *),
+				  void *arg)
+{
+	struct wim_lookup_table_entry **lte_array, **p;
+	size_t num_streams = table->num_entries;
+	int ret;
+
+	/* Nothing to visit.  Return before allocating: a zero-byte allocation
+	 * is permitted to yield NULL (assuming MALLOC behaves like malloc()),
+	 * and that would be misreported as an out-of-memory error. */
+	if (num_streams == 0)
+		return 0;
+
+	lte_array = MALLOC(num_streams * sizeof(lte_array[0]));
+	if (!lte_array)
+		return WIMLIB_ERR_NOMEM;
+
+	/* Gather every entry into the flat array.  add_lte_to_array() always
+	 * returns 0, so the traversal cannot stop early. */
+	p = lte_array;
+	for_lookup_table_entry(table, add_lte_to_array, &p);
+
+	wimlib_assert(p == lte_array + num_streams);
+
+	qsort(lte_array, num_streams, sizeof(lte_array[0]),
+	      cmp_streams_by_wim_position);
+
+	ret = 0;
+	for (size_t i = 0; i < num_streams; i++) {
+		ret = visitor(lte_array[i], arg);
+		if (ret)
+			break;
+	}
+	FREE(lte_array);
+	return ret;
+}
/*
* Reads the lookup table from a WIM file.
+ *
+ * Saves lookup table entries for non-metadata streams in a hash table, and
+ * saves the metadata entry for each image in a special per-image location (the
+ * image_metadata array).
*/
int
read_lookup_table(WIMStruct *w)
u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
int ret;
struct wim_lookup_table *table;
- struct wim_lookup_table_entry *cur_entry = NULL, *duplicate_entry;
+ struct wim_lookup_table_entry *cur_entry, *duplicate_entry;
if (resource_is_compressed(&w->hdr.lookup_table_res_entry)) {
ERROR("Didn't expect a compressed lookup table!");
if (!table)
return WIMLIB_ERR_NOMEM;
+ w->current_image = 0;
while (num_entries--) {
const u8 *p;
"table");
}
ret = WIMLIB_ERR_READ;
- goto out;
+ goto out_free_lookup_table;
}
cur_entry = new_lookup_table_entry();
if (!cur_entry) {
ret = WIMLIB_ERR_NOMEM;
- goto out;
+ goto out_free_lookup_table;
}
+
cur_entry->wim = w;
cur_entry->resource_location = RESOURCE_IN_WIM;
-
p = get_resource_entry(buf, &cur_entry->resource_entry);
p = get_u16(p, &cur_entry->part_number);
p = get_u32(p, &cur_entry->refcnt);
goto out_free_cur_entry;
}
- /* Ordinarily, no two streams should share the same SHA1 message
- * digest. However, this constraint can be broken for metadata
- * resources--- two identical images will have the same metadata
- * resource, but their lookup table entries are not shared. */
- duplicate_entry = __lookup_resource(table, cur_entry->hash);
- if (duplicate_entry
- && !((duplicate_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)
- && cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA))
- {
- #ifdef ENABLE_ERROR_MESSAGES
- ERROR("The WIM lookup table contains two entries with the "
- "same SHA1 message digest!");
- ERROR("The first entry is:");
- print_lookup_table_entry(duplicate_entry, stderr);
- ERROR("The second entry is:");
- print_lookup_table_entry(cur_entry, stderr);
- #endif
- ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
- goto out_free_cur_entry;
- }
-
if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
&& (cur_entry->resource_entry.size !=
cur_entry->resource_entry.original_size))
ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
goto out_free_cur_entry;
}
- if ((cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)
- && cur_entry->refcnt != 1)
- {
- #ifdef ENABLE_ERROR_MESSAGES
- ERROR("Found metadata resource with refcnt != 1:");
- print_lookup_table_entry(cur_entry, stderr);
- #endif
- ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
- goto out_free_cur_entry;
+
+ if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) {
+ /* Lookup table entry for a metadata resource */
+ if (cur_entry->refcnt != 1) {
+ #ifdef ENABLE_ERROR_MESSAGES
+ ERROR("Found metadata resource with refcnt != 1:");
+ print_lookup_table_entry(cur_entry, stderr);
+ #endif
+ ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+ goto out_free_cur_entry;
+ }
+
+ if (w->hdr.part_number != 1) {
+ ERROR("Found a metadata resource in a "
+ "non-first part of the split WIM!");
+ ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+ goto out_free_cur_entry;
+ }
+ if (w->current_image == w->hdr.image_count) {
+ ERROR("The WIM header says there are %u images "
+ "in the WIM, but we found more metadata "
+ "resources than this", w->hdr.image_count);
+ ret = WIMLIB_ERR_IMAGE_COUNT;
+ goto out_free_cur_entry;
+ }
+
+ /* Notice very carefully: We are assigning the metadata
+ * resources in the exact order mirrored by their lookup
+ * table entries on disk, which is the behavior of
+ * Microsoft's software. In particular, this overrides
+ * the actual locations of the metadata resources
+ * themselves in the WIM file as well as any information
+ * written in the XML data. */
+ DEBUG("Found metadata resource for image %u at "
+ "offset %"PRIu64".",
+ w->current_image + 1,
+ cur_entry->resource_entry.offset);
+ w->image_metadata[
+ w->current_image++]->metadata_lte = cur_entry;
+ } else {
+ /* Lookup table entry for a stream that is not a
+ * metadata resource */
+ duplicate_entry = __lookup_resource(table, cur_entry->hash);
+ if (duplicate_entry) {
+ #ifdef ENABLE_ERROR_MESSAGES
+ ERROR("The WIM lookup table contains two entries with the "
+ "same SHA1 message digest!");
+ ERROR("The first entry is:");
+ print_lookup_table_entry(duplicate_entry, stderr);
+ ERROR("The second entry is:");
+ print_lookup_table_entry(cur_entry, stderr);
+ #endif
+ ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+ goto out_free_cur_entry;
+ }
+ lookup_table_insert(table, cur_entry);
}
- lookup_table_insert(table, cur_entry);
+ }
+ if (w->hdr.part_number == 1 &&
+ w->current_image != w->hdr.image_count)
+ {
+ ERROR("The WIM header says there are %u images "
+ "in the WIM, but we only found %d metadata "
+ "resources!", w->hdr.image_count, w->current_image);
+ ret = WIMLIB_ERR_IMAGE_COUNT;
+ goto out_free_lookup_table;
}
DEBUG("Done reading lookup table.");
w->lookup_table = table;
- return 0;
+ ret = 0;
+ goto out;
out_free_cur_entry:
FREE(cur_entry);
-out:
+out_free_lookup_table:
free_lookup_table(table);
+out:
+ w->current_image = 0;
return ret;
}
* Writes a lookup table entry to the output file.
*/
int
-write_lookup_table_entry(struct wim_lookup_table_entry *lte, void *__out)
+write_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_out)
{
FILE *out;
u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
u8 *p;
- out = __out;
+ out = _out;
/* Don't write entries that have not had file resources or metadata
* resources written for them. */
return 0;
}
-/* Writes the lookup table to the output file. */
+/* Writes the WIM lookup table to the output file. */
int
-write_lookup_table(struct wim_lookup_table *table, FILE *out,
- struct resource_entry *out_res_entry)
+write_lookup_table(WIMStruct *w, int image, struct resource_entry *out_res_entry)
{
+ FILE *out = w->out_fp;
off_t start_offset, end_offset;
int ret;
+ int start_image, end_image;
start_offset = ftello(out);
if (start_offset == -1)
return WIMLIB_ERR_WRITE;
- ret = for_lookup_table_entry(table, write_lookup_table_entry, out);
- if (ret != 0)
+ /* Write lookup table entries for metadata resources */
+ if (image == WIMLIB_ALL_IMAGES) {
+ start_image = 1;
+ end_image = w->hdr.image_count;
+ } else {
+ start_image = image;
+ end_image = image;
+ }
+ for (int i = start_image; i <= end_image; i++) {
+ struct wim_lookup_table_entry *metadata_lte;
+
+ metadata_lte = w->image_metadata[i - 1]->metadata_lte;
+ metadata_lte->out_refcnt = 1;
+ metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA;
+ ret = write_lookup_table_entry(metadata_lte, out);
+ if (ret)
+ return ret;
+ }
+
+ /* Write lookup table entries for other resources */
+ ret = for_lookup_table_entry(w->lookup_table, write_lookup_table_entry, out);
+ if (ret)
return ret;
+ /* Fill in the resource entry for the lookup table itself */
end_offset = ftello(out);
if (end_offset == -1)
return WIMLIB_ERR_WRITE;
out_res_entry->size = end_offset - start_offset;
out_res_entry->original_size = end_offset - start_offset;
out_res_entry->flags = WIM_RESHDR_FLAG_METADATA;
-
return 0;
}
-
int
-lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *ignore)
+lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
{
lte->real_refcnt = 0;
return 0;
}
int
-lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore)
+lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
{
lte->out_refcnt = 0;
return 0;
}
int
-lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *ignore)
+lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore)
{
if (lte->extracted_file != NULL) {
FREE(lte->extracted_file);
tfprintf(out, T("Part Number = %hu\n"), lte->part_number);
tfprintf(out, T("Reference Count = %u\n"), lte->refcnt);
- tfprintf(out, T("Hash = 0x"));
- print_hash(lte->hash);
- tputc(T('\n'), out);
+ if (lte->unhashed) {
+ tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"),
+ lte->back_inode, lte->back_stream_id);
+ } else {
+ tfprintf(out, T("Hash = 0x"));
+ print_hash(lte->hash, out);
+ tputc(T('\n'), out);
+ }
tfprintf(out, T("Flags = "));
u8 flags = lte->resource_entry.flags;
break;
#ifdef __WIN32__
case RESOURCE_WIN32:
-#endif
+ case RESOURCE_WIN32_ENCRYPTED:
+#else
case RESOURCE_IN_FILE_ON_DISK:
+#endif
tfprintf(out, T("File on Disk = `%"TS"'\n"),
lte->file_on_disk);
break;
+#ifdef WITH_FUSE
case RESOURCE_IN_STAGING_FILE:
tfprintf(out, T("Staging File = `%"TS"'\n"),
lte->staging_file_name);
break;
+#endif
default:
break;
}
return inode_stream_lte_unresolved(inode, stream_idx, table);
}
+/* Scan the streams of a resolved inode, indices 0 through i_num_ads inclusive,
+ * and return the lookup table entry of the first stream whose name length is 0
+ * bytes and whose hash is not all zeroes.  Returns NULL if no stream
+ * qualifies. */
+struct wim_lookup_table_entry *
+inode_unnamed_lte_resolved(const struct wim_inode *inode)
+{
+	unsigned stream_idx;
+
+	wimlib_assert(inode->i_resolved);
+
+	for (stream_idx = 0; stream_idx <= inode->i_num_ads; stream_idx++) {
+		/* Named streams are never candidates. */
+		if (inode_stream_name_nbytes(inode, stream_idx) != 0)
+			continue;
+		/* Skip unnamed streams with a zero hash. */
+		if (is_zero_hash(inode_stream_hash_resolved(inode, stream_idx)))
+			continue;
+		return inode_stream_lte_resolved(inode, stream_idx);
+	}
+	return NULL;
+}
+
+/* Scan the streams of an unresolved inode, indices 0 through i_num_ads
+ * inclusive, and return the result of looking up, via @table, the first stream
+ * whose name length is 0 bytes and whose hash is not all zeroes.  Returns NULL
+ * if no stream qualifies. */
+struct wim_lookup_table_entry *
+inode_unnamed_lte_unresolved(const struct wim_inode *inode,
+			     const struct wim_lookup_table *table)
+{
+	unsigned stream_idx;
+
+	wimlib_assert(!inode->i_resolved);
+
+	for (stream_idx = 0; stream_idx <= inode->i_num_ads; stream_idx++) {
+		/* Named streams are never candidates. */
+		if (inode_stream_name_nbytes(inode, stream_idx) != 0)
+			continue;
+		/* Skip unnamed streams with a zero hash. */
+		if (is_zero_hash(inode_stream_hash_unresolved(inode, stream_idx)))
+			continue;
+		return inode_stream_lte_unresolved(inode, stream_idx, table);
+	}
+	return NULL;
+}
/* Return the lookup table entry for the unnamed data stream of an inode, or
* NULL if there is none.
for_lookup_table_entry(table, lte_add_stream_size, &total_size);
return total_size;
}
+
+/* For an unhashed lookup table entry, return the address of the pointer field
+ * selected by its back-reference (back_inode, back_stream_id): the inode's
+ * i_lte field when the stream ID is 0, otherwise the lte field of the ADS
+ * entry carrying that stream ID.  If no ADS entry matches, the assertion
+ * fires and NULL is returned. */
+struct wim_lookup_table_entry **
+retrieve_lte_pointer(struct wim_lookup_table_entry *lte)
+{
+	struct wim_inode *owner;
+	u32 wanted_id;
+
+	wimlib_assert(lte->unhashed);
+	owner = lte->back_inode;
+	wanted_id = lte->back_stream_id;
+
+	/* Stream ID 0 selects the inode's own i_lte field. */
+	if (wanted_id == 0)
+		return &owner->i_lte;
+
+	for (u16 idx = 0; idx < owner->i_num_ads; idx++) {
+		if (owner->i_ads_entries[idx].stream_id == wanted_id)
+			return &owner->i_ads_entries[idx].lte;
+	}
+
+	/* No ADS entry carries the requested stream ID. */
+	wimlib_assert(0);
+	return NULL;
+}
+
+/* Calculate the SHA1 message digest of a stream and move it from the list of
+ * unhashed streams to the stream lookup table, possibly joining it with an
+ * existing lookup table entry for an identical stream.
+ *
+ * @lte:		An unhashed lookup table entry.
+ * @lookup_table:	Lookup table for the WIM.
+ * @lte_ret:		On success, write a pointer to the resulting lookup table
+ *			entry to this location.  This will be the same as @lte
+ *			if it was inserted into the lookup table, or different if
+ *			a duplicate stream was found.  May be NULL if the caller
+ *			does not need the result.
+ *
+ * Returns 0 on success; nonzero if there is an error reading the stream.  On
+ * error, @lte is left on the unhashed list and nothing is written to @lte_ret.
+ */
+int
+hash_unhashed_stream(struct wim_lookup_table_entry *lte,
+		     struct wim_lookup_table *lookup_table,
+		     struct wim_lookup_table_entry **lte_ret)
+{
+	int ret;
+	struct wim_lookup_table_entry *duplicate_lte;
+	struct wim_lookup_table_entry **back_ptr;
+
+	wimlib_assert(lte->unhashed);
+
+	/* back_ptr must be saved because @back_inode and @back_stream_id are in
+	 * union with the SHA1 message digest and will no longer be valid once
+	 * the SHA1 has been calculated. */
+	back_ptr = retrieve_lte_pointer(lte);
+
+	ret = sha1_resource(lte);
+	if (ret)
+		return ret;
+
+	/* Look for a duplicate stream */
+	duplicate_lte = __lookup_resource(lookup_table, lte->hash);
+
+	/* Hashing succeeded, so the entry leaves the unhashed list whichever
+	 * branch is taken below. */
+	list_del(&lte->unhashed_list);
+	if (duplicate_lte) {
+		/* We have a duplicate stream.  Transfer the reference counts
+		 * from this stream to the duplicate, update the reference to
+		 * this stream (in an inode or ads_entry) to point to the
+		 * duplicate, then free this stream. */
+		wimlib_assert(!(duplicate_lte->unhashed));
+		duplicate_lte->refcnt += lte->refcnt;
+		duplicate_lte->out_refcnt += lte->refcnt;
+		*back_ptr = duplicate_lte;
+		free_lookup_table_entry(lte);
+		lte = duplicate_lte;
+	} else {
+		/* No duplicate stream, so we need to insert
+		 * this stream into the lookup table and treat
+		 * it as a hashed stream. */
+		lookup_table_insert(lookup_table, lte);
+		lte->unhashed = 0;
+	}
+	if (lte_ret)
+		*lte_ret = lte;
+	return 0;
+}
+