X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flookup_table.c;h=f7fb7c3a5dff9ee263e28b5c748798adc437d1f6;hp=e2ca5920745f84b733bdfb794928c4f5a516e821;hb=df1050ef86e54f027912a15fe60bf67c8ab43d2c;hpb=4f8059f2d0a74a9922128b162d9c9343b305999c diff --git a/src/lookup_table.c b/src/lookup_table.c index e2ca5920..f7fb7c3a 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -2,7 +2,7 @@ * lookup_table.c * * Lookup table, implemented as a hash table, that maps SHA1 message digests to - * data streams. + * data streams; plus code to read and write the corresponding on-disk data. */ /* @@ -24,14 +24,25 @@ * along with wimlib; if not, see http://www.gnu.org/licenses/. */ -#include "wimlib_internal.h" -#include "lookup_table.h" -#include "buffer_io.h" +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/endianness.h" +#include "wimlib/error.h" +#include "wimlib/file_io.h" +#include "wimlib/glob.h" +#include "wimlib/lookup_table.h" +#include "wimlib/metadata.h" +#include "wimlib/paths.h" +#include "wimlib/resource.h" +#include "wimlib/util.h" +#include "wimlib/write.h" + #include #include - #ifdef WITH_FUSE -#include +# include /* for unlink() */ #endif struct wim_lookup_table * @@ -58,7 +69,7 @@ new_lookup_table(size_t capacity) } struct wim_lookup_table_entry * -new_lookup_table_entry() +new_lookup_table_entry(void) { struct wim_lookup_table_entry *lte; @@ -79,18 +90,15 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) { struct wim_lookup_table_entry *new; - new = MALLOC(sizeof(*new)); + new = memdup(old, sizeof(struct wim_lookup_table_entry)); if (!new) return NULL; - memcpy(new, old, sizeof(*old)); new->extracted_file = NULL; switch (new->resource_location) { + case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ - case RESOURCE_WIN32: case RESOURCE_WIN32_ENCRYPTED: -#else - case RESOURCE_IN_FILE_ON_DISK: #endif #ifdef WITH_FUSE case RESOURCE_IN_STAGING_FILE: @@ -102,32 +110,30 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) goto out_free; break; case RESOURCE_IN_ATTACHED_BUFFER: - new->attached_buffer = MALLOC(wim_resource_size(old)); + new->attached_buffer = memdup(old->attached_buffer, + wim_resource_size(old)); if (!new->attached_buffer) goto out_free; - memcpy(new->attached_buffer, old->attached_buffer, - wim_resource_size(old)); break; #ifdef WITH_NTFS_3G case RESOURCE_IN_NTFS_VOLUME: if (old->ntfs_loc) { struct ntfs_location *loc; - loc = MALLOC(sizeof(*loc)); + loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location)); if (!loc) goto out_free; - memcpy(loc, old->ntfs_loc, sizeof(*loc)); loc->path = NULL; loc->stream_name = NULL; new->ntfs_loc = loc; loc->path = STRDUP(old->ntfs_loc->path); if (!loc->path) goto out_free; - loc->stream_name = MALLOC((loc->stream_name_nchars + 1) * 2); - if (!loc->stream_name) - goto out_free; - memcpy(loc->stream_name, - old->ntfs_loc->stream_name, - (loc->stream_name_nchars + 1) * 2); + if (loc->stream_name_nchars) { + loc->stream_name = memdup(old->ntfs_loc->stream_name, + loc->stream_name_nchars * 2); + if (!loc->stream_name) + goto out_free; + } } break; #endif @@ -145,11 +151,9 @@ free_lookup_table_entry(struct wim_lookup_table_entry *lte) { if (lte) { switch (lte->resource_location) { + case RESOURCE_IN_FILE_ON_DISK: #ifdef __WIN32__ - case RESOURCE_WIN32: case RESOURCE_WIN32_ENCRYPTED: - #else - case RESOURCE_IN_FILE_ON_DISK: #endif #ifdef WITH_FUSE case RESOURCE_IN_STAGING_FILE: @@ -240,7 +244,9 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, wimlib_assert(lte != NULL); wimlib_assert(lte->refcnt != 0); if (--lte->refcnt == 0) { - if (!lte->unhashed) + if (lte->unhashed) + list_del(<e->unhashed_list); + else lookup_table_unlink(table, lte); #ifdef WITH_FUSE if (lte->num_opened_fds == 0) @@ -283,18 +289,104 @@ for_lookup_table_entry(struct wim_lookup_table *table, return 0; } -int -cmp_streams_by_wim_position(const void *p1, const void *p2) +/* qsort() callback that sorts streams (represented by `struct + * wim_lookup_table_entry's) into an order optimized for reading and writing. + * + * Sorting is done primarily by resource location, then secondarily by a + * per-resource location order. For example, resources in WIM files are sorted + * primarily by part number, then secondarily by offset, as to implement optimal + * reading of either a standalone or split WIM. */ +static int +cmp_streams_by_sequential_order(const void *p1, const void *p2) { const struct wim_lookup_table_entry *lte1, *lte2; + int v; + lte1 = *(const struct wim_lookup_table_entry**)p1; lte2 = *(const struct wim_lookup_table_entry**)p2; - if (lte1->resource_entry.offset < lte2->resource_entry.offset) - return -1; - else if (lte1->resource_entry.offset > lte2->resource_entry.offset) - return 1; - else + + v = (int)lte1->resource_location - (int)lte2->resource_location; + + /* Different resource locations? */ + if (v) + return v; + + switch (lte1->resource_location) { + case RESOURCE_IN_WIM: + + /* Different (possibly split) WIMs? */ + if (lte1->wim != lte2->wim) { + v = memcmp(lte1->wim->hdr.guid, lte2->wim->hdr.guid, + WIM_GID_LEN); + if (v) + return v; + } + + /* Different part numbers in the same WIM? */ + v = (int)lte1->wim->hdr.part_number - (int)lte2->wim->hdr.part_number; + if (v) + return v; + + /* Compare by offset. */ + if (lte1->resource_entry.offset < lte2->resource_entry.offset) + return -1; + else if (lte1->resource_entry.offset > lte2->resource_entry.offset) + return 1; + return 0; + case RESOURCE_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case RESOURCE_WIN32_ENCRYPTED: +#endif + /* Compare files by path: just a heuristic that will place files + * in the same directory next to each other. */ + return tstrcmp(lte1->file_on_disk, lte2->file_on_disk); +#ifdef WITH_NTFS_3G + case RESOURCE_IN_NTFS_VOLUME: + return tstrcmp(lte1->ntfs_loc->path, lte2->ntfs_loc->path); +#endif + default: + /* No additional sorting order defined for this resource + * location (e.g. RESOURCE_IN_ATTACHED_BUFFER); simply compare + * everything equal to each other. */ return 0; + } +} + +int +sort_stream_list_by_sequential_order(struct list_head *stream_list, + size_t list_head_offset) +{ + struct list_head *cur; + struct wim_lookup_table_entry **array; + size_t i; + size_t array_size; + size_t num_streams = 0; + + list_for_each(cur, stream_list) + num_streams++; + + array_size = num_streams * sizeof(array[0]); + array = MALLOC(array_size); + if (!array) + return WIMLIB_ERR_NOMEM; + cur = stream_list->next; + for (i = 0; i < num_streams; i++) { + array[i] = (struct wim_lookup_table_entry*)((u8*)cur - + list_head_offset); + cur = cur->next; + } + + qsort(array, num_streams, sizeof(array[0]), + cmp_streams_by_sequential_order); + + INIT_LIST_HEAD(stream_list); + for (i = 0; i < num_streams; i++) { + list_add_tail((struct list_head*) + ((u8*)array[i] + list_head_offset), + stream_list); + } + FREE(array); + return 0; } @@ -329,7 +421,7 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, wimlib_assert(p == lte_array + num_streams); qsort(lte_array, num_streams, sizeof(lte_array[0]), - cmp_streams_by_wim_position); + cmp_streams_by_sequential_order); ret = 0; for (size_t i = 0; i < num_streams; i++) { ret = visitor(lte_array[i], arg); @@ -340,131 +432,169 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, return ret; } +/* On-disk format of a WIM lookup table entry (stream entry). */ +struct wim_lookup_table_entry_disk { + /* Location, offset, compression status, and metadata status of the + * stream. */ + struct resource_entry_disk resource_entry; + + /* Which part of the split WIM this stream is in; indexed from 1. */ + le16 part_number; + + /* Reference count of this stream over all WIM images. */ + le32 refcnt; + + /* SHA1 message digest of the uncompressed data of this stream, or + * optionally all zeroes if this stream is of zero length. */ + u8 hash[SHA1_HASH_SIZE]; +} _packed_attribute; + +#define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 + +void +lte_init_wim(struct wim_lookup_table_entry *lte, WIMStruct *wim) +{ + lte->resource_location = RESOURCE_IN_WIM; + lte->wim = wim; + if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) + lte->compression_type = wim->compression_type; + else + lte->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; + + if (wim_is_pipable(wim)) + lte->is_pipable = 1; +} + /* * Reads the lookup table from a WIM file. * * Saves lookup table entries for non-metadata streams in a hash table, and * saves the metadata entry for each image in a special per-image location (the * image_metadata array). + * + * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY + * WIMLIB_ERR_RESOURCE_NOT_FOUND */ int -read_lookup_table(WIMStruct *w) +read_wim_lookup_table(WIMStruct *wim) { int ret; + size_t i; size_t num_entries; struct wim_lookup_table *table; struct wim_lookup_table_entry *cur_entry, *duplicate_entry; - u8 table_buf[(BUFFER_SIZE / WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE) * - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE]; - const u8 *p; - off_t offset; - size_t buf_entries_remaining; + void *buf; + + BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != + WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"", - w->hdr.lookup_table_res_entry.offset, - w->hdr.lookup_table_res_entry.original_size); + wim->hdr.lookup_table_res_entry.offset, + wim->hdr.lookup_table_res_entry.size); - if (resource_is_compressed(&w->hdr.lookup_table_res_entry)) { - ERROR("Didn't expect a compressed lookup table!"); - ERROR("Ask the author to implement support for this."); - return WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE; - } + /* Calculate number of entries in the lookup table. */ + num_entries = wim->hdr.lookup_table_res_entry.size / + sizeof(struct wim_lookup_table_entry_disk); + + + /* Read the lookup table into a buffer. */ + ret = res_entry_to_data(&wim->hdr.lookup_table_res_entry, wim, &buf); + if (ret) + goto out; - num_entries = w->hdr.lookup_table_res_entry.size / - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE; + /* Allocate hash table. */ table = new_lookup_table(num_entries * 2 + 1); - if (!table) - return WIMLIB_ERR_NOMEM; + if (!table) { + ERROR("Not enough memory to read lookup table."); + ret = WIMLIB_ERR_NOMEM; + goto out_free_buf; + } + + /* Allocate and initalize `struct wim_lookup_table_entry's from the + * on-disk lookup table. */ + wim->current_image = 0; + for (i = 0; i < num_entries; i++) { + const struct wim_lookup_table_entry_disk *disk_entry = + &((const struct wim_lookup_table_entry_disk*)buf)[i]; - w->current_image = 0; - offset = w->hdr.lookup_table_res_entry.offset; - buf_entries_remaining = 0; - for (; num_entries != 0; num_entries--, buf_entries_remaining--) { - if (buf_entries_remaining == 0) { - size_t entries_to_read, bytes_to_read; - - entries_to_read = min(sizeof(table_buf) / - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE, - num_entries); - bytes_to_read = entries_to_read * - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE; - if (full_pread(w->in_fd, table_buf, - bytes_to_read, offset) != bytes_to_read) - { - ERROR_WITH_ERRNO("Error reading lookup table " - "(offset=%"PRIu64")", offset); - ret = WIMLIB_ERR_READ; - goto out_free_lookup_table; - } - offset += bytes_to_read; - p = table_buf; - buf_entries_remaining = entries_to_read; - } cur_entry = new_lookup_table_entry(); if (!cur_entry) { + ERROR("Not enough memory to read lookup table."); ret = WIMLIB_ERR_NOMEM; goto out_free_lookup_table; } - cur_entry->wim = w; + cur_entry->wim = wim; cur_entry->resource_location = RESOURCE_IN_WIM; - p = get_resource_entry(p, &cur_entry->resource_entry); - p = get_u16(p, &cur_entry->part_number); - p = get_u32(p, &cur_entry->refcnt); - p = get_bytes(p, SHA1_HASH_SIZE, cur_entry->hash); - - if (cur_entry->part_number != w->hdr.part_number) { - ERROR("A lookup table entry in part %hu of the WIM " - "points to part %hu", - w->hdr.part_number, cur_entry->part_number); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + get_resource_entry(&disk_entry->resource_entry, &cur_entry->resource_entry); + cur_entry->part_number = le16_to_cpu(disk_entry->part_number); + cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); + copy_hash(cur_entry->hash, disk_entry->hash); + lte_init_wim(cur_entry, wim); + + if (cur_entry->part_number != wim->hdr.part_number) { + WARNING("A lookup table entry in part %hu of the WIM " + "points to part %hu (ignoring it)", + wim->hdr.part_number, cur_entry->part_number); + free_lookup_table_entry(cur_entry); + continue; } if (is_zero_hash(cur_entry->hash)) { - ERROR("The WIM lookup table contains an entry with a " - "SHA1 message digest of all 0's"); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + WARNING("The WIM lookup table contains an entry with a " + "SHA1 message digest of all 0's (ignoring it)"); + free_lookup_table_entry(cur_entry); + continue; } if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) && (cur_entry->resource_entry.size != cur_entry->resource_entry.original_size)) { - #ifdef ENABLE_ERROR_MESSAGES - ERROR("Found uncompressed resource with original size " - "not the same as compressed size"); - ERROR("The lookup table entry for the resource is as follows:"); - print_lookup_table_entry(cur_entry, stderr); - #endif - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + if (wimlib_print_errors) { + WARNING("Found uncompressed resource with " + "original size (%"PRIu64") not the same " + "as compressed size (%"PRIu64")", + cur_entry->resource_entry.original_size, + cur_entry->resource_entry.size); + if (cur_entry->resource_entry.original_size) { + WARNING("Overriding compressed size with original size."); + cur_entry->resource_entry.size = + cur_entry->resource_entry.original_size; + } else { + WARNING("Overriding original size with compressed size"); + cur_entry->resource_entry.original_size = + cur_entry->resource_entry.size; + } + } } if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) { /* Lookup table entry for a metadata resource */ if (cur_entry->refcnt != 1) { - #ifdef ENABLE_ERROR_MESSAGES - ERROR("Found metadata resource with refcnt != 1:"); - print_lookup_table_entry(cur_entry, stderr); - #endif + if (wimlib_print_errors) { + ERROR("Found metadata resource with refcnt != 1:"); + print_lookup_table_entry(cur_entry, stderr); + } ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; goto out_free_cur_entry; } - if (w->hdr.part_number != 1) { - ERROR("Found a metadata resource in a " - "non-first part of the split WIM!"); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + if (wim->hdr.part_number != 1) { + WARNING("Ignoring metadata resource found in a " + "non-first part of the split WIM"); + free_lookup_table_entry(cur_entry); + continue; } - if (w->current_image == w->hdr.image_count) { - ERROR("The WIM header says there are %u images " - "in the WIM, but we found more metadata " - "resources than this", w->hdr.image_count); - ret = WIMLIB_ERR_IMAGE_COUNT; - goto out_free_cur_entry; + if (wim->current_image == wim->hdr.image_count) { + WARNING("The WIM header says there are %u images " + "in the WIM, but we found more metadata " + "resources than this (ignoring the extra)", + wim->hdr.image_count); + free_lookup_table_entry(cur_entry); + continue; } /* Notice very carefully: We are assigning the metadata @@ -476,153 +606,200 @@ read_lookup_table(WIMStruct *w) * written in the XML data. */ DEBUG("Found metadata resource for image %u at " "offset %"PRIu64".", - w->current_image + 1, + wim->current_image + 1, cur_entry->resource_entry.offset); - w->image_metadata[ - w->current_image++]->metadata_lte = cur_entry; + wim->image_metadata[ + wim->current_image++]->metadata_lte = cur_entry; } else { /* Lookup table entry for a stream that is not a * metadata resource */ - duplicate_entry = __lookup_resource(table, cur_entry->hash); + duplicate_entry = lookup_resource(table, cur_entry->hash); if (duplicate_entry) { - #ifdef ENABLE_ERROR_MESSAGES - ERROR("The WIM lookup table contains two entries with the " - "same SHA1 message digest!"); - ERROR("The first entry is:"); - print_lookup_table_entry(duplicate_entry, stderr); - ERROR("The second entry is:"); - print_lookup_table_entry(cur_entry, stderr); - #endif - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + if (wimlib_print_errors) { + WARNING("The WIM lookup table contains two entries with the " + "same SHA1 message digest!"); + WARNING("The first entry is:"); + print_lookup_table_entry(duplicate_entry, stderr); + WARNING("The second entry is:"); + print_lookup_table_entry(cur_entry, stderr); + } + free_lookup_table_entry(cur_entry); + continue; + } else { + lookup_table_insert(table, cur_entry); } - lookup_table_insert(table, cur_entry); } } - if (w->hdr.part_number == 1 && w->current_image != w->hdr.image_count) - { - ERROR("The WIM header says there are %u images " - "in the WIM, but we only found %d metadata " - "resources!", w->hdr.image_count, w->current_image); - ret = WIMLIB_ERR_IMAGE_COUNT; - goto out_free_lookup_table; + if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { + WARNING("The header of \"%"TS"\" says there are %u images in\n" + " the WIM, but we only found %d metadata resources! Acting as if\n" + " the header specified only %d images instead.", + wim->filename, wim->hdr.image_count, + wim->current_image, wim->current_image); + for (int i = wim->current_image; i < wim->hdr.image_count; i++) + put_image_metadata(wim->image_metadata[i], NULL); + wim->hdr.image_count = wim->current_image; } DEBUG("Done reading lookup table."); - w->lookup_table = table; + wim->lookup_table = table; ret = 0; - goto out; + goto out_free_buf; out_free_cur_entry: FREE(cur_entry); out_free_lookup_table: free_lookup_table(table); +out_free_buf: + FREE(buf); out: - w->current_image = 0; + wim->current_image = 0; return ret; } -static u8 * -write_lookup_table_entry(struct wim_lookup_table_entry *lte, u8 *buf_p) +static void +write_wim_lookup_table_entry(const struct wim_lookup_table_entry *lte, + struct wim_lookup_table_entry_disk *disk_entry) { - buf_p = put_resource_entry(buf_p, <e->output_resource_entry); - buf_p = put_u16(buf_p, lte->part_number); - buf_p = put_u32(buf_p, lte->out_refcnt); - buf_p = put_bytes(buf_p, SHA1_HASH_SIZE, lte->hash); - return buf_p; + put_resource_entry(<e->output_resource_entry, &disk_entry->resource_entry); + disk_entry->part_number = cpu_to_le16(lte->part_number); + disk_entry->refcnt = cpu_to_le32(lte->out_refcnt); + copy_hash(disk_entry->hash, lte->hash); } -int -write_lookup_table_from_stream_list(struct list_head *stream_list, - int out_fd, - struct resource_entry *out_res_entry) +static int +write_wim_lookup_table_from_stream_list(struct list_head *stream_list, + struct filedes *out_fd, + struct resource_entry *out_res_entry, + int write_resource_flags) { - int ret; - off_t start_offset; - u8 table_buf[(BUFFER_SIZE / WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE) * - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE]; - u8 *buf_p; size_t table_size; - size_t bytes_to_write; struct wim_lookup_table_entry *lte; + struct wim_lookup_table_entry_disk *table_buf; + struct wim_lookup_table_entry_disk *table_buf_ptr; + int ret; - start_offset = filedes_offset(out_fd); - if (start_offset == -1) - goto write_error; - - buf_p = table_buf; table_size = 0; - list_for_each_entry(lte, stream_list, lookup_table_list) { - if (buf_p == table_buf + sizeof(table_buf)) { - bytes_to_write = sizeof(table_buf); - if (full_write(out_fd, table_buf, - bytes_to_write) != bytes_to_write) - goto write_error; - table_size += bytes_to_write; - buf_p = table_buf; - } - buf_p = write_lookup_table_entry(lte, buf_p); - } - bytes_to_write = buf_p - table_buf; - if (bytes_to_write != 0) { - if (full_write(out_fd, table_buf, - bytes_to_write) != bytes_to_write) - goto write_error; - table_size += bytes_to_write; + list_for_each_entry(lte, stream_list, lookup_table_list) + table_size += sizeof(struct wim_lookup_table_entry_disk); + + DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")", + table_size, out_fd->offset); + + table_buf = MALLOC(table_size); + if (!table_buf) { + ERROR("Failed to allocate %zu bytes for temporary lookup table", + table_size); + return WIMLIB_ERR_NOMEM; } - out_res_entry->offset = start_offset; - out_res_entry->size = table_size; - out_res_entry->original_size = table_size; - out_res_entry->flags = WIM_RESHDR_FLAG_METADATA; - ret = 0; -out: + table_buf_ptr = table_buf; + list_for_each_entry(lte, stream_list, lookup_table_list) + write_wim_lookup_table_entry(lte, table_buf_ptr++); + + /* Write the lookup table uncompressed. Although wimlib can handle a + * compressed lookup table, MS software cannot. */ + ret = write_wim_resource_from_buffer(table_buf, + table_size, + WIM_RESHDR_FLAG_METADATA, + out_fd, + WIMLIB_COMPRESSION_TYPE_NONE, + out_res_entry, + NULL, + write_resource_flags); + FREE(table_buf); return ret; -write_error: - ERROR_WITH_ERRNO("Failed to write lookup table"); - ret = WIMLIB_ERR_WRITE; - goto out; } static int append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list) { - if (lte->out_refcnt != 0) + /* Lookup table entries with 'out_refcnt' == 0 correspond to streams not + * written and not present in the resulting WIM file, and should not be + * included in the lookup table. + * + * Lookup table entries marked as filtered (EXTERNAL_WIM) with + * 'out_refcnt != 0' were referenced as part of the logical write but + * correspond to streams that were not in fact written, and should not + * be included in the lookup table. + * + * Lookup table entries marked as filtered (SAME_WIM) with 'out_refcnt + * != 0' were referenced as part of the logical write but correspond to + * streams that were not in fact written, but nevertheless were already + * present in the WIM being overwritten in-place. These entries must be + * included in the lookup table, and the resource information to write + * needs to be copied from the resource information read originally. + */ + if (lte->out_refcnt != 0 && !(lte->filtered & FILTERED_EXTERNAL_WIM)) { + if (lte->filtered & FILTERED_SAME_WIM) { + copy_resource_entry(<e->output_resource_entry, + <e->resource_entry); + } list_add_tail(<e->lookup_table_list, (struct list_head*)_list); + } return 0; } -/* Writes the WIM lookup table to the output file. */ int -write_lookup_table(WIMStruct *w, int image, struct resource_entry *out_res_entry) +write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, + struct resource_entry *out_res_entry, + struct list_head *stream_list_override) { - LIST_HEAD(stream_list); - int start_image; - int end_image; + int write_resource_flags; + struct list_head _stream_list; + struct list_head *stream_list; - if (image == WIMLIB_ALL_IMAGES) { - start_image = 1; - end_image = w->hdr.image_count; + if (stream_list_override) { + stream_list = stream_list_override; } else { - start_image = image; - end_image = image; + stream_list = &_stream_list; + INIT_LIST_HEAD(stream_list); } - for (int i = start_image; i <= end_image; i++) { - struct wim_lookup_table_entry *metadata_lte; + if (!(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)) { + int start_image; + int end_image; - metadata_lte = w->image_metadata[i - 1]->metadata_lte; - metadata_lte->out_refcnt = 1; - metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA; - append_lookup_table_entry(metadata_lte, &stream_list); + if (image == WIMLIB_ALL_IMAGES) { + start_image = 1; + end_image = wim->hdr.image_count; + } else { + start_image = image; + end_image = image; + } + + /* Push metadata resource lookup table entries onto the front of + * the list in reverse order, so that they're written in order. + */ + for (int i = end_image; i >= start_image; i--) { + struct wim_lookup_table_entry *metadata_lte; + + metadata_lte = wim->image_metadata[i - 1]->metadata_lte; + metadata_lte->out_refcnt = 1; + metadata_lte->part_number = wim->hdr.part_number; + metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA; + + list_add(&metadata_lte->lookup_table_list, stream_list); + } } - for_lookup_table_entry(w->lookup_table, - append_lookup_table_entry, - &stream_list); - return write_lookup_table_from_stream_list(&stream_list, - w->out_fd, - out_res_entry); + + /* Append additional lookup table entries that need to be written, with + * some special handling for streams that have been marked as filtered. + */ + if (!stream_list_override) { + for_lookup_table_entry(wim->lookup_table, + append_lookup_table_entry, stream_list); + } + + write_resource_flags = 0; + if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) + write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE; + return write_wim_lookup_table_from_stream_list(stream_list, + &wim->out_fd, + out_res_entry, + write_resource_flags); } + int lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) { @@ -694,11 +871,9 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) } break; #ifdef __WIN32__ - case RESOURCE_WIN32: case RESOURCE_WIN32_ENCRYPTED: -#else - case RESOURCE_IN_FILE_ON_DISK: #endif + case RESOURCE_IN_FILE_ON_DISK: tfprintf(out, T("File on Disk = `%"TS"'\n"), lte->file_on_disk); break; @@ -714,28 +889,63 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) tputc(T('\n'), out); } +void +lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, + struct wimlib_resource_entry *wentry) +{ + wentry->uncompressed_size = lte->resource_entry.original_size; + wentry->compressed_size = lte->resource_entry.size; + wentry->offset = lte->resource_entry.offset; + copy_hash(wentry->sha1_hash, lte->hash); + wentry->part_number = lte->part_number; + wentry->reference_count = lte->refcnt; + wentry->is_compressed = (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) != 0; + wentry->is_metadata = (lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) != 0; + wentry->is_free = (lte->resource_entry.flags & WIM_RESHDR_FLAG_FREE) != 0; + wentry->is_spanned = (lte->resource_entry.flags & WIM_RESHDR_FLAG_SPANNED) != 0; +} + +struct iterate_lte_context { + wimlib_iterate_lookup_table_callback_t cb; + void *user_ctx; +}; + static int -do_print_lookup_table_entry(struct wim_lookup_table_entry *lte, void *fp) +do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx) { - print_lookup_table_entry(lte, (FILE*)fp); - return 0; + struct iterate_lte_context *ctx = _ctx; + struct wimlib_resource_entry entry; + + lte_to_wimlib_resource_entry(lte, &entry); + return (*ctx->cb)(&entry, ctx->user_ctx); } -/* - * Prints the lookup table of a WIM file. - */ -WIMLIBAPI void -wimlib_print_lookup_table(WIMStruct *w) +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_iterate_lookup_table(WIMStruct *wim, int flags, + wimlib_iterate_lookup_table_callback_t cb, + void *user_ctx) { - for_lookup_table_entry(w->lookup_table, - do_print_lookup_table_entry, - stdout); + struct iterate_lte_context ctx = { + .cb = cb, + .user_ctx = user_ctx, + }; + if (wim->hdr.part_number == 1) { + int ret; + for (int i = 0; i < wim->hdr.image_count; i++) { + ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte, + &ctx); + if (ret) + return ret; + } + } + return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx); } /* Given a SHA1 message digest, return the corresponding entry in the WIM's * lookup table, or NULL if there is none. */ struct wim_lookup_table_entry * -__lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) +lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) { size_t i; struct wim_lookup_table_entry *lte; @@ -759,12 +969,12 @@ __lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) * This is only for pre-resolved inodes. */ int -lookup_resource(WIMStruct *w, - const tchar *path, - int lookup_flags, - struct wim_dentry **dentry_ret, - struct wim_lookup_table_entry **lte_ret, - u16 *stream_idx_ret) +wim_pathname_to_stream(WIMStruct *wim, + const tchar *path, + int lookup_flags, + struct wim_dentry **dentry_ret, + struct wim_lookup_table_entry **lte_ret, + u16 *stream_idx_ret) { struct wim_dentry *dentry; struct wim_lookup_table_entry *lte; @@ -781,7 +991,7 @@ lookup_resource(WIMStruct *w, } } - dentry = get_dentry(w, path); + dentry = get_dentry(wim, path); if (p) *p = T(':'); if (!dentry) @@ -789,7 +999,9 @@ lookup_resource(WIMStruct *w, inode = dentry->d_inode; - wimlib_assert(inode->i_resolved); + if (!inode->i_resolved) + if (inode_resolve_ltes(inode, wim->lookup_table, false)) + return -EIO; if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK) && inode_is_directory(inode)) @@ -822,33 +1034,92 @@ out: } #endif -/* Resolve an inode's lookup table entries +int +resource_not_found_error(struct wim_inode *inode, const u8 *hash) +{ + if (wimlib_print_errors) { + ERROR("\"%"TS"\": resource not found", inode_first_full_path(inode)); + tfprintf(stderr, T(" SHA-1 message digest of missing resource:\n ")); + print_hash(hash, stderr); + tputc(T('\n'), stderr); + } + return WIMLIB_ERR_RESOURCE_NOT_FOUND; +} + +/* + * Resolve an inode's lookup table entries. * * This replaces the SHA1 hash fields (which are used to lookup an entry in the - * lookup table) with pointers directly to the lookup table entries. A circular - * linked list of streams sharing the same lookup table entry is created. + * lookup table) with pointers directly to the lookup table entries. * - * This function always succeeds; unresolved lookup table entries are given a - * NULL pointer. + * If @force is %false: + * If any needed SHA1 message digests are not found in the lookup table, + * WIMLIB_ERR_RESOURCE_NOT_FOUND is returned and the inode is left + * unmodified. + * If @force is %true: + * If any needed SHA1 message digests are not found in the lookup table, + * new entries are allocated and inserted into the lookup table. */ -void -inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table) +int +inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table, + bool force) { + const u8 *hash; if (!inode->i_resolved) { - struct wim_lookup_table_entry *lte; + struct wim_lookup_table_entry *lte, *ads_lte; + /* Resolve the default file stream */ - lte = __lookup_resource(table, inode->i_hash); - inode->i_lte = lte; - inode->i_resolved = 1; + lte = NULL; + hash = inode->i_hash; + if (!is_zero_hash(hash)) { + lte = lookup_resource(table, hash); + if (!lte) { + if (force) { + lte = new_lookup_table_entry(); + if (!lte) + return WIMLIB_ERR_NOMEM; + copy_hash(lte->hash, hash); + lookup_table_insert(table, lte); + } else { + goto resource_not_found; + } + } + } /* Resolve the alternate data streams */ + struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads]; for (u16 i = 0; i < inode->i_num_ads; i++) { - struct wim_ads_entry *cur_entry = &inode->i_ads_entries[i]; - lte = __lookup_resource(table, cur_entry->hash); - cur_entry->lte = lte; + struct wim_ads_entry *cur_entry; + + ads_lte = NULL; + cur_entry = &inode->i_ads_entries[i]; + hash = cur_entry->hash; + if (!is_zero_hash(hash)) { + ads_lte = lookup_resource(table, hash); + if (!ads_lte) { + if (force) { + ads_lte = new_lookup_table_entry(); + if (!ads_lte) + return WIMLIB_ERR_NOMEM; + copy_hash(ads_lte->hash, hash); + lookup_table_insert(table, ads_lte); + } else { + goto resource_not_found; + } + } + } + ads_ltes[i] = ads_lte; } + inode->i_lte = lte; + for (u16 i = 0; i < inode->i_num_ads; i++) + inode->i_ads_entries[i].lte = ads_ltes[i]; + inode->i_resolved = 1; } + return 0; + +resource_not_found: + return resource_not_found_error(inode, hash); } void @@ -1008,7 +1279,7 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, return ret; /* Look for a duplicate stream */ - duplicate_lte = __lookup_resource(lookup_table, lte->hash); + duplicate_lte = lookup_resource(lookup_table, lte->hash); list_del(<e->unhashed_list); if (duplicate_lte) { /* We have a duplicate stream. Transfer the reference counts @@ -1017,7 +1288,7 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, * duplicate, then free this stream. */ wimlib_assert(!(duplicate_lte->unhashed)); duplicate_lte->refcnt += lte->refcnt; - duplicate_lte->out_refcnt += lte->refcnt; + duplicate_lte->out_refcnt += lte->out_refcnt; *back_ptr = duplicate_lte; free_lookup_table_entry(lte); lte = duplicate_lte; @@ -1033,3 +1304,182 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, return 0; } +static int +lte_clone_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) +{ + struct wim_lookup_table *lookup_table = _lookup_table; + + if (lookup_resource(lookup_table, lte->hash)) + return 0; /* Resource already present. */ + + lte = clone_lookup_table_entry(lte); + if (!lte) + return WIMLIB_ERR_NOMEM; + lte->out_refcnt = 1; + lookup_table_insert(lookup_table, lte); + return 0; +} + +static int +lte_delete_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) +{ + struct wim_lookup_table *lookup_table = _lookup_table; + + if (lte->out_refcnt) { + lookup_table_unlink(lookup_table, lte); + free_lookup_table_entry(lte); + } + return 0; +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_reference_resources(WIMStruct *wim, + WIMStruct **resource_wims, unsigned num_resource_wims, + int ref_flags) +{ + int ret; + unsigned i; + + if (wim == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + if (num_resource_wims != 0 && resource_wims == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + for (i = 0; i < num_resource_wims; i++) + if (resource_wims[i] == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); + + for (i = 0; i < num_resource_wims; i++) { + ret = for_lookup_table_entry(resource_wims[i]->lookup_table, + lte_clone_if_new, + wim->lookup_table); + if (ret) + goto out_rollback; + } + return 0; + +out_rollback: + for_lookup_table_entry(wim->lookup_table, lte_delete_if_new, + wim->lookup_table); + return ret; +} + +static int +reference_resource_paths(WIMStruct *wim, + const tchar * const *resource_wimfiles, + unsigned num_resource_wimfiles, + int ref_flags, + int open_flags, + wimlib_progress_func_t progress_func) +{ + WIMStruct **resource_wims; + unsigned i; + int ret; + + resource_wims = CALLOC(num_resource_wimfiles, sizeof(resource_wims[0])); + if (!resource_wims) + return WIMLIB_ERR_NOMEM; + + for (i = 0; i < num_resource_wimfiles; i++) { + DEBUG("Referencing resources from path \"%"TS"\"", + resource_wimfiles[i]); + ret = wimlib_open_wim(resource_wimfiles[i], open_flags, + &resource_wims[i], progress_func); + if (ret) + goto out_free_resource_wims; + } + + ret = wimlib_reference_resources(wim, resource_wims, + num_resource_wimfiles, ref_flags); + if (ret) + goto out_free_resource_wims; + + for (i = 0; i < num_resource_wimfiles; i++) + list_add_tail(&resource_wims[i]->subwim_node, &wim->subwims); + + ret = 0; + goto out_free_array; + +out_free_resource_wims: + for (i = 0; i < num_resource_wimfiles; i++) + wimlib_free(resource_wims[i]); +out_free_array: + FREE(resource_wims); + return ret; +} + +static int +reference_resource_glob(WIMStruct *wim, const tchar *refglob, + int ref_flags, int open_flags, + wimlib_progress_func_t progress_func) +{ + glob_t globbuf; + int ret; + + /* Note: glob() is replaced in Windows native builds. */ + ret = tglob(refglob, GLOB_ERR | GLOB_NOSORT, NULL, &globbuf); + if (ret) { + if (ret == GLOB_NOMATCH) { + if (ref_flags & WIMLIB_REF_FLAG_GLOB_ERR_ON_NOMATCH) { + ERROR("Found no files for glob \"%"TS"\"", refglob); + return WIMLIB_ERR_GLOB_HAD_NO_MATCHES; + } else { + return reference_resource_paths(wim, + &refglob, + 1, + ref_flags, + open_flags, + progress_func); + } + } else { + ERROR_WITH_ERRNO("Failed to process glob \"%"TS"\"", refglob); + if (ret == GLOB_NOSPACE) + return WIMLIB_ERR_NOMEM; + else + return WIMLIB_ERR_READ; + } + } + + ret = reference_resource_paths(wim, + (const tchar * const *)globbuf.gl_pathv, + globbuf.gl_pathc, + ref_flags, + open_flags, + progress_func); + globfree(&globbuf); + return ret; +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_reference_resource_files(WIMStruct *wim, + const tchar * const * resource_wimfiles_or_globs, + unsigned count, + int ref_flags, + int open_flags, + wimlib_progress_func_t progress_func) +{ + unsigned i; + int ret; + + if (ref_flags & WIMLIB_REF_FLAG_GLOB_ENABLE) { + for (i = 0; i < count; i++) { + ret = reference_resource_glob(wim, + resource_wimfiles_or_globs[i], + ref_flags, + open_flags, + progress_func); + if (ret) + return ret; + } + return 0; + } else { + return reference_resource_paths(wim, resource_wimfiles_or_globs, + count, ref_flags, + open_flags, progress_func); + } +}