X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Flookup_table.c;h=214e6dbf7a7dcdebaa2d59cb8c4eec72e7063296;hp=bfa0c858c4ddea8a84842f630377fb94a3c5ddc7;hb=3f9b53a4a214a254bb27ed30994faf2a0fd12375;hpb=650997e4865a090b6856c7ca34b02f42994e8e29 diff --git a/src/lookup_table.c b/src/lookup_table.c index bfa0c858..214e6dbf 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -28,6 +28,7 @@ #include "lookup_table.h" #include "buffer_io.h" #include +#include #ifdef WITH_FUSE #include @@ -39,7 +40,7 @@ new_lookup_table(size_t capacity) struct wim_lookup_table *table; struct hlist_head *array; - table = MALLOC(sizeof(struct wim_lookup_table)); + table = CALLOC(1, sizeof(struct wim_lookup_table)); if (table) { array = CALLOC(capacity, sizeof(array[0])); if (array) { @@ -87,8 +88,11 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *old) switch (new->resource_location) { #ifdef __WIN32__ case RESOURCE_WIN32: + case RESOURCE_WIN32_ENCRYPTED: #endif +#ifdef WITH_FUSE case RESOURCE_IN_STAGING_FILE: +#endif case RESOURCE_IN_FILE_ON_DISK: BUILD_BUG_ON((void*)&old->file_on_disk != (void*)&old->staging_file_name); @@ -214,7 +218,7 @@ finalize_lte(struct wim_lookup_table_entry *lte) #ifdef WITH_FUSE if (lte->resource_location == RESOURCE_IN_STAGING_FILE) { unlink(lte->staging_file_name); - list_del(&lte->staging_list); + list_del(&lte->unhashed_list); } #endif free_lookup_table_entry(lte); @@ -231,7 +235,8 @@ lte_decrement_refcnt(struct wim_lookup_table_entry *lte, wimlib_assert(lte != NULL); wimlib_assert(lte->refcnt != 0); if (--lte->refcnt == 0) { - lookup_table_unlink(table, lte); + if (!lte->unhashed) + lookup_table_unlink(table, lte); #ifdef WITH_FUSE if (lte->num_opened_fds == 0) #endif @@ -264,17 +269,78 @@ for_lookup_table_entry(struct wim_lookup_table *table, hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i], hash_list) { + wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)); ret = visitor(lte, arg); - if (ret 
!= 0) + if (ret) return ret; } } return 0; } +int +cmp_streams_by_wim_position(const void *p1, const void *p2) +{ + const struct wim_lookup_table_entry *lte1, *lte2; + lte1 = *(const struct wim_lookup_table_entry**)p1; + lte2 = *(const struct wim_lookup_table_entry**)p2; + if (lte1->resource_entry.offset < lte2->resource_entry.offset) + return -1; + else if (lte1->resource_entry.offset > lte2->resource_entry.offset) + return 1; + else + return 0; +} + + +static int +add_lte_to_array(struct wim_lookup_table_entry *lte, + void *_pp) +{ + struct wim_lookup_table_entry ***pp = _pp; + *(*pp)++ = lte; + return 0; +} + +/* Iterate through the lookup table entries, but first sort them by stream + * offset in the WIM. Caution: this is intended to be used when the stream + * offset field has actually been set. */ +int +for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, + int (*visitor)(struct wim_lookup_table_entry *, + void *), + void *arg) +{ + struct wim_lookup_table_entry **lte_array, **p; + size_t num_streams = table->num_entries; + int ret; + + lte_array = MALLOC(num_streams * sizeof(lte_array[0])); + if (!lte_array) + return WIMLIB_ERR_NOMEM; + p = lte_array; + for_lookup_table_entry(table, add_lte_to_array, &p); + + wimlib_assert(p == lte_array + num_streams); + + qsort(lte_array, num_streams, sizeof(lte_array[0]), + cmp_streams_by_wim_position); + ret = 0; + for (size_t i = 0; i < num_streams; i++) { + ret = visitor(lte_array[i], arg); + if (ret) + break; + } + FREE(lte_array); + return ret; +} /* * Reads the lookup table from a WIM file. + * + * Saves lookup table entries for non-metadata streams in a hash table, and + * saves the metadata entry for each image in a special per-image location (the + * image_metadata array). 
*/ int read_lookup_table(WIMStruct *w) @@ -283,7 +349,7 @@ read_lookup_table(WIMStruct *w) u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE]; int ret; struct wim_lookup_table *table; - struct wim_lookup_table_entry *cur_entry = NULL, *duplicate_entry; + struct wim_lookup_table_entry *cur_entry, *duplicate_entry; if (resource_is_compressed(&w->hdr.lookup_table_res_entry)) { ERROR("Didn't expect a compressed lookup table!"); @@ -309,6 +375,7 @@ read_lookup_table(WIMStruct *w) if (!table) return WIMLIB_ERR_NOMEM; + w->current_image = 0; while (num_entries--) { const u8 *p; @@ -320,16 +387,16 @@ read_lookup_table(WIMStruct *w) "table"); } ret = WIMLIB_ERR_READ; - goto out; + goto out_free_lookup_table; } cur_entry = new_lookup_table_entry(); if (!cur_entry) { ret = WIMLIB_ERR_NOMEM; - goto out; + goto out_free_lookup_table; } + cur_entry->wim = w; cur_entry->resource_location = RESOURCE_IN_WIM; - p = get_resource_entry(buf, &cur_entry->resource_entry); p = get_u16(p, &cur_entry->part_number); p = get_u32(p, &cur_entry->refcnt); @@ -350,27 +417,6 @@ read_lookup_table(WIMStruct *w) goto out_free_cur_entry; } - /* Ordinarily, no two streams should share the same SHA1 message - * digest. However, this constraint can be broken for metadata - * resources--- two identical images will have the same metadata - * resource, but their lookup table entries are not shared. 
*/ - duplicate_entry = __lookup_resource(table, cur_entry->hash); - if (duplicate_entry - && !((duplicate_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) - && cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)) - { - #ifdef ENABLE_ERROR_MESSAGES - ERROR("The WIM lookup table contains two entries with the " - "same SHA1 message digest!"); - ERROR("The first entry is:"); - print_lookup_table_entry(duplicate_entry, stderr); - ERROR("The second entry is:"); - print_lookup_table_entry(cur_entry, stderr); - #endif - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; - } - if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) && (cur_entry->resource_entry.size != cur_entry->resource_entry.original_size)) @@ -384,26 +430,84 @@ read_lookup_table(WIMStruct *w) ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; goto out_free_cur_entry; } - if ((cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) - && cur_entry->refcnt != 1) - { - #ifdef ENABLE_ERROR_MESSAGES - ERROR("Found metadata resource with refcnt != 1:"); - print_lookup_table_entry(cur_entry, stderr); - #endif - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out_free_cur_entry; + + if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) { + /* Lookup table entry for a metadata resource */ + if (cur_entry->refcnt != 1) { + #ifdef ENABLE_ERROR_MESSAGES + ERROR("Found metadata resource with refcnt != 1:"); + print_lookup_table_entry(cur_entry, stderr); + #endif + ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + goto out_free_cur_entry; + } + + if (w->hdr.part_number != 1) { + ERROR("Found a metadata resource in a " + "non-first part of the split WIM!"); + ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + goto out_free_cur_entry; + } + if (w->current_image == w->hdr.image_count) { + ERROR("The WIM header says there are %u images " + "in the WIM, but we found more metadata " + "resources than this", w->hdr.image_count); + ret = WIMLIB_ERR_IMAGE_COUNT; + goto 
out_free_cur_entry; + } + + /* Notice very carefully: We are assigning the metadata + * resources in the exact order mirrored by their lookup + * table entries on disk, which is the behavior of + * Microsoft's software. In particular, this overrides + * the actual locations of the metadata resources + * themselves in the WIM file as well as any information + * written in the XML data. */ + DEBUG("Found metadata resource for image %u at " + "offset %"PRIu64".", + w->current_image + 1, + cur_entry->resource_entry.offset); + w->image_metadata[ + w->current_image++]->metadata_lte = cur_entry; + } else { + /* Lookup table entry for a stream that is not a + * metadata resource */ + duplicate_entry = __lookup_resource(table, cur_entry->hash); + if (duplicate_entry) { + #ifdef ENABLE_ERROR_MESSAGES + ERROR("The WIM lookup table contains two entries with the " + "same SHA1 message digest!"); + ERROR("The first entry is:"); + print_lookup_table_entry(duplicate_entry, stderr); + ERROR("The second entry is:"); + print_lookup_table_entry(cur_entry, stderr); + #endif + ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + goto out_free_cur_entry; + } + lookup_table_insert(table, cur_entry); } - lookup_table_insert(table, cur_entry); + } + if (w->hdr.part_number == 1 && + w->current_image != w->hdr.image_count) + { + ERROR("The WIM header says there are %u images " + "in the WIM, but we only found %d metadata " + "resources!", w->hdr.image_count, w->current_image); + ret = WIMLIB_ERR_IMAGE_COUNT; + goto out_free_lookup_table; } DEBUG("Done reading lookup table."); w->lookup_table = table; - return 0; + ret = 0; + goto out; out_free_cur_entry: FREE(cur_entry); -out: +out_free_lookup_table: free_lookup_table(table); +out: + w->current_image = 0; return ret; } @@ -412,13 +516,13 @@ out: * Writes a lookup table entry to the output file. 
*/ int -write_lookup_table_entry(struct wim_lookup_table_entry *lte, void *__out) +write_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_out) { FILE *out; u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE]; u8 *p; - out = __out; + out = _out; /* Don't write entries that have not had file resources or metadata * resources written for them. */ @@ -442,22 +546,44 @@ write_lookup_table_entry(struct wim_lookup_table_entry *lte, void *__out) return 0; } -/* Writes the lookup table to the output file. */ +/* Writes the WIM lookup table to the output file. */ int -write_lookup_table(struct wim_lookup_table *table, FILE *out, - struct resource_entry *out_res_entry) +write_lookup_table(WIMStruct *w, int image, struct resource_entry *out_res_entry) { + FILE *out = w->out_fp; off_t start_offset, end_offset; int ret; + int start_image, end_image; start_offset = ftello(out); if (start_offset == -1) return WIMLIB_ERR_WRITE; - ret = for_lookup_table_entry(table, write_lookup_table_entry, out); - if (ret != 0) + /* Write lookup table entries for metadata resources */ + if (image == WIMLIB_ALL_IMAGES) { + start_image = 1; + end_image = w->hdr.image_count; + } else { + start_image = image; + end_image = image; + } + for (int i = start_image; i <= end_image; i++) { + struct wim_lookup_table_entry *metadata_lte; + + metadata_lte = w->image_metadata[i - 1]->metadata_lte; + metadata_lte->out_refcnt = 1; + metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA; + ret = write_lookup_table_entry(metadata_lte, out); + if (ret) + return ret; + } + + /* Write lookup table entries for other resources */ + ret = for_lookup_table_entry(w->lookup_table, write_lookup_table_entry, out); + if (ret) return ret; + /* Fill in the resource entry for the lookup table itself */ end_offset = ftello(out); if (end_offset == -1) return WIMLIB_ERR_WRITE; @@ -466,27 +592,25 @@ write_lookup_table(struct wim_lookup_table *table, FILE *out, out_res_entry->size = end_offset - start_offset; 
out_res_entry->original_size = end_offset - start_offset; out_res_entry->flags = WIM_RESHDR_FLAG_METADATA; - return 0; } - int -lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *ignore) +lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) { lte->real_refcnt = 0; return 0; } int -lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore) +lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore) { lte->out_refcnt = 0; return 0; } int -lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *ignore) +lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore) { if (lte->extracted_file != NULL) { FREE(lte->extracted_file); @@ -514,9 +638,14 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) tfprintf(out, T("Part Number = %hu\n"), lte->part_number); tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); - tfprintf(out, T("Hash = 0x")); - print_hash(lte->hash); - tputc(T('\n'), out); + if (lte->unhashed) { + tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), + lte->back_inode, lte->back_stream_id); + } else { + tfprintf(out, T("Hash = 0x")); + print_hash(lte->hash, out); + tputc(T('\n'), out); + } tfprintf(out, T("Flags = ")); u8 flags = lte->resource_entry.flags; @@ -768,3 +897,79 @@ lookup_table_total_stream_size(struct wim_lookup_table *table) for_lookup_table_entry(table, lte_add_stream_size, &total_size); return total_size; } + +struct wim_lookup_table_entry ** +retrieve_lte_pointer(struct wim_lookup_table_entry *lte) +{ + wimlib_assert(lte->unhashed); + struct wim_inode *inode = lte->back_inode; + u32 stream_id = lte->back_stream_id; + if (stream_id == 0) + return &inode->i_lte; + else + for (u16 i = 0; i < inode->i_num_ads; i++) + if (inode->i_ads_entries[i].stream_id == stream_id) + return &inode->i_ads_entries[i].lte; + wimlib_assert(0); + return NULL; +} + +/* Calculate the SHA1 message digest of a stream and move it from the list of + * 
unhashed streams to the stream lookup table, possibly joining it with an + * existing lookup table entry for an identical stream. + * + * @lte: An unhashed lookup table entry. + * @lookup_table: Lookup table for the WIM. + * @lte_ret: On success, write a pointer to the resulting lookup table + * entry to this location. This will be the same as @lte + * if it was inserted into the lookup table, or different if + * a duplicate stream was found. + * + * Returns 0 on success; nonzero if there is an error reading the stream. + */ +int +hash_unhashed_stream(struct wim_lookup_table_entry *lte, + struct wim_lookup_table *lookup_table, + struct wim_lookup_table_entry **lte_ret) +{ + int ret; + struct wim_lookup_table_entry *duplicate_lte; + struct wim_lookup_table_entry **back_ptr; + + wimlib_assert(lte->unhashed); + + /* back_ptr must be saved because @back_inode and @back_stream_id are in + * union with the SHA1 message digest and will no longer be valid once + * the SHA1 has been calculated. */ + back_ptr = retrieve_lte_pointer(lte); + + ret = sha1_resource(lte); + if (ret) + return ret; + + /* Look for a duplicate stream */ + duplicate_lte = __lookup_resource(lookup_table, lte->hash); + list_del(&lte->unhashed_list); + if (duplicate_lte) { + /* We have a duplicate stream. Transfer the reference counts + * from this stream to the duplicate, update the reference to + * this stream (in an inode or ads_entry) to point to the + * duplicate, then free this stream. */ + wimlib_assert(!(duplicate_lte->unhashed)); + duplicate_lte->refcnt += lte->refcnt; + duplicate_lte->out_refcnt += lte->refcnt; + *back_ptr = duplicate_lte; + free_lookup_table_entry(lte); + lte = duplicate_lte; + } else { + /* No duplicate stream, so we need to insert + * this stream into the lookup table and treat + * it as a hashed stream. */ + lookup_table_insert(lookup_table, lte); + lte->unhashed = 0; + } + if (lte_ret) + *lte_ret = lte; + return 0; +} +