From: Eric Biggers Date: Sat, 14 Dec 2013 07:41:05 +0000 (-0600) Subject: Read concat runs X-Git-Tag: v1.6.0~145 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=8e5d4209c12a7436488d9a3d1f8d59383c5c48f2 Read concat runs --- diff --git a/include/wimlib.h b/include/wimlib.h index c7775416..00e8e5e7 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -987,8 +987,15 @@ struct wimlib_resource_entry { */ uint32_t is_missing : 1; - uint32_t reserved_flags : 27; - uint64_t reserved[4]; + uint32_t is_partial : 1; + + uint32_t reserved_flags : 26; + + uint64_t raw_resource_offset_in_wim; + uint64_t raw_resource_uncompressed_size; + uint64_t raw_resource_compressed_size; + + uint64_t reserved[1]; }; /** A stream of a file in the WIM. */ diff --git a/include/wimlib/list.h b/include/wimlib/list.h index 763178ba..d6bd55c1 100644 --- a/include/wimlib/list.h +++ b/include/wimlib/list.h @@ -129,6 +129,15 @@ static inline int list_empty(const struct list_head *head) return head->next == head; } +/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + static inline void __list_splice(const struct list_head *list, struct list_head *prev, struct list_head *next) diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h index b5b3830f..a6bed894 100644 --- a/include/wimlib/lookup_table.h +++ b/include/wimlib/lookup_table.h @@ -185,7 +185,10 @@ struct wim_lookup_table_entry { /* Pointers to somewhere where the stream is actually located. See the * comments for the @resource_location field above. 
*/ union { - struct wim_resource_spec *rspec; + struct { + struct wim_resource_spec *rspec; + u64 offset_in_res; + }; tchar *file_on_disk; void *attached_buffer; #ifdef WITH_FUSE @@ -283,6 +286,13 @@ lte_cchunk_size(const struct wim_lookup_table_entry * lte) return 32768; } +static inline bool +lte_is_partial(const struct wim_lookup_table_entry * lte) +{ + return lte->resource_location == RESOURCE_IN_WIM && + lte->size != lte->rspec->uncompressed_size; +} + static inline bool lte_filename_valid(const struct wim_lookup_table_entry *lte) { @@ -388,9 +398,7 @@ lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte, { lte->resource_location = RESOURCE_IN_WIM; lte->rspec = rspec; - list_add(&lte->wim_resource_list, &rspec->lte_list); - lte->flags = rspec->flags; - lte->size = rspec->uncompressed_size; + list_add_tail(&lte->wim_resource_list, &rspec->lte_list); } static inline void diff --git a/programs/imagex.c b/programs/imagex.c index d3588a9e..888f0e41 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -2794,25 +2794,37 @@ static int print_resource(const struct wimlib_resource_entry *resource, void *_ignore) { - - tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), + tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), resource->uncompressed_size); + if (resource->is_partial) { + tprintf(T("Raw uncompressed size = %"PRIu64" bytes\n"), + resource->raw_resource_uncompressed_size); + + tprintf(T("Raw compressed size = %"PRIu64" bytes\n"), + resource->raw_resource_compressed_size); - tprintf(T("Compressed size = %"PRIu64" bytes\n"), - resource->compressed_size); + tprintf(T("Raw offset in WIM = %"PRIu64" bytes\n"), + resource->raw_resource_offset_in_wim); - tprintf(T("Offset = %"PRIu64" bytes\n"), - resource->offset); + tprintf(T("Offset in raw = %"PRIu64" bytes\n"), + resource->offset); + } else { + tprintf(T("Compressed size = %"PRIu64" bytes\n"), + resource->compressed_size); + + tprintf(T("Offset in WIM = %"PRIu64" bytes\n"), + resource->offset); + } - 
tprintf(T("Part Number = %u\n"), resource->part_number); - tprintf(T("Reference Count = %u\n"), resource->reference_count); + tprintf(T("Part Number = %u\n"), resource->part_number); + tprintf(T("Reference Count = %u\n"), resource->reference_count); - tprintf(T("Hash = 0x")); + tprintf(T("Hash = 0x")); print_byte_field(resource->sha1_hash, sizeof(resource->sha1_hash)); tputchar(T('\n')); - tprintf(T("Flags = ")); + tprintf(T("Flags = ")); if (resource->is_compressed) tprintf(T("WIM_RESHDR_FLAG_COMPRESSED ")); if (resource->is_metadata) diff --git a/src/extract.c b/src/extract.c index 549c92f7..fbe5458d 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1365,6 +1365,9 @@ read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte, reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size); wim_res_hdr_to_spec(&reshdr, pwm, rspec); lte_bind_wim_resource_spec(lte, rspec); + lte->flags = rspec->flags; + lte->size = rspec->uncompressed_size; + lte->offset_in_res = 0; return 0; read_error: diff --git a/src/lookup_table.c b/src/lookup_table.c index d08498f8..0429a110 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -464,6 +464,33 @@ struct wim_lookup_table_entry_disk { #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 +static int +validate_resource(const struct wim_resource_spec *rspec, + u64 offset_save, u64 size_save) +{ + struct wim_lookup_table_entry *lte; + list_for_each_entry(lte, &rspec->lte_list, wim_resource_list) { + if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) + lte->flags |= WIM_RESHDR_FLAG_COMPRESSED; + else + lte->flags &= ~WIM_RESHDR_FLAG_COMPRESSED; + + if (!(lte->flags & WIM_RESHDR_FLAG_CONCAT)) { + lte->offset_in_res = offset_save; + lte->size = size_save; + } + + + if (lte->offset_in_res + lte->size < lte->size || + lte->offset_in_res + lte->size > rspec->uncompressed_size) + { + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + } + print_lookup_table_entry(lte, stderr); + } + return 0; +} + /* * Reads the lookup table 
from a WIM file. * @@ -484,8 +511,10 @@ read_wim_lookup_table(WIMStruct *wim) size_t num_entries; struct wim_lookup_table *table; struct wim_lookup_table_entry *cur_entry, *duplicate_entry; + struct wim_resource_spec *cur_rspec; + u64 size_save; + u64 offset_save; void *buf; - bool in_concat_run; BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); @@ -503,7 +532,7 @@ read_wim_lookup_table(WIMStruct *wim) /* Allocate hash table. */ table = new_lookup_table(num_entries * 2 + 1); - if (!table) { + if (table == NULL) { ERROR("Not enough memory to read lookup table."); ret = WIMLIB_ERR_NOMEM; goto out_free_buf; @@ -512,13 +541,19 @@ read_wim_lookup_table(WIMStruct *wim) /* Allocate and initalize `struct wim_lookup_table_entry's from the * on-disk lookup table. */ wim->current_image = 0; - in_concat_run = false; + cur_rspec = NULL; for (i = 0; i < num_entries; i++) { const struct wim_lookup_table_entry_disk *disk_entry = &((const struct wim_lookup_table_entry_disk*)buf)[i]; u16 part_number; struct wim_reshdr reshdr; - struct wim_resource_spec *cur_rspec; + + get_wim_reshdr(&disk_entry->reshdr, &reshdr); + + DEBUG("reshdr: size=%"PRIu64", original_size=%"PRIu64", " + "offset=%"PRIu64", flags=0x%02x", + reshdr.size_in_wim, reshdr.uncompressed_size, + reshdr.offset_in_wim, reshdr.flags); cur_entry = new_lookup_table_entry(); if (cur_entry == NULL) { @@ -538,23 +573,73 @@ read_wim_lookup_table(WIMStruct *wim) free_lookup_table_entry(cur_entry); continue; } - if (is_zero_hash(cur_entry->hash)) { - WARNING("The WIM lookup table contains an entry with a " - "SHA1 message digest of all 0's (ignoring it)"); + + if (cur_rspec == NULL || + !(reshdr.flags & WIM_RESHDR_FLAG_CONCAT)) + { + /* Starting new run of stream entries that all share the + * same WIM resource (streams concatenated together); or + * simply a single normal entry by itself. 
*/ + + if (cur_rspec != NULL) { + ret = validate_resource(cur_rspec, offset_save, + size_save); + if (ret) + goto out_free_cur_entry; + } + + cur_rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (cur_rspec == NULL) { + ERROR("Not enough memory to read lookup table."); + ret = WIMLIB_ERR_NOMEM; + goto out_free_cur_entry; + } + offset_save = reshdr.offset_in_wim; + size_save = reshdr.size_in_wim; + wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); + } else if (is_zero_hash(cur_entry->hash)) { + /* Found the resource specification for the run. */ + cur_rspec->offset_in_wim = reshdr.offset_in_wim; + cur_rspec->size_in_wim = reshdr.size_in_wim; + cur_rspec->flags = reshdr.flags; + DEBUG("Full run is %"PRIu64" compressed bytes " + "at file offset %"PRIu64" (flags 0x%02x)", + cur_rspec->size_in_wim, + cur_rspec->offset_in_wim, + cur_rspec->flags); free_lookup_table_entry(cur_entry); continue; + } else { + /* Continuing the run with another stream. */ + DEBUG("Continuing concat run with stream: " + "%"PRIu64" uncompressed bytes @ resource offset %"PRIu64")", + reshdr.size_in_wim, reshdr.offset_in_wim); + cur_rspec->uncompressed_size += reshdr.size_in_wim; } - cur_rspec = MALLOC(sizeof(struct wim_resource_spec)); - if (cur_rspec == NULL) { - ERROR("Not enough memory to read lookup table."); - ret = WIMLIB_ERR_NOMEM; - goto out_free_cur_entry; + lte_bind_wim_resource_spec(cur_entry, cur_rspec); + if (reshdr.flags & WIM_RESHDR_FLAG_CONCAT) { + /* In concatenation runs, the offset field is used for + * in-resource offset, not the in-WIM offset, and the + * size field is used for the uncompressed size, not the + * compressed size. */ + cur_entry->offset_in_res = reshdr.offset_in_wim; + cur_entry->size = reshdr.size_in_wim; + cur_entry->flags = reshdr.flags; + } else { + /* These may be overwritten in validate_resource() if + * the run turns out to be a concatenation. 
*/ + cur_entry->offset_in_res = 0; + cur_entry->size = reshdr.uncompressed_size; + cur_entry->flags = reshdr.flags; } - get_wim_reshdr(&disk_entry->reshdr, &reshdr); - wim_res_hdr_to_spec(&reshdr, wim, cur_rspec); - lte_bind_wim_resource_spec(cur_entry, cur_rspec); + if (is_zero_hash(cur_entry->hash)) { + WARNING("The WIM lookup table contains an entry with a " + "SHA1 message digest of all 0's (ignoring it)"); + free_lookup_table_entry(cur_entry); + continue; + } if (cur_entry->flags & WIM_RESHDR_FLAG_METADATA) { /* Lookup table entry for a metadata resource */ @@ -623,6 +708,12 @@ read_wim_lookup_table(WIMStruct *wim) } } + if (cur_rspec != NULL) { + ret = validate_resource(cur_rspec, offset_save, size_save); + if (ret) + goto out_free_cur_entry; + } + if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) { WARNING("The header of \"%"TS"\" says there are %u images in\n" " the WIM, but we only found %d metadata resources! Acting as if\n" @@ -829,27 +920,41 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) return; } - tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); - tfprintf(out, T("Uncompressed Size = %"PRIu64" bytes\n"), lte->size); - if (lte->resource_location == RESOURCE_IN_WIM) { - tfprintf(out, T("Offset in WIM = %"PRIu64" bytes\n"), - lte->rspec->offset_in_wim); + tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), + lte->size); + if (lte_is_partial(lte)) { + tprintf(T("Offset = %"PRIu64" bytes\n"), + lte->offset_in_res); + + tprintf(T("Raw uncompressed size = %"PRIu64" bytes\n"), + lte->rspec->uncompressed_size); + + tprintf(T("Raw compressed size = %"PRIu64" bytes\n"), + lte->rspec->size_in_wim); + + tprintf(T("Raw offset = %"PRIu64" bytes\n"), + lte->rspec->offset_in_wim); + } else if (lte->resource_location == RESOURCE_IN_WIM) { + tprintf(T("Compressed size = %"PRIu64" bytes\n"), + lte->rspec->size_in_wim); - tfprintf(out, T("Size in WIM = %"PRIu64" bytes\n"), - lte->rspec->size_in_wim); + 
tprintf(T("Offset = %"PRIu64" bytes\n"), + lte->rspec->offset_in_wim); } + tfprintf(out, T("Reference Count = %u\n"), lte->refcnt); + if (lte->unhashed) { tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"), lte->back_inode, lte->back_stream_id); } else { - tfprintf(out, T("Hash = 0x")); + tfprintf(out, T("Hash = 0x")); print_hash(lte->hash, out); tputc(T('\n'), out); } - tfprintf(out, T("Flags = ")); + tfprintf(out, T("Flags = ")); u8 flags = lte->flags; if (flags & WIM_RESHDR_FLAG_COMPRESSED) tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out); @@ -865,7 +970,7 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) switch (lte->resource_location) { case RESOURCE_IN_WIM: if (lte->rspec->wim->filename) { - tfprintf(out, T("WIM file = `%"TS"'\n"), + tfprintf(out, T("WIM file = `%"TS"'\n"), lte->rspec->wim->filename); } break; @@ -873,12 +978,12 @@ print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out) case RESOURCE_WIN32_ENCRYPTED: #endif case RESOURCE_IN_FILE_ON_DISK: - tfprintf(out, T("File on Disk = `%"TS"'\n"), + tfprintf(out, T("File on Disk = `%"TS"'\n"), lte->file_on_disk); break; #ifdef WITH_FUSE case RESOURCE_IN_STAGING_FILE: - tfprintf(out, T("Staging File = `%"TS"'\n"), + tfprintf(out, T("Staging File = `%"TS"'\n"), lte->staging_file_name); break; #endif @@ -892,16 +997,21 @@ void lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, struct wimlib_resource_entry *wentry) { - wentry->uncompressed_size = lte->size; + memset(wentry, 0, sizeof(*wentry)); + wentry->uncompressed_size = lte->size; if (lte->resource_location == RESOURCE_IN_WIM) { - wentry->compressed_size = lte->rspec->size_in_wim; - wentry->offset = lte->rspec->offset_in_wim; wentry->part_number = lte->rspec->wim->hdr.part_number; - } else { - wentry->compressed_size = 0; - wentry->offset = 0; - wentry->part_number = 0; + if (lte_is_partial(lte)) { + wentry->compressed_size = 0; + wentry->offset = lte->offset_in_res; + } else { + 
wentry->compressed_size = lte->rspec->size_in_wim; + wentry->offset = lte->rspec->offset_in_wim; + } + wentry->raw_resource_offset_in_wim = lte->rspec->offset_in_wim; + wentry->raw_resource_uncompressed_size = lte->rspec->uncompressed_size; + wentry->raw_resource_compressed_size = lte->rspec->size_in_wim; } copy_hash(wentry->sha1_hash, lte->hash); wentry->reference_count = lte->refcnt; @@ -909,6 +1019,7 @@ lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, wentry->is_metadata = (lte->flags & WIM_RESHDR_FLAG_METADATA) != 0; wentry->is_free = (lte->flags & WIM_RESHDR_FLAG_FREE) != 0; wentry->is_spanned = (lte->flags & WIM_RESHDR_FLAG_SPANNED) != 0; + wentry->is_partial = lte_is_partial(lte); } struct iterate_lte_context { diff --git a/src/resource.c b/src/resource.c index d8897448..1d82a967 100644 --- a/src/resource.c +++ b/src/resource.c @@ -624,12 +624,14 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte, wimlib_assert(is_power_of_2(cb_chunk_size)); if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) { /* Raw chunks mode is subject to the restrictions noted. */ + wimlib_assert(!lte_is_partial(lte)); wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)); wimlib_assert(cb_chunk_size == rspec->cchunk_size); wimlib_assert(size == rspec->uncompressed_size); wimlib_assert(offset == 0); } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) { /* Raw full mode: read must not overrun end of store size. 
*/ + wimlib_assert(!lte_is_partial(lte)); wimlib_assert(offset + size >= size && offset + size <= rspec->size_in_wim); } else { @@ -638,10 +640,10 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte, offset + size <= rspec->uncompressed_size); } - DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" " + DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] " "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" " "(readflags 0x%08x, resflags 0x%02x%s)", - size, offset, + size, offset, lte->offset_in_res, rspec->size_in_wim, rspec->uncompressed_size, rspec->offset_in_wim, @@ -660,7 +662,7 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte, } else { return read_compressed_wim_resource(rspec, size, cb, cb_chunk_size, - ctx_or_buf, flags, offset); + ctx_or_buf, flags, offset + lte->offset_in_res); } } @@ -869,6 +871,9 @@ wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) lte->unhashed = 1; lte_bind_wim_resource_spec(lte, rspec); + lte->flags = rspec->flags; + lte->size = rspec->uncompressed_size; + lte->offset_in_res = 0; ret = read_full_resource_into_alloc_buf(lte, buf_ret); @@ -1008,7 +1013,7 @@ wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, INIT_LIST_HEAD(&spec->lte_list); spec->flags = reshdr->flags; spec->is_pipable = wim_is_pipable(wim); - if (spec->flags & WIM_RESHDR_FLAG_COMPRESSED) { + if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) { spec->ctype = wim->compression_type; spec->cchunk_size = wim->chunk_size; } else { diff --git a/src/wim.c b/src/wim.c index a62be39e..adfbeff2 100644 --- a/src/wim.c +++ b/src/wim.c @@ -602,7 +602,7 @@ begin_read(WIMStruct *wim, const void *wim_filename_or_fd, wim->compression_type = WIMLIB_COMPRESSION_TYPE_LZX; } else if (wim->hdr.flags & WIM_HDR_FLAG_COMPRESS_XPRESS) { wim->compression_type = WIMLIB_COMPRESSION_TYPE_XPRESS; - #if 0 + #if 1 /* TODO */ } else if (wim->hdr.flags & WIM_HDR_FLAG_COMPRESS_LZMS) { 
wim->compression_type = WIMLIB_COMPRESSION_TYPE_LZMS; diff --git a/src/write.c b/src/write.c index 80a3b096..053893da 100644 --- a/src/write.c +++ b/src/write.c @@ -74,9 +74,19 @@ static bool can_raw_copy(const struct wim_lookup_table_entry *lte, int write_resource_flags, int out_ctype, u32 out_chunk_size) { - return (out_ctype == lte_ctype(lte) - && out_chunk_size == lte_cchunk_size(lte) - && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE); + if (lte->resource_location != RESOURCE_IN_WIM) + return false; + if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE) + return false; + if (lte->rspec->ctype != out_ctype) + return false; + if (out_chunk_size != lte->rspec->cchunk_size) + return false; + if (lte->offset_in_res != 0) + return false; + if (lte->size != lte->rspec->uncompressed_size) + return false; + return true; } @@ -253,7 +263,7 @@ write_pwm_stream_header(const struct wim_lookup_table_entry *lte, copy_hash(stream_hdr.hash, lte->hash); } - reshdr_flags = lte->flags & ~WIM_RESHDR_FLAG_COMPRESSED; + reshdr_flags = lte->flags & ~(WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT); reshdr_flags |= additional_reshdr_flags; stream_hdr.flags = cpu_to_le32(reshdr_flags); ret = full_write(out_fd, &stream_hdr, sizeof(stream_hdr)); @@ -543,7 +553,7 @@ try_write_again: /* Fill in out_reshdr with information about the newly written * resource. */ out_reshdr->size_in_wim = out_fd->offset - res_start_offset; - out_reshdr->flags = lte->flags; + out_reshdr->flags = lte->flags & ~WIM_RESHDR_FLAG_CONCAT; if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE) out_reshdr->flags &= ~WIM_RESHDR_FLAG_COMPRESSED; else