X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fresource.c;h=7631dd492309283bb7d8fa72a569004f64ae619d;hp=3256a2b8e54625590cfcda7c4a098102af62e31a;hb=b5b9681794d1f5f13350e3567f6f6e74f5c779cf;hpb=31aabd89accc03ec6e307620ee911f50ba05b0e8 diff --git a/src/resource.c b/src/resource.c index 3256a2b8..7631dd49 100644 --- a/src/resource.c +++ b/src/resource.c @@ -27,12 +27,14 @@ #endif #include "wimlib.h" +#include "wimlib/assert.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" #include "wimlib/lookup_table.h" #include "wimlib/resource.h" #include "wimlib/sha1.h" +#include "wimlib/wim.h" #ifdef __WIN32__ /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */ @@ -90,29 +92,6 @@ */ -/* Decompress the specified chunk that uses the specified compression type - * @ctype, part of a WIM with default chunk size @wim_chunk_size. For LZX the - * separate @wim_chunk_size is needed because it determines the window size used - * for LZX compression. */ -static int -decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen, - int ctype, u32 wim_chunk_size) -{ - switch (ctype) { - case WIMLIB_COMPRESSION_TYPE_LZX: - return wimlib_lzx_decompress2(cchunk, clen, - uchunk, ulen, wim_chunk_size); - case WIMLIB_COMPRESSION_TYPE_XPRESS: - return wimlib_xpress_decompress(cchunk, clen, - uchunk, ulen); - case WIMLIB_COMPRESSION_TYPE_LZMS: - return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen); - default: - ERROR("Invalid compression format (%d)", ctype); - return -1; - } -} - struct data_range { u64 offset; u64 size; @@ -164,6 +143,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, bool chunk_offsets_malloced = false; bool ubuf_malloced = false; bool cbuf_malloced = false; + struct wimlib_decompressor *decompressor = NULL; /* Sanity checks */ wimlib_assert(rspec != NULL); @@ -197,41 +177,40 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Get the maximum size of uncompressed chunks in this resource, which * we require be a power of 2. */ - u32 chunk_size; u64 cur_read_offset = rspec->offset_in_wim; - int ctype; + int ctype = rspec->compression_type; + u32 chunk_size = rspec->chunk_size; if (alt_chunk_table) { /* Alternate chunk table format. Its header specifies the chunk - * size and compression format. */ - struct alt_chunk_table_header_disk hdr; - - ret = full_pread(in_fd, &hdr, sizeof(hdr), cur_read_offset); - if (ret) - goto read_error; - cur_read_offset += sizeof(hdr); - - chunk_size = le32_to_cpu(hdr.chunk_size); - ctype = le32_to_cpu(hdr.compression_format); - - /* Format numbers must be the same as in WIMGAPI to be - * compatible. */ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - } else { - /* "Normal" format: the maximum uncompressed chunk size and the - * compression format default to those of the WIM itself. */ - chunk_size = rspec->wim->chunk_size; - ctype = rspec->wim->compression_type; + * size and compression format. Note: it could be read here; + * however, the relevant data was already loaded into @rspec by + * read_wim_lookup_table(). */ + cur_read_offset += sizeof(struct alt_chunk_table_header_disk); } + if (!is_power_of_2(chunk_size)) { ERROR("Invalid compressed resource: " - "expected power-of-2 chunk size (got %u)", chunk_size); + "expected power-of-2 chunk size (got %"PRIu32")", + chunk_size); ret = WIMLIB_ERR_INVALID_CHUNK_SIZE; goto out_free_memory; } + /* Get valid decompressor. */ + if (ctype == rspec->wim->decompressor_ctype && + chunk_size == rspec->wim->decompressor_max_block_size) + { + /* Cached decompressor. */ + decompressor = rspec->wim->decompressor; + rspec->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + rspec->wim->decompressor = NULL; + } else { + ret = wimlib_create_decompressor(ctype, chunk_size, NULL, + &decompressor); + if (ret) + goto out_free_memory; + } + const u32 chunk_order = bsr32(chunk_size); /* Calculate the total number of chunks the resource is divided into. */ @@ -260,11 +239,9 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 num_chunk_entries = (alt_chunk_table ? num_chunks : num_chunks - 1); /* Set the size of each chunk table entry based on the resource's - * uncompressed size. XXX: Does the alternate chunk table really - * always have 4-byte entries? */ - const u64 chunk_entry_size = - (rspec->uncompressed_size > (1ULL << 32) && !alt_chunk_table) - ? 8 : 4; + * uncompressed size. */ + const u64 chunk_entry_size = get_chunk_entry_size(rspec->uncompressed_size, + alt_chunk_table); /* Calculate the size of the chunk table in bytes. */ const u64 chunk_table_size = num_chunk_entries * chunk_entry_size; @@ -494,12 +471,11 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, DEBUG("Decompressing chunk %"PRIu64" " "(csize=%"PRIu32" usize=%"PRIu32")", i, chunk_csize, chunk_usize); - ret = decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - ctype, - chunk_size); + ret = wimlib_decompress(cbuf, + chunk_csize, + ubuf, + chunk_usize, + decompressor); if (ret) { ERROR("Failed to decompress data!"); ret = WIMLIB_ERR_DECOMPRESSION; @@ -556,8 +532,15 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, goto read_error; } ret = 0; + out_free_memory: errno_save = errno; + if (decompressor) { + wimlib_free_decompressor(rspec->wim->decompressor); + rspec->wim->decompressor = decompressor; + rspec->wim->decompressor_ctype = ctype; + rspec->wim->decompressor_max_block_size = chunk_size; + } if (chunk_offsets_malloced) FREE(chunk_offsets); if (ubuf_malloced) @@ -579,6 +562,28 @@ read_error: goto out_free_memory; } +static int +fill_zeroes(u64 size, consume_data_callback_t cb, void *cb_ctx) +{ + if (unlikely(size)) { + u8 buf[min(size, BUFFER_SIZE)]; + + memset(buf, 0, sizeof(buf)); + + do { + size_t len; + int ret; + + len = min(size, BUFFER_SIZE); + ret = cb(buf, len, cb_ctx); + if (ret) + return ret; + size -= len; + } while (size); + } + return 0; +} + /* Read raw data from a file descriptor at the specified offset, feeding the * data it in chunks into the specified callback function. */ static int @@ -671,11 +676,37 @@ read_partial_wim_resource(const struct wim_resource_spec *rspec, return read_compressed_wim_resource(rspec, &range, 1, cb, cb_ctx); } else { - return read_raw_file_data(&rspec->wim->in_fd, - rspec->offset_in_wim + offset, - size, - cb, - cb_ctx); + /* Reading uncompressed resource. For completeness, handle the + * weird case where size_in_wim < uncompressed_size. */ + + u64 read_size; + u64 zeroes_size; + int ret; + + if (likely(offset + size <= rspec->size_in_wim) || + rspec->is_pipable) + { + read_size = size; + zeroes_size = 0; + } else { + if (offset >= rspec->size_in_wim) { + read_size = 0; + zeroes_size = size; + } else { + read_size = rspec->size_in_wim - offset; + zeroes_size = offset + size - rspec->size_in_wim; + } + } + + ret = read_raw_file_data(&rspec->wim->in_fd, + rspec->offset_in_wim + offset, + read_size, + cb, + cb_ctx); + if (ret) + return ret; + + return fill_zeroes(zeroes_size, cb, cb_ctx); } } @@ -727,7 +758,6 @@ read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, cb, cb_ctx); } -#ifndef __WIN32__ /* This function handles reading stream data that is located in an external * file, such as a file that has been added to the WIM image through execution * of a wimlib_add_command. @@ -748,7 +778,7 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk); - raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY); + raw_fd = topen(lte->file_on_disk, O_BINARY | O_RDONLY); if (raw_fd < 0) { ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk); return WIMLIB_ERR_OPEN; @@ -758,7 +788,6 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, filedes_close(&fd); return ret; } -#endif /* !__WIN32__ */ /* This function handles the trivial case of reading stream data that is, in * fact, already located in an in-memory buffer. */ @@ -796,11 +825,7 @@ read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, { static const read_stream_prefix_handler_t handlers[] = { [RESOURCE_IN_WIM] = read_wim_stream_prefix, - #ifdef __WIN32__ - [RESOURCE_IN_FILE_ON_DISK] = read_win32_file_prefix, - #else [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix, - #endif [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix, #ifdef WITH_FUSE [RESOURCE_IN_STAGING_FILE] = read_file_on_disk_prefix, @@ -809,6 +834,7 @@ read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix, #endif #ifdef __WIN32__ + [RESOURCE_IN_WINNT_FILE_ON_DISK] = read_winnt_file_prefix, [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, #endif }; @@ -895,14 +921,53 @@ wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_r return wim_resource_spec_to_data(&rspec, buf_ret); } +int +wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim, + u8 hash[SHA1_HASH_SIZE]) +{ + struct wim_resource_spec rspec; + int ret; + struct wim_lookup_table_entry *lte; + + wim_res_hdr_to_spec(reshdr, wim, &rspec); + + lte = new_lookup_table_entry(); + if (lte == NULL) + return WIMLIB_ERR_NOMEM; + + lte_bind_wim_resource_spec(lte, &rspec); + lte->flags = rspec.flags; + lte->size = rspec.uncompressed_size; + lte->offset_in_res = 0; + lte->unhashed = 1; + + ret = sha1_stream(lte); + + lte_unbind_wim_resource_spec(lte); + copy_hash(hash, lte->hash); + free_lookup_table_entry(lte); + return ret; +} + struct streamifier_context { struct read_stream_list_callbacks cbs; struct wim_lookup_table_entry *cur_stream; + struct wim_lookup_table_entry *next_stream; u64 cur_stream_offset; struct wim_lookup_table_entry *final_stream; size_t list_head_offset; }; +static struct wim_lookup_table_entry * +next_stream(struct wim_lookup_table_entry *lte, size_t list_head_offset) +{ + struct list_head *cur; + + cur = (struct list_head*)((u8*)lte + list_head_offset); + + return (struct wim_lookup_table_entry*)((u8*)cur->next - list_head_offset); +} + /* A consume_data_callback_t implementation that translates raw resource data * into streams, calling the begin_stream, consume_chunk, and end_stream * callback functions as appropriate. */ @@ -918,10 +983,17 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) wimlib_assert(size <= ctx->cur_stream->size - ctx->cur_stream_offset); if (ctx->cur_stream_offset == 0) { + u32 flags; + /* Starting a new stream. */ DEBUG("Begin new stream (size=%"PRIu64").", ctx->cur_stream->size); - ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, true, + + flags = BEGIN_STREAM_FLAG_PARTIAL_RESOURCE; + if (size == ctx->cur_stream->size) + flags |= BEGIN_STREAM_FLAG_WHOLE_STREAM; + ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, + flags, ctx->cbs.begin_stream_ctx); if (ret) return ret; @@ -930,18 +1002,14 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) /* Consume the chunk. */ ret = (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); + ctx->cur_stream_offset += size; if (ret) return ret; - ctx->cur_stream_offset += size; if (ctx->cur_stream_offset == ctx->cur_stream->size) { /* Finished reading all the data for a stream. */ - struct list_head *cur, *next; - - cur = (struct list_head *) - ((u8*)ctx->cur_stream + ctx->list_head_offset); - next = cur->next; + ctx->cur_stream_offset = 0; DEBUG("End stream (size=%"PRIu64").", ctx->cur_stream->size); ret = (*ctx->cbs.end_stream)(ctx->cur_stream, 0, @@ -949,15 +1017,14 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) if (ret) return ret; - if (ctx->cur_stream != ctx->final_stream) { - /* Advance to next stream. */ - ctx->cur_stream = (struct wim_lookup_table_entry *) - ((u8*)next - ctx->list_head_offset); - - ctx->cur_stream_offset = 0; - } else { - /* No more streams. */ - ctx->cur_stream = NULL; + /* Advance to next stream. */ + ctx->cur_stream = ctx->next_stream; + if (ctx->cur_stream != NULL) { + if (ctx->cur_stream != ctx->final_stream) + ctx->next_stream = next_stream(ctx->cur_stream, + ctx->list_head_offset); + else + ctx->next_stream = NULL; } } return 0; @@ -972,7 +1039,7 @@ struct hasher_context { /* Callback for starting to read a stream while calculating its SHA1 message * digest. */ static int -hasher_begin_stream(struct wim_lookup_table_entry *lte, bool is_partial_res, +hasher_begin_stream(struct wim_lookup_table_entry *lte, u32 flags, void *_ctx) { struct hasher_context *ctx = _ctx; @@ -982,7 +1049,7 @@ hasher_begin_stream(struct wim_lookup_table_entry *lte, bool is_partial_res, if (ctx->cbs.begin_stream == NULL) return 0; else - return (*ctx->cbs.begin_stream)(lte, is_partial_res, + return (*ctx->cbs.begin_stream)(lte, flags, ctx->cbs.begin_stream_ctx); } @@ -1002,6 +1069,13 @@ hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) return (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); } +static void +get_sha1_string(const u8 md[SHA1_HASH_SIZE], tchar *str) +{ + for (size_t i = 0; i < SHA1_HASH_SIZE; i++) + str += tsprintf(str, T("%02x"), md[i]); +} + /* Callback for finishing reading a stream while calculating its SHA1 message * digest. */ static int @@ -1034,9 +1108,14 @@ hasher_end_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) * that it is the same as the calculated value. */ if (!hashes_equal(hash, lte->hash)) { if (wimlib_print_errors) { - ERROR("Invalid SHA1 message digest " - "on the following WIM stream:"); - print_lookup_table_entry(lte, stderr); + tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; + tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; + get_sha1_string(lte->hash, expected_hashstr); + get_sha1_string(hash, actual_hashstr); + ERROR("The stream is corrupted!\n" + " (Expected SHA1=%"TS",\n" + " got SHA1=%"TS")", + expected_hashstr, actual_hashstr); } ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; errno = EINVAL; @@ -1060,7 +1139,7 @@ read_full_stream_with_cbs(struct wim_lookup_table_entry *lte, { int ret; - ret = (*cbs->begin_stream)(lte, false, cbs->begin_stream_ctx); + ret = (*cbs->begin_stream)(lte, 0, cbs->begin_stream_ctx); if (ret) return ret; @@ -1088,11 +1167,86 @@ read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, .consume_chunk_ctx = &hasher_ctx, .end_stream = hasher_end_stream, .end_stream_ctx = &hasher_ctx, - }; return read_full_stream_with_cbs(lte, &hasher_cbs); } +static int +read_packed_streams(struct wim_lookup_table_entry *first_stream, + struct wim_lookup_table_entry *last_stream, + u64 stream_count, + size_t list_head_offset, + const struct read_stream_list_callbacks *sink_cbs) +{ + struct data_range *ranges; + bool ranges_malloced; + struct wim_lookup_table_entry *cur_stream; + size_t i; + int ret; + u64 ranges_alloc_size; + + DEBUG("Reading %"PRIu64" streams combined in same WIM resource", + stream_count); + + /* Setup data ranges array (one range per stream to read); this way + * read_compressed_wim_resource() does not need to be aware of streams. + */ + + ranges_alloc_size = stream_count * sizeof(ranges[0]); + + if (unlikely((size_t)ranges_alloc_size != ranges_alloc_size)) { + ERROR("Too many streams in one resource!"); + return WIMLIB_ERR_NOMEM; + } + if (likely(ranges_alloc_size <= STACK_MAX)) { + ranges = alloca(ranges_alloc_size); + ranges_malloced = false; + } else { + ranges = MALLOC(ranges_alloc_size); + if (ranges == NULL) { + ERROR("Too many streams in one resource!"); + return WIMLIB_ERR_NOMEM; + } + ranges_malloced = true; + } + + for (i = 0, cur_stream = first_stream; + i < stream_count; + i++, cur_stream = next_stream(cur_stream, list_head_offset)) + { + ranges[i].offset = cur_stream->offset_in_res; + ranges[i].size = cur_stream->size; + } + + struct streamifier_context streamifier_ctx = { + .cbs = *sink_cbs, + .cur_stream = first_stream, + .next_stream = next_stream(first_stream, list_head_offset), + .cur_stream_offset = 0, + .final_stream = last_stream, + .list_head_offset = list_head_offset, + }; + + ret = read_compressed_wim_resource(first_stream->rspec, + ranges, + stream_count, + streamifier_cb, + &streamifier_ctx); + + if (ranges_malloced) + FREE(ranges); + + if (ret) { + if (streamifier_ctx.cur_stream_offset != 0) { + ret = (*streamifier_ctx.cbs.end_stream) + (streamifier_ctx.cur_stream, + ret, + streamifier_ctx.cbs.end_stream_ctx); + } + } + return ret; +} + /* * Read a list of streams, each of which may be in any supported location (e.g. * in a WIM or in an external file). Unlike read_stream_prefix() or the @@ -1125,6 +1279,9 @@ read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, * STREAM_LIST_ALREADY_SORTED * @stream_list is already sorted in sequential order for reading. * + * The callback functions are allowed to delete the current stream from the list + * if necessary. + * * Returns 0 on success; a nonzero error code on failure. Failure can occur due * to an error reading the data or due to an error status being returned by any * of the callback functions. @@ -1178,7 +1335,7 @@ read_stream_list(struct list_head *stream_list, struct wim_lookup_table_entry *lte_next, *lte_last; struct list_head *next2; - size_t stream_count; + u64 stream_count; /* The next stream is a proper sub-sequence of a WIM * resource. See if there are other streams in the same @@ -1207,52 +1364,13 @@ read_stream_list(struct list_head *stream_list, * first stream in the resource that needs to be * read and @lte_last specifies the last stream * in the resource that needs to be read. */ - - DEBUG("Reading %zu streams combined in same " - "WIM resource", stream_count); - next = next2; - - struct data_range ranges[stream_count]; - - { - struct list_head *next3; - size_t i; - struct wim_lookup_table_entry *lte_cur; - - next3 = cur; - for (i = 0; i < stream_count; i++) { - lte_cur = (struct wim_lookup_table_entry*) - ((u8*)next3 - list_head_offset); - ranges[i].offset = lte_cur->offset_in_res; - ranges[i].size = lte_cur->size; - next3 = next3->next; - } - } - - struct streamifier_context streamifier_ctx = { - .cbs = *sink_cbs, - .cur_stream = lte, - .cur_stream_offset = 0, - .final_stream = lte_last, - .list_head_offset = list_head_offset, - }; - - ret = read_compressed_wim_resource(lte->rspec, - ranges, - stream_count, - streamifier_cb, - &streamifier_ctx); - - if (ret) { - if (streamifier_ctx.cur_stream_offset != 0) { - ret = (*streamifier_ctx.cbs.end_stream) - (streamifier_ctx.cur_stream, - ret, - streamifier_ctx.cbs.end_stream_ctx); - } + ret = read_packed_streams(lte, lte_last, + stream_count, + list_head_offset, + sink_cbs); + if (ret) return ret; - } continue; } } @@ -1314,6 +1432,15 @@ extract_stream_to_fd(struct wim_lookup_table_entry *lte, return extract_stream(lte, size, extract_chunk_to_fd, fd); } +/* Extract the full uncompressed contents of the specified stream to the + * specified file descriptor. */ +int +extract_full_stream_to_fd(struct wim_lookup_table_entry *lte, + struct filedes *fd) +{ + return extract_stream_to_fd(lte, fd, lte->size); +} + /* Calculate the SHA1 message digest of a stream and store it in @lte->hash. */ int sha1_stream(struct wim_lookup_table_entry *lte) @@ -1325,7 +1452,10 @@ sha1_stream(struct wim_lookup_table_entry *lte) } /* Convert a short WIM resource header to a stand-alone WIM resource - * specification. */ + * specification. + * + * Note: for packed resources some fields still need to be overridden. + */ void wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, struct wim_resource_spec *rspec) @@ -1337,6 +1467,13 @@ wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, INIT_LIST_HEAD(&rspec->stream_list); rspec->flags = reshdr->flags; rspec->is_pipable = wim_is_pipable(wim); + if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) { + rspec->compression_type = wim->compression_type; + rspec->chunk_size = wim->chunk_size; + } else { + rspec->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; + rspec->chunk_size = 0; + } } /* Convert a stand-alone resource specification to a WIM resource header. */