From f7e62e27ceae4cf2ad2cc74b1a97ebf3015b95eb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 20 Dec 2013 14:21:03 -0600 Subject: [PATCH] Use read_stream_list() for extraction --- include/wimlib/apply.h | 3 + include/wimlib/dentry.h | 4 - include/wimlib/file_io.h | 1 + include/wimlib/ntfs_3g.h | 3 +- include/wimlib/resource.h | 60 ++- src/extract.c | 327 +++++++++----- src/lookup_table.c | 9 +- src/mount_image.c | 5 + src/ntfs-3g_capture.c | 40 +- src/resource.c | 900 ++++++++++++++++++++------------------ src/write.c | 5 +- 11 files changed, 762 insertions(+), 595 deletions(-) diff --git a/include/wimlib/apply.h b/include/wimlib/apply.h index f8023a37..11a03651 100644 --- a/include/wimlib/apply.h +++ b/include/wimlib/apply.h @@ -1,6 +1,7 @@ #ifndef _WIMLIB_APPLY_H #define _WIMLIB_APPLY_H +#include "wimlib/file_io.h" #include "wimlib/types.h" #include "wimlib/list.h" #include "wimlib.h" @@ -220,6 +221,8 @@ struct apply_ctx { unsigned long invalid_sequence; unsigned long partial_security_descriptors; unsigned long no_security_descriptors; + struct filedes tmpfile_fd; + tchar *tmpfile_name; u64 num_streams_remaining; bool root_dentry_is_special; uint64_t next_progress; diff --git a/include/wimlib/dentry.h b/include/wimlib/dentry.h index 12a8f174..a2735e5e 100644 --- a/include/wimlib/dentry.h +++ b/include/wimlib/dentry.h @@ -230,10 +230,6 @@ struct wim_dentry { * alias file_name. */ tchar *extraction_name; size_t extraction_name_nchars; - - /* (Extraction only) List head for building a list of dentries that - * contain a certain stream. 
*/ - struct list_head extraction_stream_list; }; #define rbnode_dentry(node) container_of(node, struct wim_dentry, rb_node) diff --git a/include/wimlib/file_io.h b/include/wimlib/file_io.h index 85f293f1..01f97178 100644 --- a/include/wimlib/file_io.h +++ b/include/wimlib/file_io.h @@ -3,6 +3,7 @@ #include #include +#include /* Wrapper around a file descriptor that keeps track of offset (including in * pipes, which don't support lseek()) and a cached flag that tells whether the diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h index fd93fa7b..b8ff3723 100644 --- a/include/wimlib/ntfs_3g.h +++ b/include/wimlib/ntfs_3g.h @@ -14,8 +14,7 @@ extern int read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t cb, - u32 in_chunk_size, - void *ctx_or_buf, + void *cb_ctx, int _ignored_flags); diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h index 249b0bc8..1d68649d 100644 --- a/include/wimlib/resource.h +++ b/include/wimlib/resource.h @@ -151,17 +151,15 @@ put_wim_reshdr(const struct wim_reshdr *reshdr, #define WIMLIB_READ_RESOURCE_MASK 0xffff0000 -/* Functions to read a resource. 
*/ - -extern int -read_partial_wim_resource(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 in_chunk_size, void *ctx_or_buf, - int flags, u64 offset); +/* Functions to read streams */ extern int read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, size_t size, u64 offset, void *buf); + +extern int +skip_wim_stream(struct wim_lookup_table_entry *lte); + extern int read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf); @@ -176,35 +174,59 @@ wim_reshdr_to_data(const struct wim_reshdr *reshdr, extern int read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t cb, - u32 in_chunk_size, void *ctx_or_buf, int flags); + void *cb_ctx, int flags); + +typedef int (*read_stream_list_begin_stream_t)(struct wim_lookup_table_entry *lte, + bool is_partial_res, + void *ctx); +typedef int (*read_stream_list_end_stream_t)(struct wim_lookup_table_entry *lte, + int status, + void *ctx); + +/* Callbacks for read_stream_list(). */ +struct read_stream_list_callbacks { -/* Functions to read a list of resources. */ + /* Called when a stream is about to be read. */ + read_stream_list_begin_stream_t begin_stream; -typedef int (*read_stream_list_begin_stream_t)(struct wim_lookup_table_entry *lte, void *ctx); -typedef int (*read_stream_list_end_stream_t)(struct wim_lookup_table_entry *lte, void *ctx); + /* Called when a chunk of data has been read. */ + consume_data_callback_t consume_chunk; + + /* Called when a stream has been fully read. */ + read_stream_list_end_stream_t end_stream; + + /* Parameter passed to @begin_stream. */ + void *begin_stream_ctx; + + /* Parameter passed to @consume_chunk. */ + void *consume_chunk_ctx; + + /* Parameter passed to @end_stream. 
*/ + void *end_stream_ctx; +}; extern int read_stream_list(struct list_head *stream_list, size_t list_head_offset, - read_stream_list_begin_stream_t begin_stream, - consume_data_callback_t consume_chunk, - read_stream_list_end_stream_t end_stream, u32 cb_chunk_size, - void *cb_ctx); + const struct read_stream_list_callbacks *cbs); -/* Functions to extract a resource. */ +/* Functions for stream extraction. */ extern int -extract_stream(const struct wim_lookup_table_entry *lte, +extract_stream(struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t extract_chunk, void *extract_chunk_arg); extern int -extract_stream_to_fd(const struct wim_lookup_table_entry *lte, +extract_stream_to_fd(struct wim_lookup_table_entry *lte, struct filedes *fd, u64 size); -/* Miscellaneous resource functions. */ +extern int +extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p); + +/* Miscellaneous stream functions. */ extern int sha1_stream(struct wim_lookup_table_entry *lte); diff --git a/src/extract.c b/src/extract.c index 1907e699..59396717 100644 --- a/src/extract.c +++ b/src/extract.c @@ -165,7 +165,7 @@ ref_stream_to_extract(struct wim_lookup_table_entry *lte, lte_dentries = REALLOC(prev_lte_dentries, alloc_lte_dentries * sizeof(lte_dentries[0])); - if (!lte_dentries) + if (lte_dentries == NULL) return WIMLIB_ERR_NOMEM; if (prev_lte_dentries == NULL) { memcpy(lte_dentries, @@ -1202,23 +1202,49 @@ dentry_extract(struct wim_dentry *dentry, void *_ctx) return extract_streams(path, ctx, dentry, NULL, NULL); } +/* Creates a temporary file opened for writing. The open file descriptor is + * returned in @fd_ret and its name is returned in @name_ret (dynamically + * allocated). 
*/ +static int +create_temporary_file(struct filedes *fd_ret, tchar **name_ret) +{ + tchar *name; + int raw_fd; + +retry: + name = ttempnam(NULL, T("wimlib")); + if (name == NULL) { + ERROR_WITH_ERRNO("Failed to create temporary filename"); + return WIMLIB_ERR_NOMEM; + } + + raw_fd = topen(name, O_WRONLY | O_CREAT | O_EXCL | O_BINARY, 0600); + + if (raw_fd < 0) { + int errno_save = errno; + FREE(name); + if (errno_save == EEXIST) + goto retry; + ERROR_WITH_ERRNO("Failed to open temporary file \"%"TS"\"", name); + return WIMLIB_ERR_OPEN; + } + + filedes_init(fd_ret, raw_fd); + *name_ret = name; + return 0; +} + /* Extract all instances of the stream @lte that are being extracted in this - * call of extract_tree(). @can_seek specifies whether the WIM file descriptor - * is seekable or not (e.g. is a pipe). If not and the stream needs to be - * extracted multiple times, it is extracted to a temporary file first. - * - * This is intended for use with sequential extraction of a WIM image - * (WIMLIB_EXTRACT_FLAG_SEQUENTIAL specified). */ + * call of extract_tree(), but actually read the stream data from @lte_override. + */ static int extract_stream_instances(struct wim_lookup_table_entry *lte, - struct apply_ctx *ctx, bool can_seek) + struct wim_lookup_table_entry *lte_override, + struct apply_ctx *ctx) { struct wim_dentry **lte_dentries; - struct wim_lookup_table_entry *lte_tmp = NULL; - struct wim_lookup_table_entry *lte_override; - tchar *stream_tmp_filename = NULL; tchar path[ctx->ops->path_max]; - unsigned i; + size_t i; int ret; if (lte->out_refcnt <= ARRAY_LEN(lte->inline_lte_dentries)) @@ -1226,49 +1252,6 @@ extract_stream_instances(struct wim_lookup_table_entry *lte, else lte_dentries = lte->lte_dentries; - if (likely(can_seek || lte->out_refcnt < 2)) { - lte_override = lte; - } else { - /* Need to extract stream to temporary file. 
*/ - struct filedes fd; - int raw_fd; - - stream_tmp_filename = ttempnam(NULL, T("wimlib")); - if (!stream_tmp_filename) { - ERROR_WITH_ERRNO("Failed to create temporary filename"); - ret = WIMLIB_ERR_OPEN; - goto out; - } - - lte_tmp = memdup(lte, sizeof(struct wim_lookup_table_entry)); - if (!lte_tmp) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_stream_tmp_filename; - } - lte_tmp->resource_location = RESOURCE_IN_FILE_ON_DISK; - lte_tmp->file_on_disk = stream_tmp_filename; - lte_override = lte_tmp; - - raw_fd = topen(stream_tmp_filename, - O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0600); - if (raw_fd < 0) { - ERROR_WITH_ERRNO("Failed to open temporary file"); - ret = WIMLIB_ERR_OPEN; - goto out_free_lte_tmp; - } - filedes_init(&fd, raw_fd); - ret = extract_stream_to_fd(lte, &fd, lte->size); - if (filedes_close(&fd) && !ret) - ret = WIMLIB_ERR_WRITE; - if (ret) - goto out_unlink_stream_tmp_file; - } - - /* Extract all instances of the stream, reading either from the stream - * in the WIM file or from the temporary file containing the stream. - * dentry->tmp_flag is used to ensure that each dentry is processed only - * once regardless of how many times this stream appears in the streams - * of the corresponding inode. 
*/ for (i = 0; i < lte->out_refcnt; i++) { struct wim_dentry *dentry = lte_dentries[i]; @@ -1276,8 +1259,7 @@ extract_stream_instances(struct wim_lookup_table_entry *lte, continue; if (!build_extraction_path(path, dentry, ctx)) continue; - ret = extract_streams(path, ctx, dentry, - lte, lte_override); + ret = extract_streams(path, ctx, dentry, lte, lte_override); if (ret) goto out_clear_tmp_flags; dentry->tmp_flag = 1; @@ -1286,38 +1268,140 @@ extract_stream_instances(struct wim_lookup_table_entry *lte, out_clear_tmp_flags: for (i = 0; i < lte->out_refcnt; i++) lte_dentries[i]->tmp_flag = 0; -out_unlink_stream_tmp_file: - if (stream_tmp_filename) - tunlink(stream_tmp_filename); -out_free_lte_tmp: - FREE(lte_tmp); -out_free_stream_tmp_filename: - FREE(stream_tmp_filename); -out: + return ret; +} + +/* Determine whether the specified stream needs to be extracted to a temporary + * file or not. + * + * @lte->out_refcnt specifies the number of instances of this stream that must + * be extracted. + * + * @is_partial_res is %true if this stream is just one of multiple in a single + * WIM resource being extracted. */ +static bool +need_tmpfile_to_extract(struct wim_lookup_table_entry *lte, + bool is_partial_res) +{ + /* Temporary file is always required when reading a partial resource, + * since in that case we retrieve all the contained streams in one pass. + * */ + if (is_partial_res) + return true; + + /* Otherwise we don't need a temporary file if only a single instance of + * the stream is needed. */ + if (lte->out_refcnt == 1) + return false; + + wimlib_assert(lte->out_refcnt >= 2); + + /* We also don't need a temporary file if random access to the stream is + * allowed. 
*/ + if (lte->resource_location != RESOURCE_IN_WIM || + filedes_is_seekable(&lte->rspec->wim->in_fd)) + return false; + + return true; +} + +static int +begin_extract_stream_to_tmpfile(struct wim_lookup_table_entry *lte, + bool is_partial_res, void *_ctx) +{ + struct apply_ctx *ctx = _ctx; + int ret; + + if (!need_tmpfile_to_extract(lte, is_partial_res)) { + DEBUG("Temporary file not needed " + "for stream (size=%"PRIu64")", lte->size); + ret = extract_stream_instances(lte, lte, ctx); + if (ret) + return ret; + + /* Negative return value here means the function was successful, + * but the consume_chunk and end_stream callbacks need not be + * called. */ + return -1; + } + + DEBUG("Temporary file needed for stream (size=%"PRIu64")", lte->size); + return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name); +} + +static int +end_extract_stream_to_tmpfile(struct wim_lookup_table_entry *lte, + int status, void *_ctx) +{ + struct apply_ctx *ctx = _ctx; + struct wim_lookup_table_entry lte_override; + int ret; + int errno_save = errno; + + ret = filedes_close(&ctx->tmpfile_fd); + + if (status) { + ret = status; + errno = errno_save; + goto out_delete_tmpfile; + } + + if (ret) { + ERROR_WITH_ERRNO("Error writing temporary file %"TS, ctx->tmpfile_name); + ret = WIMLIB_ERR_WRITE; + goto out_delete_tmpfile; + } + + /* Now that a full stream has been extracted to a temporary file, + * extract all instances of it to the actual target. */ + + memcpy(&lte_override, lte, sizeof(struct wim_lookup_table_entry)); + lte_override.resource_location = RESOURCE_IN_FILE_ON_DISK; + lte_override.file_on_disk = ctx->tmpfile_name; + + ret = extract_stream_instances(lte, &lte_override, ctx); + +out_delete_tmpfile: + errno_save = errno; + tunlink(ctx->tmpfile_name); + FREE(ctx->tmpfile_name); + errno = errno_save; return ret; } /* Extracts a list of streams (ctx.stream_list), assuming that the directory * structure and empty files were already created.
This relies on the * per-`struct wim_lookup_table_entry' list of dentries that reference each - * stream that was constructed earlier. Streams are extracted exactly in the - * order of the stream list; however, unless the WIM's file descriptor is - * detected to be non-seekable, streams may be read from the WIM file more than - * one time if multiple copies need to be extracted. */ + * stream that was constructed earlier. */ static int extract_stream_list(struct apply_ctx *ctx) { - struct wim_lookup_table_entry *lte; - bool can_seek; - int ret; + if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) { + /* Sequential extraction: read the streams in the order in which + * they appear in the WIM file. */ + struct read_stream_list_callbacks cbs = { + .begin_stream = begin_extract_stream_to_tmpfile, + .begin_stream_ctx = ctx, + .consume_chunk = extract_chunk_to_fd, + .consume_chunk_ctx = &ctx->tmpfile_fd, + .end_stream = end_extract_stream_to_tmpfile, + .end_stream_ctx = ctx, + }; + return read_stream_list(&ctx->stream_list, + offsetof(struct wim_lookup_table_entry, extraction_list), + 0, &cbs); + } else { + /* Extract the streams in unsorted order. */ + struct wim_lookup_table_entry *lte; + int ret; - can_seek = (lseek(ctx->wim->in_fd.fd, 0, SEEK_CUR) != -1); - list_for_each_entry(lte, &ctx->stream_list, extraction_list) { - ret = extract_stream_instances(lte, ctx, can_seek); - if (ret) - return ret; + list_for_each_entry(lte, &ctx->stream_list, extraction_list) { + ret = extract_stream_instances(lte, lte, ctx); + if (ret) + return ret; + } + return 0; } - return 0; } #define PWM_ALLOW_WIM_HDR 0x00001 @@ -1376,25 +1460,6 @@ read_error: return ret; } -static int -skip_pwm_chunk_cb(const void *chunk, size_t chunk_size, void *_ctx) -{ - return 0; -} - -/* Skip over an unneeded stream in a pipable WIM being read from a pipe. 
*/ -static int -skip_pwm_stream(struct wim_lookup_table_entry *lte) -{ - return read_partial_wim_resource(lte, - lte->size, - skip_pwm_chunk_cb, - lte_cchunk_size(lte), - NULL, - WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS, - 0); -} - static int extract_streams_from_pipe(struct apply_ctx *ctx) { @@ -1444,19 +1509,60 @@ extract_streams_from_pipe(struct apply_ctx *ctx) && (needed_lte = lookup_resource(lookup_table, found_lte->hash)) && (needed_lte->out_refcnt)) { - lte_unbind_wim_resource_spec(found_lte); - lte_bind_wim_resource_spec(needed_lte, rspec); + char *tmpfile_name = NULL; + struct wim_lookup_table_entry *lte_override; + struct wim_lookup_table_entry tmpfile_lte; + needed_lte->offset_in_res = found_lte->offset_in_res; needed_lte->flags = found_lte->flags; needed_lte->size = found_lte->size; - ret = extract_stream_instances(needed_lte, ctx, false); - lte_unbind_wim_resource_spec(needed_lte); + lte_unbind_wim_resource_spec(found_lte); + lte_bind_wim_resource_spec(needed_lte, rspec); + + if (needed_lte->out_refcnt > 1) { + + struct filedes tmpfile_fd; + + /* Extract stream to temporary file. 
*/ + ret = create_temporary_file(&tmpfile_fd, &tmpfile_name); + if (ret) + goto out_free_found_lte; + + ret = extract_stream_to_fd(needed_lte, &tmpfile_fd, + needed_lte->size); + if (ret) { + filedes_close(&tmpfile_fd); + goto delete_tmpfile; + } + + if (filedes_close(&tmpfile_fd)) { + ERROR_WITH_ERRNO("Error writing to temporary " + "file \"%"TS"\"", tmpfile_name); + ret = WIMLIB_ERR_WRITE; + goto delete_tmpfile; + } + memcpy(&tmpfile_lte, needed_lte, + sizeof(struct wim_lookup_table_entry)); + tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK; + tmpfile_lte.file_on_disk = tmpfile_name; + lte_override = &tmpfile_lte; + } else { + lte_override = needed_lte; + } + + ret = extract_stream_instances(needed_lte, lte_override, ctx); + delete_tmpfile: + lte_unbind_wim_resource_spec(needed_lte); + if (tmpfile_name) { + tunlink(tmpfile_name); + FREE(tmpfile_name); + } if (ret) goto out_free_found_lte; ctx->num_streams_remaining--; } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) { - ret = skip_pwm_stream(found_lte); + ret = skip_wim_stream(found_lte); if (ret) goto out_free_found_lte; } else { @@ -2268,21 +2374,6 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target, goto out_teardown_stream_list; } - /* If a sequential extraction was specified, sort the streams to be - * extracted by their position in the WIM file so that the WIM file can - * be read sequentially. 
*/ - if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SEQUENTIAL | - WIMLIB_EXTRACT_FLAG_FROM_PIPE)) - == WIMLIB_EXTRACT_FLAG_SEQUENTIAL) - { - ret = sort_stream_list_by_sequential_order( - &ctx.stream_list, - offsetof(struct wim_lookup_table_entry, - extraction_list)); - if (ret) - goto out_teardown_stream_list; - } - if (ctx.ops->realpath_works_on_nonexisting_files && ((extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) || ctx.ops->requires_realtarget_in_paths)) @@ -2920,7 +3011,7 @@ wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name, } else { /* Metadata resource is not for the image being * extracted. Skip over it. */ - ret = skip_pwm_stream(metadata_lte); + ret = skip_wim_stream(metadata_lte); if (ret) goto out_wimlib_free; } diff --git a/src/lookup_table.c b/src/lookup_table.c index 43287341..fe0872cc 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -82,7 +82,6 @@ new_lookup_table_entry(void) } lte->refcnt = 1; BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); return lte; } @@ -311,6 +310,7 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) const struct wim_lookup_table_entry *lte1, *lte2; int v; WIMStruct *wim1, *wim2; + u64 offset1, offset2; lte1 = *(const struct wim_lookup_table_entry**)p1; lte2 = *(const struct wim_lookup_table_entry**)p2; @@ -339,9 +339,12 @@ cmp_streams_by_sequential_order(const void *p1, const void *p2) return v; /* Compare by offset. 
*/ - if (lte1->rspec->offset_in_wim < lte2->rspec->offset_in_wim) + offset1 = lte1->rspec->offset_in_wim + lte1->offset_in_res; + offset2 = lte2->rspec->offset_in_wim + lte2->offset_in_res; + + if (offset1 < offset2) return -1; - if (lte1->rspec->offset_in_wim > lte2->rspec->offset_in_wim) + if (offset1 > offset2) return 1; return 0; case RESOURCE_IN_FILE_ON_DISK: diff --git a/src/mount_image.c b/src/mount_image.c index 7543b6e4..99db4ab3 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -2414,6 +2414,11 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, return ret; } + if (wim->hdr.wim_version == WIM_VERSION_STREAM_CONCAT) { + WARNING("WIM contains streams not compressed independently; " + "access may be slow."); + } + ret = select_wim_image(wim, image); if (ret) return ret; diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index 829c41c9..7a07cd3e 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -78,8 +78,7 @@ int read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t cb, - u32 in_chunk_size, - void *ctx_or_buf, + void *cb_ctx, int _ignored_flags) { struct ntfs_location *loc = lte->ntfs_loc; @@ -88,9 +87,8 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, ntfs_attr *na; s64 pos; s64 bytes_remaining; - void *out_buf; - bool out_buf_malloced; int ret; + u8 buf[BUFFER_SIZE]; ni = ntfs_pathname_to_inode(vol, NULL, loc->path); if (!ni) { @@ -105,44 +103,22 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, goto out_close_ntfs_inode; } - out_buf_malloced = false; - if (cb) { - if (in_chunk_size <= STACK_MAX) { - out_buf = alloca(in_chunk_size); - } else { - out_buf = MALLOC(in_chunk_size); - if (out_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out_close_ntfs_attr; - } - out_buf_malloced = true; - } - } else { - out_buf = ctx_or_buf; - } pos = (loc->is_reparse_point) ? 
8 : 0; bytes_remaining = size; while (bytes_remaining) { - s64 to_read = min(bytes_remaining, in_chunk_size); - if (ntfs_attr_pread(na, pos, to_read, out_buf) != to_read) { + s64 to_read = min(bytes_remaining, sizeof(buf)); + if (ntfs_attr_pread(na, pos, to_read, buf) != to_read) { ERROR_WITH_ERRNO("Error reading \"%"TS"\"", loc->path); ret = WIMLIB_ERR_NTFS_3G; - goto out_free_memory; + goto out_close_ntfs_attr; } pos += to_read; bytes_remaining -= to_read; - if (cb) { - ret = cb(out_buf, to_read, ctx_or_buf); - if (ret) - goto out_free_memory; - } else { - out_buf += to_read; - } + ret = cb(buf, to_read, cb_ctx); + if (ret) + goto out_close_ntfs_attr; } ret = 0; -out_free_memory: - if (out_buf_malloced) - FREE(out_buf); out_close_ntfs_attr: ntfs_attr_close(na); out_close_ntfs_inode: diff --git a/src/resource.c b/src/resource.c index 1539460e..90197d79 100644 --- a/src/resource.c +++ b/src/resource.c @@ -135,7 +135,41 @@ struct alt_chunk_table_header_disk { * the chunks. */ } _packed_attribute; -/* Read data from a compressed WIM resource. */ +/* + * read_compressed_wim_resource() - + * + * Read data from a compressed WIM resource. + * + * @rspec + * Specification of the compressed WIM resource to read from. + * @ranges + * Nonoverlapping, nonempty ranges of the uncompressed resource data to + * read, sorted by increasing offset. + * @num_ranges + * Number of ranges in @ranges; must be at least 1. + * @cb + * Callback function to feed the data being read. Each call provides the + * next chunk of the requested data. Each chunk will be of nonzero size + * and will not cross range boundaries, but otherwise is of unspecified + * size. + * @cb_ctx + * Parameter to pass to @cb. + * @raw_chunks_mode + * If %true, this function will provide the raw compressed chunks of the + * resource rather than the uncompressed data. In this mode, only a single + * data range can be requested, and it must cover the entire uncompressed + * resource.
+ * + * Possible return values: + * + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_READ (errno set) + * WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0) + * WIMLIB_ERR_NOMEM (errno set to ENOMEM) + * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL) + * + * or other error code returned by the @cb function. + */ static int read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const struct data_range * const ranges, @@ -161,6 +195,9 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, wimlib_assert(cb != NULL); wimlib_assert(num_ranges != 0); for (size_t i = 0; i < num_ranges; i++) { + DEBUG("Range %zu/%zu: %"PRIu64"@+%"PRIu64" / %"PRIu64, + i + 1, num_ranges, ranges[i].size, ranges[i].offset, + rspec->uncompressed_size); wimlib_assert(ranges[i].size != 0); wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size); wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size); @@ -168,6 +205,12 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, for (size_t i = 0; i < num_ranges - 1; i++) wimlib_assert(ranges[i].offset + ranges[i].size <= ranges[i + 1].offset); + if (raw_chunks_mode) { + wimlib_assert(num_ranges == 1); + wimlib_assert(ranges[0].offset == 0); + wimlib_assert(ranges[0].size == rspec->uncompressed_size); + } + /* Get the offsets of the first and last bytes of the read. */ const u64 first_offset = ranges[0].offset; const u64 last_offset = ranges[num_ranges - 1].offset + ranges[num_ranges - 1].size - 1; @@ -242,7 +285,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Calculate the size of the chunk table in bytes. */ const u64 chunk_table_size = num_chunk_entries * chunk_entry_size; - /* Includes header */ + /* Calculate the size of the chunk table in bytes, including the header + * in the case of the alternate chunk table format. */ const u64 chunk_table_full_size = (alt_chunk_table) ? 
chunk_table_size + sizeof(struct alt_chunk_table_header_disk) : chunk_table_size; @@ -332,6 +376,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, *chunk_offsets_p++ = cur_offset; cur_offset += entry; } + if (last_needed_chunk < num_chunks - 1) + *chunk_offsets_p = cur_offset; } else { if (read_start_chunk == 0) *chunk_offsets_p++ = 0; @@ -380,12 +426,13 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } } - /* Read and process each needed chunk. */ + /* Set current data range. */ const struct data_range *cur_range = ranges; const struct data_range * const end_range = &ranges[num_ranges]; u64 cur_range_pos = cur_range->offset; u64 cur_range_end = cur_range->offset + cur_range->size; + /* Read and process each needed chunk. */ for (u64 i = read_start_chunk; i <= last_needed_chunk; i++) { /* Calculate uncompressed size of next chunk. */ @@ -426,7 +473,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, if (rspec->is_pipable) cur_read_offset += sizeof(struct pwm_chunk_hdr); - /* Uncompressed offsets */ + /* Offsets in the uncompressed resource at which this chunk + * starts and ends. */ const u64 chunk_start_offset = i << chunk_order; const u64 chunk_end_offset = chunk_start_offset + chunk_usize; @@ -434,7 +482,6 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* The next range does not require data in this chunk, * so skip it. 
*/ - cur_read_offset += chunk_csize; if (is_pipe_read) { u8 dummy; @@ -457,6 +504,9 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, goto read_error; if (chunk_csize != chunk_usize && !raw_chunks_mode) { + DEBUG("Decompressing chunk %"PRIu64" " + "(csize=%"PRIu64" usize=%"PRIu64"", + i, chunk_csize, chunk_usize); ret = decompress(cbuf, chunk_csize, ubuf, @@ -471,16 +521,21 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } cb_buf = ubuf; } else { + /* Raw chunks requested, or data stored + * uncompressed. */ cb_buf = cbuf; } cur_read_offset += chunk_csize; - /* At least one range requires data in this chunk. - * However, the data fed to the callback function must - * not overlap range boundaries. */ + /* At least one range requires data in this chunk. */ do { size_t start, end, size; + /* Calculate how many bytes of data should be + * sent to the callback function, taking into + * account that data sent to the callback + * function must not overlap range boundaries. + */ start = cur_range_pos - chunk_start_offset; end = min(cur_range_end, chunk_end_offset) - chunk_start_offset; size = end - start; @@ -495,6 +550,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, cur_range_pos += size; if (cur_range_pos == cur_range_end) { + /* Advance to next range. */ if (++cur_range == end_range) { cur_range_pos = ~0ULL; } else { @@ -506,13 +562,15 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } } - if (is_pipe_read - && last_offset == rspec->uncompressed_size - 1 - && chunk_table_size) + if (is_pipe_read && + last_offset == rspec->uncompressed_size - 1 && + chunk_table_size) { u8 dummy; - /* Skip chunk table at end of pipable resource. */ - + /* If reading a pipable resource from a pipe and the full data + * was requested, skip the chunk table at the end so that the + * file descriptor is fully clear of the resource after this + * returns. 
*/ cur_read_offset += chunk_table_size; ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1); if (ret) @@ -532,65 +590,40 @@ out_free_memory: oom: ERROR("Not enough memory available to read size=%"PRIu64" bytes " - "from compressed resource!", last_offset - first_offset + 1); + "from compressed WIM resource!", last_offset - first_offset + 1); errno = ENOMEM; ret = WIMLIB_ERR_NOMEM; goto out_free_memory; read_error: - ERROR_WITH_ERRNO("Error reading compressed file resource!"); + ERROR_WITH_ERRNO("Error reading compressed WIM resource!"); goto out_free_memory; } -/* Read raw data from a file descriptor at the specified offset. */ +/* Read raw data from a file descriptor at the specified offset, feeding the + * data in chunks into the specified callback function. */ static int -read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, u64 offset) +read_raw_file_data(struct filedes *in_fd, u64 size, + consume_data_callback_t cb, void *cb_ctx, u64 offset) { + u8 buf[BUFFER_SIZE]; + size_t bytes_to_read; int ret; - u8 *tmp_buf; - bool tmp_buf_malloced = false; - if (cb) { - /* Send data to callback function in chunks. */ - if (cb_chunk_size <= STACK_MAX) { - tmp_buf = alloca(cb_chunk_size); - } else { - tmp_buf = MALLOC(cb_chunk_size); - if (tmp_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out; - } - tmp_buf_malloced = true; - } - - while (size) { - size_t bytes_to_read = min(cb_chunk_size, size); - ret = full_pread(in_fd, tmp_buf, bytes_to_read, - offset); - if (ret) - goto read_error; - ret = cb(tmp_buf, bytes_to_read, ctx_or_buf); - if (ret) - goto out; - size -= bytes_to_read; - offset += bytes_to_read; + while (size) { + bytes_to_read = min(sizeof(buf), size); + ret = full_pread(in_fd, buf, bytes_to_read, offset); + if (ret) { + ERROR_WITH_ERRNO("Read error"); + return ret; } - } else { - /* Read data directly into buffer.
*/ - ret = full_pread(in_fd, ctx_or_buf, size, offset); + ret = cb(buf, bytes_to_read, cb_ctx); if (ret) - goto read_error; + return ret; + size -= bytes_to_read; + offset += bytes_to_read; } - ret = 0; - goto out; - -read_error: - ERROR_WITH_ERRNO("Read error"); -out: - if (tmp_buf_malloced) - FREE(tmp_buf); - return ret; + return 0; } static int @@ -602,71 +635,15 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) return 0; } -struct rechunker_context { - u8 *buffer; - u32 buffer_filled; - u32 cb_chunk_size; - - const struct data_range *ranges; - size_t num_ranges; - size_t cur_range; - u64 range_bytes_remaining; - - consume_data_callback_t cb; - void *cb_ctx; -}; - -static int -rechunker_cb(const void *chunk, size_t size, void *_ctx) -{ - struct rechunker_context *ctx = _ctx; - const u8 *chunkptr = chunk; - size_t bytes_to_copy; - int ret; - - wimlib_assert(ctx->cur_range != ctx->num_ranges); - - while (size) { - bytes_to_copy = size; - - if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled) - bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled; - - if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled) - bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled; - - memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy); - - ctx->buffer_filled += bytes_to_copy; - chunkptr += bytes_to_copy; - size -= bytes_to_copy; - ctx->range_bytes_remaining -= bytes_to_copy; - - if (ctx->buffer_filled == ctx->cb_chunk_size || - ctx->range_bytes_remaining == 0) - { - ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx); - if (ret) - return ret; - ctx->buffer_filled = 0; - - if (ctx->range_bytes_remaining == 0 && - ++ctx->cur_range != ctx->num_ranges) - ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size; - } - } - return 0; -} - /* * read_partial_wim_resource()- * * Read a range of data from an uncompressed or compressed resource in a WIM - * file. 
Data is written into a buffer or fed into a callback function, as - * documented in read_stream_prefix(). + * file. Data is fed chunk-by-chunk into the callback function @cb, passing it + * the argument @cb_ctx. * * By default, this function provides the uncompressed data of the resource, and - * @size and @offset and interpreted relative to the uncompressed contents of + * @offset and @size are interpreted relative to the uncompressed contents of * the resource. This behavior can be modified by either of the following * flags: * @@ -689,135 +666,99 @@ rechunker_cb(const void *chunk, size_t size, void *_ctx) * * or other error code returned by the @cb function. */ -int -read_partial_wim_resource(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, - int flags, u64 offset) +static int +read_partial_wim_resource(const struct wim_resource_spec *rspec, + u64 offset, u64 size, consume_data_callback_t cb, + void *cb_ctx, int flags) { - const struct wim_resource_spec *rspec; - struct filedes *in_fd; - - /* Verify parameters. */ - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - rspec = lte->rspec; - in_fd = &rspec->wim->in_fd; - if (cb) - wimlib_assert(is_power_of_2(cb_chunk_size)); - if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) { - /* Raw chunks mode is subject to the restrictions noted. */ - wimlib_assert(!lte_is_partial(lte)); - wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)); - wimlib_assert(cb_chunk_size == rspec->cchunk_size); - wimlib_assert(size == lte->size); + /* Sanity checks. 
*/ + if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) { + wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)); + wimlib_assert(offset + size >= offset); + wimlib_assert(offset + size <= rspec->size_in_wim); + } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) { wimlib_assert(offset == 0); - } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) { - /* Raw full mode: read must not overrun end of store size. */ - wimlib_assert(!lte_is_partial(lte)); - wimlib_assert(offset + size >= size && - offset + size <= rspec->size_in_wim); + wimlib_assert(size == rspec->uncompressed_size); } else { - /* Normal mode: read must not overrun end of original size. */ - wimlib_assert(offset + size >= size && - offset + size <= lte->size); + wimlib_assert(offset + size >= offset); + wimlib_assert(offset + size <= rspec->uncompressed_size); } - DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] " - "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" " - "(readflags 0x%08x, resflags 0x%02x%s)", - size, offset, lte->offset_in_res, - rspec->size_in_wim, - rspec->uncompressed_size, - rspec->offset_in_wim, - flags, lte->flags, - (rspec->is_pipable ? ", pipable" : "")); + DEBUG("Reading %"PRIu64" @ %"PRIu64" from WIM resource " + "%"PRIu64" => %"PRIu64" @ %"PRIu64" (flags 0x%08x)", + size, offset, rspec->uncompressed_size, + rspec->size_in_wim, rspec->offset_in_wim, flags); + + /* Trivial case. 
*/ + if (size == 0) + return 0; if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) || rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE) { - return read_raw_file_data(in_fd, + return read_raw_file_data(&rspec->wim->in_fd, size, cb, - cb_chunk_size, - ctx_or_buf, - rspec->offset_in_wim + lte->offset_in_res + offset); + cb_ctx, + rspec->offset_in_wim + offset); } else { - bool raw_chunks; - struct data_range range; - consume_data_callback_t internal_cb; - void *internal_cb_ctx; - u8 *buf; - bool rechunker_buf_malloced = false; - struct rechunker_context *rechunker_ctx; - int ret; - - if (size == 0) - return 0; - - range.offset = lte->offset_in_res + offset; - range.size = size; - raw_chunks = !!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); - - if (cb != NULL && - cb_chunk_size == rspec->cchunk_size && - !(rspec->flags & WIM_RESHDR_FLAG_CONCAT)) - { - internal_cb = cb; - internal_cb_ctx = ctx_or_buf; - } else if (cb == NULL) { - buf = ctx_or_buf; - internal_cb = bufferer_cb; - internal_cb_ctx = &buf; - } else { - rechunker_ctx = alloca(sizeof(struct rechunker_context)); - - if (cb_chunk_size <= STACK_MAX) { - rechunker_ctx->buffer = alloca(cb_chunk_size); - } else { - rechunker_ctx->buffer = MALLOC(cb_chunk_size); - if (rechunker_ctx->buffer == NULL) - return WIMLIB_ERR_NOMEM; - rechunker_buf_malloced = true; - } - rechunker_ctx->buffer_filled = 0; - rechunker_ctx->cb_chunk_size = cb_chunk_size; - - rechunker_ctx->ranges = &range; - rechunker_ctx->num_ranges = 1; - rechunker_ctx->cur_range = 0; - rechunker_ctx->range_bytes_remaining = range.size; + bool raw_chunks = !!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); + struct data_range range = { + .offset = offset, + .size = size, + }; + return read_compressed_wim_resource(rspec, &range, 1, + cb, cb_ctx, raw_chunks); + } +} - rechunker_ctx->cb = cb; - rechunker_ctx->cb_ctx = ctx_or_buf; +int +read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, + size_t size, u64 offset, void *_buf) +{ + u8 *buf = _buf; - 
internal_cb = rechunker_cb; - internal_cb_ctx = rechunker_ctx; - } + wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - ret = read_compressed_wim_resource(rspec, &range, 1, - internal_cb, internal_cb_ctx, - raw_chunks); - if (rechunker_buf_malloced) - FREE(rechunker_ctx->buffer); + return read_partial_wim_resource(lte->rspec, + lte->offset_in_res + offset, + size, + bufferer_cb, + &buf, + 0); +} - return ret; - } +static int +skip_chunk_cb(const void *chunk, size_t size, void *_ctx) +{ + return 0; } +/* Skip over the data of the specified stream, which must correspond to a full + * WIM resource. */ int -read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, - size_t size, u64 offset, void *buf) +skip_wim_stream(struct wim_lookup_table_entry *lte) { - return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset); + wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); + wimlib_assert(!lte_is_partial(lte)); + return read_partial_wim_resource(lte->rspec, + 0, + lte->rspec->uncompressed_size, + skip_chunk_cb, + NULL, + WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS); } static int read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int flags) + consume_data_callback_t cb, void *cb_ctx, int flags) { - return read_partial_wim_resource(lte, size, cb, cb_chunk_size, - ctx_or_buf, flags, 0); + return read_partial_wim_resource(lte->rspec, + lte->offset_in_res, + size, + cb, + cb_ctx, + flags); } #ifndef __WIN32__ @@ -828,18 +769,18 @@ read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, * This assumes the file can be accessed using the standard POSIX open(), * read(), and close(). On Windows this will not necessarily be the case (since * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be - * encrypted), so Windows uses its own code for its equivalent case. 
- */ + * encrypted), so Windows uses its own code for its equivalent case. */ static int read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int _ignored_flags) + consume_data_callback_t cb, void *cb_ctx, + int _ignored_flags) { int ret; int raw_fd; struct filedes fd; wimlib_assert(size <= lte->size); + DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk); raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY); @@ -848,7 +789,7 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, return WIMLIB_ERR_OPEN; } filedes_init(&fd, raw_fd); - ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0); + ret = read_raw_file_data(&fd, size, cb, cb_ctx, 0); filedes_close(&fd); return ret; } @@ -859,34 +800,16 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, static int read_buffer_prefix(const struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags) + void *cb_ctx, int _ignored_flags) { wimlib_assert(size <= lte->size); - - if (cb) { - /* Feed the data into the callback function in - * appropriately-sized chunks. */ - int ret; - u32 chunk_size; - - for (u64 offset = 0; offset < size; offset += chunk_size) { - chunk_size = min(cb_chunk_size, size - offset); - ret = cb((const u8*)lte->attached_buffer + offset, - chunk_size, ctx_or_buf); - if (ret) - return ret; - } - } else { - /* Copy the data directly into the specified buffer. 
*/ - memcpy(ctx_or_buf, lte->attached_buffer, size); - } - return 0; + return (*cb)(lte->attached_buffer, size, cb_ctx); } typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte, - u64 size, consume_data_callback_t cb, - u32 cb_chunk_size, void *ctx_or_buf, - int flags); + u64 size, + consume_data_callback_t cb, + void *cb_ctx, int flags); /* * read_stream_prefix()- @@ -895,33 +818,20 @@ typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry * any one of several locations, such as in a WIM file (compressed or * uncompressed), in an external file, or directly in an in-memory buffer. * - * This function feeds the data either to a callback function (@cb != NULL, - * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL, - * @ctx_or_buf specifies the buffer, which must have room for at least @size - * bytes). - * - * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks - * to feed the callback function. @cb_chunk_size must be positive, and if the - * stream is in a WIM file, must be a power of 2. All chunks, except possibly - * the last one, will be this size. If (@cb == NULL), @cb_chunk_size is - * ignored. + * This function feeds the data to a callback function @cb. * * If the stream is located in a WIM file, @flags can be set as documented in * read_partial_wim_resource(). Otherwise @flags are ignored. * * Returns 0 on success; nonzero on error. A nonzero value will be returned if * the stream data cannot be successfully read (for a number of different - * reasons, depending on the stream location), or if a callback function was - * specified and it returned nonzero. + * reasons, depending on the stream location), or if @cb returned nonzero in + * which case that error code will be returned. 
*/ int read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, u32 cb_chunk_size, - void *ctx_or_buf, int flags) + consume_data_callback_t cb, void *cb_ctx, int flags) { - /* This function merely verifies several preconditions, then passes - * control to an appropriate function for understanding each possible - * stream location. */ static const read_stream_prefix_handler_t handlers[] = { [RESOURCE_IN_WIM] = read_wim_stream_prefix, #ifdef __WIN32__ @@ -942,17 +852,16 @@ read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, }; wimlib_assert(lte->resource_location < ARRAY_LEN(handlers) && handlers[lte->resource_location] != NULL); - wimlib_assert(cb == NULL || cb_chunk_size > 0); - return handlers[lte->resource_location](lte, size, cb, cb_chunk_size, - ctx_or_buf, flags); + return handlers[lte->resource_location](lte, size, cb, cb_ctx, flags); } /* Read the full uncompressed data of the specified stream into the specified * buffer, which must have space for at least lte->size bytes. */ int -read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf) +read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *_buf) { - return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0); + u8 *buf = _buf; + return read_stream_prefix(lte, lte->size, bufferer_cb, &buf, 0); } /* Read the full uncompressed data of the specified stream. 
A buffer sufficient @@ -1021,56 +930,226 @@ wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_r return wim_resource_spec_to_data(&rspec, buf_ret); } -struct read_stream_list_ctx { - read_stream_list_begin_stream_t begin_stream; - consume_data_callback_t consume_chunk; - read_stream_list_end_stream_t end_stream; - void *begin_stream_ctx; - void *consume_chunk_ctx; - void *end_stream_ctx; +struct streamifier_context { + struct read_stream_list_callbacks cbs; struct wim_lookup_table_entry *cur_stream; u64 cur_stream_offset; struct wim_lookup_table_entry *final_stream; size_t list_head_offset; }; +/* Callback for translating raw resource data into streams. */ static int -read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) +streamifier_cb(const void *chunk, size_t size, void *_ctx) { - struct read_stream_list_ctx *ctx = _ctx; + struct streamifier_context *ctx = _ctx; int ret; + DEBUG("%zu bytes passed to streamifier", size); + + wimlib_assert(ctx->cur_stream != NULL); + if (ctx->cur_stream_offset == 0) { /* Starting a new stream. */ - ret = (*ctx->begin_stream)(ctx->cur_stream, ctx->begin_stream_ctx); + DEBUG("Begin new stream (size=%"PRIu64").", ctx->cur_stream->size); + ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, true, ctx->cbs.begin_stream_ctx); if (ret) return ret; } - ret = (*ctx->consume_chunk)(chunk, size, ctx->consume_chunk_ctx); + /* Consume the chunk. */ + ret = (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); if (ret) return ret; - ctx->cur_stream_offset += size; if (ctx->cur_stream_offset == ctx->cur_stream->size) { - /* Finished reading all the data for a stream; advance - * to the next one. */ - ret = (*ctx->end_stream)(ctx->cur_stream, ctx->end_stream_ctx); + /* Finished reading all the data for a stream; advance to the + * next one. 
*/ + DEBUG("End stream (size=%"PRIu64").", ctx->cur_stream->size); + ret = (*ctx->cbs.end_stream)(ctx->cur_stream, 0, ctx->cbs.end_stream_ctx); if (ret) return ret; - if (ctx->cur_stream == ctx->final_stream) - return 0; + if (ctx->cur_stream != ctx->final_stream) { + struct list_head *cur = (struct list_head *) + ((u8*)ctx->cur_stream + ctx->list_head_offset); + struct list_head *next = cur->next; - struct list_head *cur = (struct list_head *) - ((u8*)ctx->cur_stream + ctx->list_head_offset); - struct list_head *next = cur->next; + ctx->cur_stream = (struct wim_lookup_table_entry *) + ((u8*)next - ctx->list_head_offset); - ctx->cur_stream = (struct wim_lookup_table_entry *) - ((u8*)next - ctx->list_head_offset); + ctx->cur_stream_offset = 0; + } else { + ctx->cur_stream = NULL; + } + } + return 0; +} - ctx->cur_stream_offset = 0; +struct hasher_context { + SHA_CTX sha_ctx; + struct read_stream_list_callbacks cbs; +}; + +/* Callback for starting to read a stream while calculating its SHA1 message + * digest. */ +static int +hasher_begin_stream(struct wim_lookup_table_entry *lte, bool is_partial_res, + void *_ctx) +{ + struct hasher_context *ctx = _ctx; + + sha1_init(&ctx->sha_ctx); + + if (ctx->cbs.begin_stream == NULL) + return 0; + else + return (*ctx->cbs.begin_stream)(lte, is_partial_res, + ctx->cbs.begin_stream_ctx); +} + +/* Callback for continuing to read a stream while calculating its SHA1 message + * digest. */ +static int +hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) +{ + struct hasher_context *ctx = _ctx; + + sha1_update(&ctx->sha_ctx, chunk, size); + if (ctx->cbs.consume_chunk == NULL) + return 0; + else + return (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); +} + +/* Callback for finishing reading a stream while calculating its SHA1 message + * digest. 
*/ +static int +hasher_end_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) +{ + struct hasher_context *ctx = _ctx; + u8 hash[SHA1_HASH_SIZE]; + int ret; + + if (status) { + ret = status; + goto out_next_cb; + } + + sha1_final(hash, &ctx->sha_ctx); + + if (lte->unhashed) { + /* No SHA1 message digest was present before; fill it in with + * the calculated value. */ + DEBUG("Set SHA1 message digest for stream (size=%"PRIu64").", lte->size); + copy_hash(lte->hash, hash); + } else { + /* A SHA1 message digest was present before. Verify that it is + * the same as the calculated value. */ + if (!hashes_equal(hash, lte->hash)) { + if (wimlib_print_errors) { + ERROR("Invalid SHA1 message digest " + "on the following WIM stream:"); + print_lookup_table_entry(lte, stderr); + if (lte->resource_location == RESOURCE_IN_WIM) + ERROR("The WIM file appears to be corrupt!"); + } + ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; + errno = EINVAL; + goto out_next_cb; + } + DEBUG("SHA1 message digest okay for stream (size=%"PRIu64").", lte->size); + } + ret = 0; +out_next_cb: + if (ctx->cbs.end_stream == NULL) + return ret; + else + return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx); +} + +/* Read the full data of the stream @lte, passing the data into the specified + * callbacks (all of which are optional) and either checking or computing the + * SHA1 message digest of the stream. 
*/ +static int +read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, + const struct read_stream_list_callbacks *cbs) +{ + int ret; + + struct hasher_context hasher_ctx = { + .cbs = *cbs, + }; + + ret = hasher_begin_stream(lte, false, &hasher_ctx); + if (ret) + return ret; + + ret = read_stream_prefix(lte, lte->size, hasher_consume_chunk, + &hasher_ctx, 0); + + return hasher_end_stream(lte, ret, &hasher_ctx); +} + +struct rechunkifier_context { + u8 *buffer; + u32 buffer_filled; + u32 cb_chunk_size; + + const struct data_range *ranges; + size_t num_ranges; + size_t cur_range; + u64 range_bytes_remaining; + + consume_data_callback_t cb; + void *cb_ctx; +}; + +/* Wrapper callback for adjusting the data chunk size. */ +static int +rechunkifier_cb(const void *chunk, size_t size, void *_ctx) +{ + struct rechunkifier_context *ctx = _ctx; + const u8 *chunkptr = chunk; + size_t bytes_to_copy; + int ret; + + wimlib_assert(ctx->cur_range != ctx->num_ranges); + + while (size) { + + /* Append more data to the buffer. */ + bytes_to_copy = size; + + if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled) + bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled; + + if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled) + bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled; + + memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy); + + ctx->buffer_filled += bytes_to_copy; + chunkptr += bytes_to_copy; + size -= bytes_to_copy; + ctx->range_bytes_remaining -= bytes_to_copy; + + if (ctx->buffer_filled == ctx->cb_chunk_size || + ctx->range_bytes_remaining == 0) + { + /* Maximum chunk size reached, or current range ended. 
+ * Call the next consume_data_callback_t and empty the + * buffer */ + ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx); + if (ret) + return ret; + ctx->buffer_filled = 0; + + if (ctx->range_bytes_remaining == 0 && + ++ctx->cur_range != ctx->num_ranges) + ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size; + } } return 0; } @@ -1088,18 +1167,11 @@ read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) * @list_head_offset * Offset of the `struct list_head' within each `struct * wim_lookup_table_entry' that makes up the @stream_list. - * @begin_stream - * Callback for starting to process a stream. - * @consume_chunk - * Callback for receiving a chunk of stream data. - * @end_stream - * Callback for finishing the processing of a stream. * @cb_chunk_size * Size of chunks to provide to @consume_chunk. For a given stream, all * the chunks will be this size, except possibly the last which will be the - * remainder. - * @cb_ctx - * Parameter to pass to the callback functions. + * remainder. If @cb_chunk_size is 0, chunks are passed on as read. + * @cbs Callbacks, with their contexts, to invoke for each stream. * * Returns 0 on success; a nonzero error code on failure. Failure can occur due * to an error reading the data or due to an error status being returned by any @@ -1108,11 +1180,8 @@ read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx) int read_stream_list(struct list_head *stream_list, size_t list_head_offset, - read_stream_list_begin_stream_t begin_stream, - consume_data_callback_t consume_chunk, - read_stream_list_end_stream_t end_stream, u32 cb_chunk_size, - void *cb_ctx) + const struct read_stream_list_callbacks *cbs) { int ret; struct list_head *cur, *next; @@ -1162,6 +1231,9 @@ read_stream_list(struct list_head *stream_list, * read and @lte_last specifies the last stream * in the resource that needs to be read. 
*/ + DEBUG("Reading %zu streams combined in same " + "WIM resource", stream_count); + next = next2; struct data_range ranges[stream_count]; @@ -1181,28 +1253,73 @@ read_stream_list(struct list_head *stream_list, } } - struct rechunker_context rechunker_ctx = { - .buffer = MALLOC(cb_chunk_size), - .buffer_filled = 0, - .cb_chunk_size = cb_chunk_size, - .ranges = ranges, - .num_ranges = stream_count, - .cur_range = 0, - .range_bytes_remaining = ranges[0].size, - .cb = consume_chunk, - .cb_ctx = cb_ctx, - }; + /* Set up a chain of callbacks. + * + * The first level is the + * streamifier_cb, + * which takes in chunks of data and divides + * them into the constituent streams. + * + * The second level are the SHA1 message digest + * callbacks, which checksum each stream. + * + * rechunkifier_cb handles dividing the read + * data into chunks of maximum size + * @cb_chunk_size. If @cb_chunk_size is 0, then + * this callback is not needed. + * + * Finally, the last level of callbacks are + * @cbs, passed as arguments to this function. 
+ */ + + struct rechunkifier_context *rechunkifier_ctx = NULL; + consume_data_callback_t last_cb; + void *last_cb_ctx; + + if (cb_chunk_size != 0) { + rechunkifier_ctx = alloca(sizeof(*rechunkifier_ctx)); + *rechunkifier_ctx = (struct rechunkifier_context) { + .buffer = MALLOC(cb_chunk_size), + .buffer_filled = 0, + .cb_chunk_size = cb_chunk_size, + .ranges = ranges, + .num_ranges = stream_count, + .cur_range = 0, + .range_bytes_remaining = ranges[0].size, + .cb = cbs->consume_chunk, + .cb_ctx = cbs->consume_chunk_ctx, + }; + + if (rechunkifier_ctx->buffer == NULL) + return WIMLIB_ERR_NOMEM; + last_cb = rechunkifier_cb; + last_cb_ctx = rechunkifier_ctx; + } else { + rechunkifier_ctx = NULL; + last_cb = cbs->consume_chunk; + last_cb_ctx = cbs->consume_chunk_ctx; + } - if (rechunker_ctx.buffer == NULL) - return WIMLIB_ERR_NOMEM; + struct hasher_context hasher_ctx = { + .cbs = { + .begin_stream = cbs->begin_stream, + .begin_stream_ctx = cbs->begin_stream_ctx, + .consume_chunk = last_cb, + .consume_chunk_ctx = last_cb_ctx, + .end_stream = cbs->end_stream, + .end_stream_ctx = cbs->end_stream_ctx, + }, + }; - struct read_stream_list_ctx ctx = { - .begin_stream = begin_stream, - .begin_stream_ctx = cb_ctx, - .consume_chunk = rechunker_cb, - .consume_chunk_ctx = &rechunker_ctx, - .end_stream = end_stream, - .end_stream_ctx = cb_ctx, + struct streamifier_context streamifier_ctx = { + .cbs = { + .begin_stream = hasher_begin_stream, + .begin_stream_ctx = &hasher_ctx, + .consume_chunk = hasher_consume_chunk, + .consume_chunk_ctx = &hasher_ctx, + .end_stream = hasher_end_stream, + .end_stream_ctx = &hasher_ctx, + }, .cur_stream = lte, .cur_stream_offset = 0, .final_stream = lte_last, @@ -1212,133 +1329,88 @@ read_stream_list(struct list_head *stream_list, ret = read_compressed_wim_resource(lte->rspec, ranges, stream_count, - read_stream_list_wrapper_cb, - &ctx, + streamifier_cb, + &streamifier_ctx, false); - FREE(rechunker_ctx.buffer); - if (ret) + if (rechunkifier_ctx != NULL) 
+ FREE(rechunkifier_ctx->buffer); + + if (ret) { + if (streamifier_ctx.cur_stream_offset != 0) { + ret = (*streamifier_ctx.cbs.end_stream) + (streamifier_ctx.cur_stream, + ret, + streamifier_ctx.cbs.end_stream_ctx); + } return ret; + } continue; } } - ret = (*begin_stream)(lte, cb_ctx); - if (ret) - return ret; - - ret = read_stream_prefix(lte, lte->size, consume_chunk, - cb_chunk_size, cb_ctx, 0); - if (ret) - return ret; - ret = (*end_stream)(lte, cb_ctx); - if (ret) + ret = read_full_stream_with_sha1(lte, cbs); + if (ret > 0) return ret; } return 0; } -struct extract_ctx { - SHA_CTX sha_ctx; - consume_data_callback_t extract_chunk; - void *extract_chunk_arg; -}; - -static int -extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx) -{ - struct extract_ctx *ctx = _ctx; - - sha1_update(&ctx->sha_ctx, chunk, chunk_size); - return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg); -} - /* Extracts the first @size bytes of a stream to somewhere. In the process, the * SHA1 message digest of the uncompressed stream is checked if the full stream * is being extracted. * - * @extract_chunk is a function that will be called to extract each chunk of the - * stream. */ + * @extract_chunk is the callback to extract each chunk of the stream. 
*/ int -extract_stream(const struct wim_lookup_table_entry *lte, u64 size, +extract_stream(struct wim_lookup_table_entry *lte, u64 size, consume_data_callback_t extract_chunk, void *extract_chunk_arg) { - int ret; if (size == lte->size) { - /* Do SHA1 */ - struct extract_ctx ctx; - ctx.extract_chunk = extract_chunk; - ctx.extract_chunk_arg = extract_chunk_arg; - sha1_init(&ctx.sha_ctx); - ret = read_stream_prefix(lte, size, - extract_chunk_sha1_wrapper, - lte_cchunk_size(lte), - &ctx, 0); - if (ret == 0) { - u8 hash[SHA1_HASH_SIZE]; - sha1_final(hash, &ctx.sha_ctx); - if (!hashes_equal(hash, lte->hash)) { - if (wimlib_print_errors) { - ERROR("Invalid SHA1 message digest " - "on the following WIM stream:"); - print_lookup_table_entry(lte, stderr); - if (lte->resource_location == RESOURCE_IN_WIM) - ERROR("The WIM file appears to be corrupt!"); - } - ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; - } - } + /* Do SHA1. */ + struct read_stream_list_callbacks cbs = { + .consume_chunk = extract_chunk, + .consume_chunk_ctx = extract_chunk_arg, + }; + return read_full_stream_with_sha1(lte, &cbs); } else { - /* Don't do SHA1 */ - ret = read_stream_prefix(lte, size, extract_chunk, - lte_cchunk_size(lte), - extract_chunk_arg, 0); + /* Don't do SHA1. */ + return read_stream_prefix(lte, size, extract_chunk, + extract_chunk_arg, 0); } - return ret; } -static int -extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p) +/* Write a chunk of data to a file descriptor. This function can be passed as a + * consume_data_callback_t. */ +int +extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p) { struct filedes *fd = _fd_p; - int ret = full_write(fd, buf, len); - if (ret) + + int ret = full_write(fd, chunk, size); + if (ret) { ERROR_WITH_ERRNO("Error writing to file descriptor"); - return ret; + return ret; + } + + return 0; } /* Extract the first @size bytes of the specified stream to the specified file * descriptor. 
If @size is the full size of the stream, its SHA1 message digest * is also checked. */ int -extract_stream_to_fd(const struct wim_lookup_table_entry *lte, +extract_stream_to_fd(struct wim_lookup_table_entry *lte, struct filedes *fd, u64 size) { - return extract_stream(lte, size, extract_wim_chunk_to_fd, fd); -} - - -static int -sha1_chunk(const void *buf, size_t len, void *ctx) -{ - sha1_update(ctx, buf, len); - return 0; + return extract_stream(lte, size, extract_chunk_to_fd, fd); } /* Calculate the SHA1 message digest of a stream, storing it in @lte->hash. */ int sha1_stream(struct wim_lookup_table_entry *lte) { - int ret; - SHA_CTX sha_ctx; - - sha1_init(&sha_ctx); - ret = read_stream_prefix(lte, lte->size, - sha1_chunk, lte_cchunk_size(lte), - &sha_ctx, 0); - if (ret == 0) - sha1_final(lte->hash, &sha_ctx); - - return ret; + struct read_stream_list_callbacks cbs = { + }; + return read_full_stream_with_sha1(lte, &cbs); } /* Convert a WIM resource header to a stand-alone resource specification. */ @@ -1390,7 +1462,7 @@ get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr, reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size); reshdr->flags = disk_reshdr->flags; - /* Truncate numbers to 62 bits to avoid possible overflows. */ + /* Avoid possible overflows. 
*/ if (reshdr->offset_in_wim & 0xc000000000000000ULL) return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; diff --git a/src/write.c b/src/write.c index 63c91b2a..a3f4213f 100644 --- a/src/write.c +++ b/src/write.c @@ -532,7 +532,7 @@ try_write_again: else in_chunk_size = out_chunk_size; ret = read_stream_prefix(lte, read_size, write_resource_cb, - in_chunk_size, &write_ctx, resource_flags); + &write_ctx, resource_flags); if (ret) goto out_free_chunk_tab; @@ -1480,8 +1480,7 @@ submit_stream_for_compression(struct wim_lookup_table_entry *lte, ctx->next_lte = lte; INIT_LIST_HEAD(&lte->msg_list); list_add_tail(&lte->being_compressed_list, &ctx->outstanding_streams); - ret = read_stream_prefix(lte, lte->size, main_writer_thread_cb, - ctx->out_chunk_size, ctx, 0); + ret = read_stream_prefix(lte, lte->size, main_writer_thread_cb, ctx, 0); if (ret) return ret; wimlib_assert(ctx->next_chunk == ctx->next_num_chunks); -- 2.43.0