X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fextract.c;h=3adbca342ddfa65e50f61e9e11ca30be5c16fcc4;hp=df144785c92ce76e43d0b8937c92365a8cb89290;hb=b5b9681794d1f5f13350e3567f6f6e74f5c779cf;hpb=b6f6a919c8291da9cf2a9ea72ec7f8e47fbd79cf diff --git a/src/extract.c b/src/extract.c index df144785..3adbca34 100644 --- a/src/extract.c +++ b/src/extract.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2014 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -24,428 +24,533 @@ * along with wimlib; if not, see http://www.gnu.org/licenses/. */ +/* + * This file provides the API functions wimlib_extract_image(), + * wimlib_extract_image_from_pipe(), wimlib_extract_paths(), and + * wimlib_extract_pathlist(). Internally, all end up calling + * do_wimlib_extract_paths() and extract_trees(). + * + * Although wimlib supports multiple extraction modes/backends (NTFS-3g, UNIX, + * Win32), this file does not itself have code to extract files or directories + * to any specific target; instead, it handles generic functionality and relies + * on lower-level callback functions declared in `struct apply_operations' to do + * the actual extraction. 
+ */ + #ifdef HAVE_CONFIG_H # include "config.h" #endif -#ifdef __WIN32__ -# include "wimlib/win32_common.h" /* For GetFullPathName() */ -#endif - #include "wimlib/apply.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/lookup_table.h" +#include "wimlib/metadata.h" +#include "wimlib/pathlist.h" #include "wimlib/paths.h" +#include "wimlib/reparse.h" #include "wimlib/resource.h" -#include "wimlib/swm.h" +#include "wimlib/security.h" #ifdef __WIN32__ # include "wimlib/win32.h" /* for realpath() equivalent */ #endif #include "wimlib/xml.h" +#include "wimlib/wildcard.h" +#include "wimlib/wim.h" #include -#include -#ifdef WITH_NTFS_3G -# include /* for ntfs_mount(), ntfs_umount() */ -#endif +#include #include #include #include -#define MAX_LONG_PATH_WARNINGS 5 +#define WIMLIB_EXTRACT_FLAG_MULTI_IMAGE 0x80000000 +#define WIMLIB_EXTRACT_FLAG_FROM_PIPE 0x40000000 +#define WIMLIB_EXTRACT_FLAG_IMAGEMODE 0x20000000 + +/* Keep in sync with wimlib.h */ +#define WIMLIB_EXTRACT_MASK_PUBLIC \ + (WIMLIB_EXTRACT_FLAG_NTFS | \ + WIMLIB_EXTRACT_FLAG_UNIX_DATA | \ + WIMLIB_EXTRACT_FLAG_NO_ACLS | \ + WIMLIB_EXTRACT_FLAG_STRICT_ACLS | \ + WIMLIB_EXTRACT_FLAG_RPFIX | \ + WIMLIB_EXTRACT_FLAG_NORPFIX | \ + WIMLIB_EXTRACT_FLAG_TO_STDOUT | \ + WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES | \ + WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS | \ + WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS | \ + WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES | \ + WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS | \ + WIMLIB_EXTRACT_FLAG_GLOB_PATHS | \ + WIMLIB_EXTRACT_FLAG_STRICT_GLOB | \ + WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES | \ + WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE | \ + WIMLIB_EXTRACT_FLAG_WIMBOOT) + +/* Check whether the extraction of a dentry should be skipped completely. 
*/ +static bool +dentry_is_supported(struct wim_dentry *dentry, + const struct wim_features *supported_features) +{ + struct wim_inode *inode = dentry->d_inode; + + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + return supported_features->reparse_points || + (inode_is_symlink(inode) && + supported_features->symlink_reparse_points); + } + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { + if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) + return supported_features->encrypted_directories != 0; + else + return supported_features->encrypted_files != 0; + } + return true; +} + + +#define PWM_ALLOW_WIM_HDR 0x00001 +#define PWM_SILENT_EOF 0x00002 +/* Read the header from a stream in a pipable WIM. */ static int -do_apply_op(struct wim_dentry *dentry, struct apply_args *args, - int (*apply_dentry_func)(const tchar *, size_t, - struct wim_dentry *, struct apply_args *)) +read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte, + struct wim_resource_spec *rspec, + int flags, struct wim_header_disk *hdr_ret) { - tchar *p; - size_t extraction_path_nchars; - struct wim_dentry *d; - LIST_HEAD(ancestor_list); - const tchar *target; - size_t target_nchars; + union { + struct pwm_stream_hdr stream_hdr; + struct wim_header_disk pwm_hdr; + } buf; + struct wim_reshdr reshdr; + int ret; -#ifdef __WIN32__ - if (args->target_lowlevel_path) { - target = args->target_lowlevel_path; - target_nchars = args->target_lowlevel_path_nchars; - } else -#endif - { - target = args->target; - target_nchars = args->target_nchars; - } + ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr)); + if (ret) + goto read_error; - extraction_path_nchars = target_nchars; + if ((flags & PWM_ALLOW_WIM_HDR) && buf.stream_hdr.magic == PWM_MAGIC) { + BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr)); + ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1, + sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr)); - for (d = dentry; d != args->extract_root; d = 
d->parent) { - if (d->not_extracted) - return 0; - extraction_path_nchars += d->extraction_name_nchars + 1; - list_add(&d->tmp_list, &ancestor_list); + if (ret) + goto read_error; + lte->resource_location = RESOURCE_NONEXISTENT; + memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr)); + return 0; } - tchar extraction_path[extraction_path_nchars + 1]; - p = tmempcpy(extraction_path, target, target_nchars); + if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) { + ERROR("Data read on pipe is invalid (expected stream header)."); + return WIMLIB_ERR_INVALID_PIPABLE_WIM; + } + copy_hash(lte->hash, buf.stream_hdr.hash); + + reshdr.size_in_wim = 0; + reshdr.flags = le32_to_cpu(buf.stream_hdr.flags); + reshdr.offset_in_wim = pwm->in_fd.offset; + reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size); + wim_res_hdr_to_spec(&reshdr, pwm, rspec); + lte_bind_wim_resource_spec(lte, rspec); + lte->flags = rspec->flags; + lte->size = rspec->uncompressed_size; + lte->offset_in_res = 0; + return 0; - list_for_each_entry(d, &ancestor_list, tmp_list) { - *p++ = OS_PREFERRED_PATH_SEPARATOR; - p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars); - } - *p = T('\0'); +read_error: + if (ret != WIMLIB_ERR_UNEXPECTED_END_OF_FILE || !(flags & PWM_SILENT_EOF)) + ERROR_WITH_ERRNO("Error reading pipable WIM from pipe"); + return ret; +} -#ifdef __WIN32__ - /* Warn the user if the path exceeds MAX_PATH */ +static int +load_streams_from_pipe(struct apply_ctx *ctx, + const struct read_stream_list_callbacks *cbs) +{ + struct wim_lookup_table_entry *found_lte = NULL; + struct wim_resource_spec *rspec = NULL; + struct wim_lookup_table *lookup_table; + int ret; + + ret = WIMLIB_ERR_NOMEM; + found_lte = new_lookup_table_entry(); + if (!found_lte) + goto out; + + rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (!rspec) + goto out; + + lookup_table = ctx->wim->lookup_table; + memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN); + 
ctx->progress.extract.part_number = ctx->wim->hdr.part_number; + ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts; + ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN); + if (ret) + goto out; - /* + 1 for '\0', -4 for \\?\. */ - if (extraction_path_nchars + 1 - 4 > MAX_PATH) { - if (dentry->needs_extraction && - args->num_long_paths < MAX_LONG_PATH_WARNINGS) + while (ctx->num_streams_remaining) { + struct wim_header_disk pwm_hdr; + struct wim_lookup_table_entry *needed_lte; + + if (found_lte->resource_location != RESOURCE_NONEXISTENT) + lte_unbind_wim_resource_spec(found_lte); + ret = read_pwm_stream_header(ctx->wim, found_lte, rspec, + PWM_ALLOW_WIM_HDR, &pwm_hdr); + if (ret) + goto out; + + if ((found_lte->resource_location != RESOURCE_NONEXISTENT) + && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA) + && (needed_lte = lookup_stream(lookup_table, found_lte->hash)) + && (needed_lte->out_refcnt)) { - WARNING("Path \"%ls\" exceeds MAX_PATH and will not be accessible " - "to most Windows software", extraction_path); - if (++args->num_long_paths == MAX_LONG_PATH_WARNINGS) - WARNING("Suppressing further warnings about long paths"); + needed_lte->offset_in_res = found_lte->offset_in_res; + needed_lte->flags = found_lte->flags; + needed_lte->size = found_lte->size; + + lte_unbind_wim_resource_spec(found_lte); + lte_bind_wim_resource_spec(needed_lte, rspec); + + ret = (*cbs->begin_stream)(needed_lte, 0, + cbs->begin_stream_ctx); + if (ret) { + lte_unbind_wim_resource_spec(needed_lte); + goto out; + } + + ret = extract_stream(needed_lte, needed_lte->size, + cbs->consume_chunk, + cbs->consume_chunk_ctx); + + ret = (*cbs->end_stream)(needed_lte, ret, + cbs->end_stream_ctx); + lte_unbind_wim_resource_spec(needed_lte); + if (ret) + goto out; + ctx->num_streams_remaining--; + } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) { + ret = skip_wim_stream(found_lte); + if (ret) + goto out; + } else { + u16 part_number = 
le16_to_cpu(pwm_hdr.part_number); + u16 total_parts = le16_to_cpu(pwm_hdr.total_parts); + + if (part_number != ctx->progress.extract.part_number || + total_parts != ctx->progress.extract.total_parts || + memcmp(pwm_hdr.guid, ctx->progress.extract.guid, + WIM_GUID_LEN)) + { + ctx->progress.extract.part_number = part_number; + ctx->progress.extract.total_parts = total_parts; + memcpy(ctx->progress.extract.guid, + pwm_hdr.guid, WIM_GUID_LEN); + ret = extract_progress(ctx, + WIMLIB_PROGRESS_MSG_EXTRACT_SPWM_PART_BEGIN); + if (ret) + goto out; + } } } -#endif - return (*apply_dentry_func)(extraction_path, extraction_path_nchars, - dentry, args); + ret = 0; +out: + if (found_lte->resource_location != RESOURCE_IN_WIM) + FREE(rspec); + free_lookup_table_entry(found_lte); + return ret; } - -/* Extracts a file, directory, or symbolic link from the WIM archive. */ static int -apply_dentry_normal(struct wim_dentry *dentry, void *arg) +begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte, + u32 flags, void *_ctx) { -#ifdef __WIN32__ - return do_apply_op(dentry, arg, win32_do_apply_dentry); -#else - return do_apply_op(dentry, arg, unix_do_apply_dentry); -#endif -} + struct apply_ctx *ctx = _ctx; + ctx->cur_stream = lte; -/* Apply timestamps to an extracted file or directory */ -static int -apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg) -{ -#ifdef __WIN32__ - return do_apply_op(dentry, arg, win32_do_apply_dentry_timestamps); -#else - return do_apply_op(dentry, arg, unix_do_apply_dentry_timestamps); -#endif -} - -static bool -dentry_is_dot_or_dotdot(const struct wim_dentry *dentry) -{ - const utf16lechar *file_name = dentry->file_name; - return file_name != NULL && - file_name[0] == cpu_to_le16('.') && - (file_name[1] == cpu_to_le16('\0') || - (file_name[1] == cpu_to_le16('.') && - file_name[2] == cpu_to_le16('\0'))); + return (*ctx->saved_cbs->begin_stream)(lte, flags, + ctx->saved_cbs->begin_stream_ctx); } -/* Extract a dentry if it hasn't 
already been extracted and either - * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory - * and/or has no unnamed stream. */ static int -maybe_apply_dentry(struct wim_dentry *dentry, void *arg) +consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx) { - struct apply_args *args = arg; + struct apply_ctx *ctx = _ctx; + union wimlib_progress_info *progress = &ctx->progress; int ret; - if (!dentry->needs_extraction) - return 0; - - if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS && - !dentry_is_directory(dentry) && - inode_unnamed_lte_resolved(dentry->d_inode) != NULL) - return 0; + if (likely(ctx->supported_features.hard_links)) { + progress->extract.completed_bytes += + (u64)size * ctx->cur_stream->out_refcnt; + } else { + const struct stream_owner *owners = stream_owners(ctx->cur_stream); + for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) { + const struct wim_inode *inode = owners[i].inode; + const struct wim_dentry *dentry; + + list_for_each_entry(dentry, + &inode->i_extraction_aliases, + d_extraction_alias_node) + { + progress->extract.completed_bytes += size; + } + } + } + if (progress->extract.completed_bytes >= ctx->next_progress) { - if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) && - args->progress_func) { - ret = calculate_dentry_full_path(dentry); + ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS); if (ret) return ret; - args->progress.extract.cur_path = dentry->_full_path; - args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY, - &args->progress); - } - ret = args->apply_dentry(dentry, args); - if (ret == 0) - dentry->needs_extraction = 0; - return ret; -} - -static void -calculate_bytes_to_extract(struct list_head *stream_list, - int extract_flags, - union wimlib_progress_info *progress) -{ - struct wim_lookup_table_entry *lte; - u64 total_bytes = 0; - u64 num_streams = 0; - /* For each stream to be extracted... 
*/ - list_for_each_entry(lte, stream_list, extraction_list) { - if (extract_flags & - (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) + if (progress->extract.completed_bytes >= + progress->extract.total_bytes) { - /* In the symlink or hard link extraction mode, each - * stream will be extracted one time regardless of how - * many dentries share the stream. */ - wimlib_assert(!(extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)); - if (!lte->extracted_file) { - num_streams++; - total_bytes += wim_resource_size(lte); - } + ctx->next_progress = UINT64_MAX; } else { - num_streams += lte->out_refcnt; - total_bytes += lte->out_refcnt * wim_resource_size(lte); + ctx->next_progress += progress->extract.total_bytes / 128; + if (ctx->next_progress > progress->extract.total_bytes) + ctx->next_progress = progress->extract.total_bytes; } } - progress->extract.num_streams = num_streams; - progress->extract.total_bytes = total_bytes; - progress->extract.completed_bytes = 0; + return (*ctx->saved_cbs->consume_chunk)(chunk, size, + ctx->saved_cbs->consume_chunk_ctx); } -static void -maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte, - struct list_head *stream_list) +/* + * Read the list of single-instance streams to extract and feed their data into + * the specified callback functions. + * + * This handles checksumming each stream. + * + * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS. + * + * This also works if the WIM is being read from a pipe, whereas attempting to + * read streams directly (e.g. with read_full_stream_into_buf()) will not. 
+ */ +int +extract_stream_list(struct apply_ctx *ctx, + const struct read_stream_list_callbacks *cbs) { - if (++lte->out_refcnt == 1) { - INIT_LIST_HEAD(<e->lte_dentry_list); - list_add_tail(<e->extraction_list, stream_list); + struct read_stream_list_callbacks wrapper_cbs = { + .begin_stream = begin_extract_stream_with_progress, + .begin_stream_ctx = ctx, + .consume_chunk = consume_chunk_with_progress, + .consume_chunk_ctx = ctx, + .end_stream = cbs->end_stream, + .end_stream_ctx = cbs->end_stream_ctx, + }; + if (ctx->progfunc) { + ctx->saved_cbs = cbs; + cbs = &wrapper_cbs; + } + if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { + return load_streams_from_pipe(ctx, cbs); + } else { + return read_stream_list(&ctx->stream_list, + offsetof(struct wim_lookup_table_entry, + extraction_list), + cbs, VERIFY_STREAM_HASHES); } } -struct find_streams_ctx { - struct list_head stream_list; - int extract_flags; -}; - +/* Extract a WIM dentry to standard output. + * + * This obviously doesn't make sense in all cases. We return an error if the + * dentry does not correspond to a regular file. Otherwise we extract the + * unnamed data stream only. */ static int -dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx) +extract_dentry_to_stdout(struct wim_dentry *dentry, + const struct wim_lookup_table *lookup_table) { - struct find_streams_ctx *ctx = _ctx; struct wim_inode *inode = dentry->d_inode; struct wim_lookup_table_entry *lte; - bool dentry_added = false; - struct list_head *stream_list = &ctx->stream_list; - int extract_flags = ctx->extract_flags; - - if (!dentry->needs_extraction) - return 0; + struct filedes _stdout; - lte = inode_unnamed_lte_resolved(inode); - if (lte) { - if (!inode->i_visited) - maybe_add_stream_for_extraction(lte, stream_list); - list_add_tail(&dentry->extraction_stream_list, <e->lte_dentry_list); - dentry_added = true; - } - - /* Determine whether to include alternate data stream entries or not. 
- * - * UNIX: Include them if extracting using NTFS-3g. - * - * Windows: Include them undconditionally, although if the filesystem is - * not NTFS we won't actually be able to extract them. */ -#if defined(WITH_NTFS_3G) - if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) -#elif defined(__WIN32__) - if (1) -#else - if (0) -#endif + if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT | + FILE_ATTRIBUTE_DIRECTORY)) { - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].stream_name_nbytes != 0) { - lte = inode->i_ads_entries[i].lte; - if (lte) { - if (!inode->i_visited) { - maybe_add_stream_for_extraction(lte, - stream_list); - } - if (!dentry_added) { - list_add_tail(&dentry->extraction_stream_list, - <e->lte_dentry_list); - dentry_added = true; - } - } - } - } + ERROR("\"%"TS"\" is not a regular file and therefore cannot be " + "extracted to standard output", dentry_full_path(dentry)); + return WIMLIB_ERR_NOT_A_REGULAR_FILE; } - inode->i_visited = 1; - return 0; + + lte = inode_unnamed_lte(inode, lookup_table); + if (!lte) { + const u8 *hash = inode_unnamed_stream_hash(inode); + if (!is_zero_hash(hash)) + return stream_not_found_error(inode, hash); + return 0; + } + + filedes_init(&_stdout, STDOUT_FILENO); + return extract_full_stream_to_fd(lte, &_stdout); } static int -dentry_resolve_and_zero_lte_refcnt(struct wim_dentry *dentry, void *_lookup_table) +extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries, + const struct wim_lookup_table *lookup_table) { - struct wim_inode *inode = dentry->d_inode; - struct wim_lookup_table *lookup_table = _lookup_table; - struct wim_lookup_table_entry *lte; - - inode_resolve_ltes(inode, lookup_table); - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte_resolved(inode, i); - if (lte) - lte->out_refcnt = 0; + for (size_t i = 0; i < num_dentries; i++) { + int ret = extract_dentry_to_stdout(dentries[i], lookup_table); + if (ret) + return ret; } return 0; } 
-static void -find_streams_for_extraction(struct wim_dentry *root, - struct list_head *stream_list, - struct wim_lookup_table *lookup_table, - int extract_flags) -{ - struct find_streams_ctx ctx; +/**********************************************************************/ - INIT_LIST_HEAD(&ctx.stream_list); - ctx.extract_flags = extract_flags; - for_dentry_in_tree(root, dentry_resolve_and_zero_lte_refcnt, lookup_table); - for_dentry_in_tree(root, dentry_find_streams_to_extract, &ctx); - list_transfer(&ctx.stream_list, stream_list); +/* + * Removes duplicate dentries from the array. + * + * Returns the new number of dentries, packed at the front of the array. + */ +static size_t +remove_duplicate_trees(struct wim_dentry **trees, size_t num_trees) +{ + size_t i, j = 0; + for (i = 0; i < num_trees; i++) { + if (!trees[i]->tmp_flag) { + /* Found distinct dentry. */ + trees[i]->tmp_flag = 1; + trees[j++] = trees[i]; + } + } + for (i = 0; i < j; i++) + trees[i]->tmp_flag = 0; + return j; } -struct apply_operations { - int (*apply_dentry)(struct wim_dentry *dentry, void *arg); - int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg); -}; +/* + * Remove dentries that are descendants of other dentries in the array. + * + * Returns the new number of dentries, packed at the front of the array. 
+ */ +static size_t +remove_contained_trees(struct wim_dentry **trees, size_t num_trees) +{ + size_t i, j = 0; + for (i = 0; i < num_trees; i++) + trees[i]->tmp_flag = 1; + for (i = 0; i < num_trees; i++) { + struct wim_dentry *d = trees[i]; + while (!dentry_is_root(d)) { + d = d->parent; + if (d->tmp_flag) + goto tree_contained; + } + trees[j++] = trees[i]; + continue; -static const struct apply_operations normal_apply_operations = { - .apply_dentry = apply_dentry_normal, - .apply_dentry_timestamps = apply_dentry_timestamps_normal, -}; + tree_contained: + trees[i]->tmp_flag = 0; + } -#ifdef WITH_NTFS_3G -static const struct apply_operations ntfs_apply_operations = { - .apply_dentry = apply_dentry_ntfs, - .apply_dentry_timestamps = apply_dentry_timestamps_ntfs, -}; -#endif + for (i = 0; i < j; i++) + trees[i]->tmp_flag = 0; + return j; +} static int -apply_stream_list(struct list_head *stream_list, - struct apply_args *args, - const struct apply_operations *ops, - wimlib_progress_func_t progress_func) +dentry_append_to_list(struct wim_dentry *dentry, void *_dentry_list) { - uint64_t bytes_per_progress = args->progress.extract.total_bytes / 100; - uint64_t next_progress = bytes_per_progress; - struct wim_lookup_table_entry *lte; - struct wim_dentry *dentry; - int ret; - - /* This complicated loop is essentially looping through the dentries, - * although dentries may be visited more than once (if a dentry contains - * two different nonempty streams) or not at all (if a dentry contains - * no non-empty streams). - * - * The outer loop is over the distinct streams to be extracted so that - * sequential reading of the WIM can be implemented. 
*/ - - /* For each distinct stream to be extracted */ - list_for_each_entry(lte, stream_list, extraction_list) { - /* For each dentry to be extracted that is a name for an inode - * containing the stream */ - list_for_each_entry(dentry, <e->lte_dentry_list, extraction_stream_list) { - /* Extract the dentry if it was not already - * extracted */ - ret = maybe_apply_dentry(dentry, args); - if (ret) - return ret; - if (progress_func && - args->progress.extract.completed_bytes >= next_progress) - { - progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS, - &args->progress); - if (args->progress.extract.completed_bytes >= - args->progress.extract.total_bytes) - { - next_progress = ~0ULL; - } else { - next_progress = - min (args->progress.extract.completed_bytes + - bytes_per_progress, - args->progress.extract.total_bytes); - } - } - } - } + struct list_head *dentry_list = _dentry_list; + list_add_tail(&dentry->d_extraction_list_node, dentry_list); return 0; } +static void +dentry_reset_extraction_list_node(struct wim_dentry *dentry) +{ + dentry->d_extraction_list_node = (struct list_head){NULL, NULL}; +} + static int -sort_stream_list_by_wim_position(struct list_head *stream_list) +dentry_delete_from_list(struct wim_dentry *dentry, void *_ignore) { - struct list_head *cur; - size_t num_streams; - struct wim_lookup_table_entry **array; - size_t i; - size_t array_size; - - num_streams = 0; - list_for_each(cur, stream_list) - num_streams++; - array_size = num_streams * sizeof(array[0]); - array = MALLOC(array_size); - if (!array) { - ERROR("Failed to allocate %zu bytes to sort stream entries", - array_size); - return WIMLIB_ERR_NOMEM; - } - cur = stream_list->next; - for (i = 0; i < num_streams; i++) { - array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list); - cur = cur->next; - } - - qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position); - - INIT_LIST_HEAD(stream_list); - for (i = 0; i < num_streams; i++) - 
list_add_tail(&array[i]->extraction_list, stream_list); - FREE(array); + list_del(&dentry->d_extraction_list_node); + dentry_reset_extraction_list_node(dentry); return 0; } /* - * Extract a dentry to standard output. + * Build the preliminary list of dentries to be extracted. * - * This obviously doesn't make sense in all cases. We return an error if the - * dentry does not correspond to a regular file. Otherwise we extract the - * unnamed data stream only. + * The list maintains the invariant that if d1 and d2 are in the list and d1 is + * an ancestor of d2, then d1 appears before d2 in the list. */ -static int -extract_dentry_to_stdout(struct wim_dentry *dentry) +static void +build_dentry_list(struct list_head *dentry_list, struct wim_dentry **trees, + size_t num_trees, bool add_ancestors) { - int ret = 0; - if (!dentry_is_regular_file(dentry)) { - ERROR("\"%"TS"\" is not a regular file and therefore cannot be " - "extracted to standard output", dentry->_full_path); - ret = WIMLIB_ERR_NOT_A_REGULAR_FILE; - } else { - struct wim_lookup_table_entry *lte; - - lte = inode_unnamed_lte_resolved(dentry->d_inode); - if (lte) { - ret = extract_wim_resource_to_fd(lte, STDOUT_FILENO, - wim_resource_size(lte)); + INIT_LIST_HEAD(dentry_list); + + /* Add the trees recursively. */ + for (size_t i = 0; i < num_trees; i++) + for_dentry_in_tree(trees[i], dentry_append_to_list, dentry_list); + + /* If requested, add ancestors of the trees. 
*/ + if (add_ancestors) { + for (size_t i = 0; i < num_trees; i++) { + struct wim_dentry *dentry = trees[i]; + struct wim_dentry *ancestor; + struct list_head *place_after; + + if (dentry_is_root(dentry)) + continue; + + place_after = dentry_list; + ancestor = dentry; + do { + ancestor = ancestor->parent; + if (will_extract_dentry(ancestor)) { + place_after = &ancestor->d_extraction_list_node; + break; + } + } while (!dentry_is_root(ancestor)); + + ancestor = dentry; + do { + ancestor = ancestor->parent; + if (will_extract_dentry(ancestor)) + break; + list_add(&ancestor->d_extraction_list_node, place_after); + } while (!dentry_is_root(ancestor)); } } - return ret; +} + +static void +destroy_dentry_list(struct list_head *dentry_list) +{ + struct wim_dentry *dentry, *tmp; + struct wim_inode *inode; + + list_for_each_entry_safe(dentry, tmp, dentry_list, d_extraction_list_node) { + inode = dentry->d_inode; + dentry_reset_extraction_list_node(dentry); + inode->i_visited = 0; + if ((void *)dentry->d_extraction_name != (void *)dentry->file_name) + FREE(dentry->d_extraction_name); + dentry->d_extraction_name = NULL; + dentry->d_extraction_name_nchars = 0; + } +} + +static void +destroy_stream_list(struct list_head *stream_list) +{ + struct wim_lookup_table_entry *lte; + + list_for_each_entry(lte, stream_list, extraction_list) + if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners)) + FREE(lte->stream_owners); } #ifdef __WIN32__ @@ -495,80 +600,61 @@ file_name_valid(utf16lechar *name, size_t num_chars, bool fix) return true; } -/* - * dentry_calculate_extraction_path- - * - * Calculate the actual filename component at which a WIM dentry will be - * extracted, handling invalid filenames "properly". - * - * dentry->extraction_name usually will be set the same as dentry->file_name (on - * UNIX, converted into the platform's multibyte encoding). 
However, if the - * file name contains characters that are not valid on the current platform or - * has some other format that is not valid, leave dentry->extraction_name as - * NULL and clear dentry->needs_extraction to indicate that this dentry should - * not be extracted, unless the appropriate flag - * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in - * which case a substitute filename will be created and set instead. - * - * Conflicts with case-insensitive names on Windows are handled similarly; see - * below. - */ static int -dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args) +dentry_calculate_extraction_name(struct wim_dentry *dentry, + struct apply_ctx *ctx) { - struct apply_args *args = _args; int ret; - dentry->needs_extraction = 1; + if (!dentry_is_supported(dentry, &ctx->supported_features)) + goto skip_dentry; - if (dentry == args->extract_root) + if (dentry_is_root(dentry)) return 0; - if (dentry_is_dot_or_dotdot(dentry)) { - /* WIM files shouldn't contain . or .. entries. But if they are - * there, don't attempt to extract them. */ - WARNING("Skipping extraction of unexpected . or .. 
file \"%"TS"\"", - dentry_full_path(dentry)); - goto skip_dentry; + if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { + dentry->d_extraction_name = dentry->file_name; + dentry->d_extraction_name_nchars = dentry->file_name_nbytes / + sizeof(utf16lechar); + return 0; } -#ifdef __WIN32__ - struct wim_dentry *other; - list_for_each_entry(other, &dentry->case_insensitive_conflict_list, - case_insensitive_conflict_list) - { - if (other->needs_extraction) { - if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) - { - WARNING("\"%"TS"\" has the same case-insensitive " - "name as \"%"TS"\"; extracting dummy name instead", - dentry_full_path(dentry), - dentry_full_path(other)); - goto out_replace; - } else { - WARNING("Not extracting \"%"TS"\": has same case-insensitive " - "name as \"%"TS"\"", - dentry_full_path(dentry), - dentry_full_path(other)); - goto skip_dentry; + if (!ctx->supported_features.case_sensitive_filenames) { + struct wim_dentry *other; + list_for_each_entry(other, &dentry->d_ci_conflict_list, + d_ci_conflict_list) + { + if (will_extract_dentry(other)) { + if (ctx->extract_flags & + WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) { + WARNING("\"%"TS"\" has the same " + "case-insensitive name as " + "\"%"TS"\"; extracting " + "dummy name instead", + dentry_full_path(dentry), + dentry_full_path(other)); + goto out_replace; + } else { + WARNING("Not extracting \"%"TS"\": " + "has same case-insensitive " + "name as \"%"TS"\"", + dentry_full_path(dentry), + dentry_full_path(other)); + goto skip_dentry; + } } } } -#endif if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) { -#ifdef __WIN32__ - dentry->extraction_name = dentry->file_name; - dentry->extraction_name_nchars = dentry->file_name_nbytes / 2; - return 0; -#else - return utf16le_to_tstr(dentry->file_name, + ret = utf16le_get_tstr(dentry->file_name, dentry->file_name_nbytes, - &dentry->extraction_name, - &dentry->extraction_name_nchars); -#endif + (const tchar 
**)&dentry->d_extraction_name, + &dentry->d_extraction_name_nchars); + dentry->d_extraction_name_nchars /= sizeof(tchar); + return ret; } else { - if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES) + if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES) { WARNING("\"%"TS"\" has an invalid filename " "that is not supported on this platform; " @@ -590,537 +676,858 @@ out_replace: memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes); file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true); - tchar *tchar_name; + const tchar *tchar_name; size_t tchar_nchars; - #ifdef __WIN32__ - tchar_name = utf16_name_copy; - tchar_nchars = dentry->file_name_nbytes / 2; - #else - ret = utf16le_to_tstr(utf16_name_copy, - dentry->file_name_nbytes, - &tchar_name, &tchar_nchars); + + ret = utf16le_get_tstr(utf16_name_copy, + dentry->file_name_nbytes, + &tchar_name, &tchar_nchars); if (ret) return ret; - #endif + + tchar_nchars /= sizeof(tchar); + size_t fixed_name_num_chars = tchar_nchars; tchar fixed_name[tchar_nchars + 50]; tmemcpy(fixed_name, tchar_name, tchar_nchars); fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars, T(" (invalid filename #%lu)"), - ++args->invalid_sequence); - #ifndef __WIN32__ - FREE(tchar_name); - #endif - dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2); - if (!dentry->extraction_name) + ++ctx->invalid_sequence); + + utf16le_put_tstr(tchar_name); + + dentry->d_extraction_name = memdup(fixed_name, + 2 * fixed_name_num_chars + 2); + if (!dentry->d_extraction_name) return WIMLIB_ERR_NOMEM; - dentry->extraction_name_nchars = fixed_name_num_chars; + dentry->d_extraction_name_nchars = fixed_name_num_chars; } return 0; -skip_dentry: - dentry->needs_extraction = 0; - dentry->not_extracted = 1; - return 0; -} -static int -dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore) -{ - dentry->needs_extraction = 0; - dentry->not_extracted = 0; - 
dentry->is_win32_name = 0; - dentry->d_inode->i_visited = 0; - dentry->d_inode->i_dos_name_extracted = 0; - FREE(dentry->d_inode->i_extracted_file); - dentry->d_inode->i_extracted_file = NULL; - if ((void*)dentry->extraction_name != (void*)dentry->file_name) - FREE(dentry->extraction_name); - dentry->extraction_name = NULL; +skip_dentry: + for_dentry_in_tree(dentry, dentry_delete_from_list, NULL); return 0; } /* - * extract_tree - Extract a file or directory tree from the currently selected - * WIM image. - * - * @wim: WIMStruct for the WIM file, with the desired image selected - * (as wim->current_image). - * @wim_source_path: - * "Canonical" (i.e. no leading or trailing slashes, path - * separators forwald slashes) path inside the WIM image to - * extract. An empty string means the full image. - * @target: - * Filesystem path to extract the file or directory tree to. + * Calculate the actual filename component at which each WIM dentry will be + * extracted, with special handling for dentries that are unsupported by the + * extraction backend or have invalid names. * - * @extract_flags: - * WIMLIB_EXTRACT_FLAG_*. Also, the private flag - * WIMLIB_EXTRACT_FLAG_MULTI_IMAGE will be set if this is being - * called through wimlib_extract_image() with WIMLIB_ALL_IMAGES as - * the image. + * ctx->supported_features must be filled in. * - * @progress_func: - * If non-NULL, progress function for the extraction. The messages - * we may in this function are: - * - * WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN or - * WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN; - * WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN; - * WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END; - * WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY; - * WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS; - * WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS; - * WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END or - * WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END. - * - * Returns 0 on success; nonzero on failure. 
+ * Possible error codes: WIMLIB_ERR_NOMEM, WIMLIB_ERR_INVALID_UTF16_STRING */ static int -extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target, - int extract_flags, wimlib_progress_func_t progress_func) +dentry_list_calculate_extraction_names(struct list_head *dentry_list, + struct apply_ctx *ctx) { - int ret; - struct list_head stream_list; - struct apply_args args; - const struct apply_operations *ops; - struct wim_dentry *root; + struct list_head *prev, *cur; - memset(&args, 0, sizeof(args)); + /* Can't use list_for_each_entry() because a call to + * dentry_calculate_extraction_name() may delete the current dentry and + * its children from the list. */ + prev = dentry_list; + for (;;) { + struct wim_dentry *dentry; + int ret; - args.w = wim; - args.target = target; - args.target_nchars = tstrlen(target); - args.extract_flags = extract_flags; - args.progress_func = progress_func; + cur = prev->next; + if (cur == dentry_list) + break; -#ifdef __WIN32__ - /* Work around defective behavior in Windows where paths longer than 260 - * characters are not supported by default; instead they need to be - * turned into absolute paths and prefixed with "\\?\". 
*/ - args.target_lowlevel_path = MALLOC(32768 * sizeof(wchar_t)); - if (!args.target_lowlevel_path) - { - ret = WIMLIB_ERR_NOMEM; - goto out; - } - args.target_lowlevel_path[0] = L'\\'; - args.target_lowlevel_path[1] = L'\\'; - args.target_lowlevel_path[2] = L'?'; - args.target_lowlevel_path[3] = L'\\'; - args.target_lowlevel_path_nchars = - GetFullPathName(args.target, 32768 - 4, - &args.target_lowlevel_path[4], NULL); - - if (args.target_lowlevel_path_nchars == 0 || - args.target_lowlevel_path_nchars >= 32768 - 4) - { - WARNING("Can't get full path name for \"%ls\"", args.target); - FREE(args.target_lowlevel_path); - args.target_lowlevel_path = NULL; - } else { - args.target_lowlevel_path_nchars += 4; - } -#endif - - if (progress_func) { - args.progress.extract.wimfile_name = wim->filename; - args.progress.extract.image = wim->current_image; - args.progress.extract.extract_flags = (extract_flags & - WIMLIB_EXTRACT_MASK_PUBLIC); - args.progress.extract.image_name = wimlib_get_image_name(wim, - wim->current_image); - args.progress.extract.extract_root_wim_source_path = wim_source_path; - args.progress.extract.target = target; - } + dentry = list_entry(cur, struct wim_dentry, d_extraction_list_node); -#ifdef WITH_NTFS_3G - if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { - args.vol = ntfs_mount(target, 0); - if (!args.vol) { - ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'", - target); - ret = WIMLIB_ERR_NTFS_3G; - goto out_free_target_lowlevel_path; - } - ops = &ntfs_apply_operations; - } else -#endif - ops = &normal_apply_operations; + ret = dentry_calculate_extraction_name(dentry, ctx); + if (ret) + return ret; - root = get_dentry(wim, wim_source_path); - if (!root) { - ERROR("Path \"%"TS"\" does not exist in WIM image %d", - wim_source_path, wim->current_image); - ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST; - goto out_ntfs_umount; + if (prev->next == cur) + prev = cur; + else + ; /* Current dentry and its children (which follow in + the list) were deleted. 
prev stays the same. */ } - args.extract_root = root; + return 0; +} - /* Calculate the actual filename component of each extracted dentry, and - * in the process set the dentry->needs_extraction flag on dentries that - * will be extracted. */ - ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args); +static int +dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags, + struct wim_lookup_table *lookup_table) +{ + struct wim_inode *inode = dentry->d_inode; + struct wim_lookup_table_entry *lte; + int ret; + bool force = false; + + /* Special case: when extracting from a pipe, the WIM lookup table is + * initially empty, so "resolving" an inode's streams is initially not + * possible. However, we still need to keep track of which streams, + * identified by SHA1 message digests, need to be extracted, so we + * "resolve" the inode's streams anyway by allocating new entries. */ + if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) + force = true; + ret = inode_resolve_streams(inode, lookup_table, force); if (ret) - goto out_dentry_reset_needs_extraction; - - /* Build a list of the streams that need to be extracted */ - find_streams_for_extraction(root, - &stream_list, - wim->lookup_table, extract_flags); - - /* Calculate the number of bytes of data that will be extracted */ - calculate_bytes_to_extract(&stream_list, extract_flags, - &args.progress); - - if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) { - ret = extract_dentry_to_stdout(root); - goto out_dentry_reset_needs_extraction; + return ret; + for (u32 i = 0; i <= inode->i_num_ads; i++) { + lte = inode_stream_lte_resolved(inode, i); + if (lte) + lte->out_refcnt = 0; } + return 0; +} - if (progress_func) { - progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN : - WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN, - &args.progress); - } +/* + * For each dentry to be extracted, resolve all streams in the corresponding + * inode and set 'out_refcnt' in each to 0. 
+ * + * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM. + */ +static int +dentry_list_resolve_streams(struct list_head *dentry_list, + struct apply_ctx *ctx) +{ + struct wim_dentry *dentry; + int ret; - /* If a sequential extraction was specified, sort the streams to be - * extracted by their position in the WIM file, so that the WIM file can - * be read sequentially. */ - if (extract_flags & WIMLIB_EXTRACT_FLAG_SEQUENTIAL) { - ret = sort_stream_list_by_wim_position(&stream_list); - if (ret != 0) { - WARNING("Falling back to non-sequential extraction"); - extract_flags &= ~WIMLIB_EXTRACT_FLAG_SEQUENTIAL; - } + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) { + ret = dentry_resolve_streams(dentry, + ctx->extract_flags, + ctx->wim->lookup_table); + if (ret) + return ret; } + return 0; +} - if (progress_func) { - progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_BEGIN, - &args.progress); - } +static int +ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx, + struct wim_dentry *dentry, struct apply_ctx *ctx) +{ + struct wim_inode *inode = dentry->d_inode; + struct stream_owner *stream_owners; - /* Make the directory structure and extract empty files */ - args.extract_flags |= WIMLIB_EXTRACT_FLAG_NO_STREAMS; - args.apply_dentry = ops->apply_dentry; - ret = for_dentry_in_tree(root, maybe_apply_dentry, &args); - args.extract_flags &= ~WIMLIB_EXTRACT_FLAG_NO_STREAMS; - if (ret) - goto out_dentry_reset_needs_extraction; + if (!lte) + return 0; + + /* Tally the size only for each extraction of the stream (not hard + * links). 
*/ + if (inode->i_visited && ctx->supported_features.hard_links) + return 0; + + ctx->progress.extract.total_bytes += lte->size; + ctx->progress.extract.num_streams++; + + if (inode->i_visited) + return 0; - if (progress_func) { - progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DIR_STRUCTURE_END, - &args.progress); + /* Add stream to the stream_list only one time, even if it's going + * to be extracted to multiple inodes. */ + if (lte->out_refcnt == 0) { + list_add_tail(&lte->extraction_list, &ctx->stream_list); + ctx->num_streams_remaining++; } - if (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) { - args.target_realpath = realpath(target, NULL); - if (!args.target_realpath) { - ret = WIMLIB_ERR_NOMEM; - goto out_dentry_reset_needs_extraction; + /* If the inode has not yet been visited, append it to the stream_owners array. */ + if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) { + stream_owners = lte->inline_stream_owners; + } else { + struct stream_owner *prev_stream_owners; + size_t alloc_stream_owners; + + if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) { + prev_stream_owners = NULL; + alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners); + } else { + prev_stream_owners = lte->stream_owners; + alloc_stream_owners = lte->alloc_stream_owners; } - args.target_realpath_len = tstrlen(args.target_realpath); + + if (lte->out_refcnt == alloc_stream_owners) { + alloc_stream_owners *= 2; + stream_owners = REALLOC(prev_stream_owners, + alloc_stream_owners * + sizeof(stream_owners[0])); + if (!stream_owners) + return WIMLIB_ERR_NOMEM; + if (!prev_stream_owners) { + memcpy(stream_owners, + lte->inline_stream_owners, + sizeof(lte->inline_stream_owners)); + } + lte->stream_owners = stream_owners; + lte->alloc_stream_owners = alloc_stream_owners; + } + stream_owners = lte->stream_owners; + } + stream_owners[lte->out_refcnt].inode = inode; + if (stream_idx == 0) { + stream_owners[lte->out_refcnt].stream_name = NULL; + } else { + 
stream_owners[lte->out_refcnt].stream_name = + inode->i_ads_entries[stream_idx - 1].stream_name; } + lte->out_refcnt++; + return 0; +} - /* Extract non-empty files */ - ret = apply_stream_list(&stream_list, &args, ops, progress_func); - if (ret) - goto out_free_target_realpath; +static int +dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx) +{ + struct wim_inode *inode = dentry->d_inode; + int ret; + + /* The unnamed data stream will always be extracted, except in an + * unlikely case. */ + if (!inode_is_encrypted_directory(inode)) { + u16 stream_idx; + struct wim_lookup_table_entry *stream; - if (progress_func) { - progress_func(WIMLIB_PROGRESS_MSG_APPLY_TIMESTAMPS, - &args.progress); + stream = inode_unnamed_stream_resolved(inode, &stream_idx); + ret = ref_stream(stream, stream_idx, dentry, ctx); + if (ret) + return ret; } - /* Apply timestamps */ - ret = for_dentry_in_tree_depth(root, - ops->apply_dentry_timestamps, &args); - if (ret) - goto out_free_target_realpath; - - if (progress_func) { - progress_func(*wim_source_path ? WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END : - WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END, - &args.progress); - } -out_free_target_realpath: - FREE(args.target_realpath); -out_dentry_reset_needs_extraction: - for_dentry_in_tree(root, dentry_reset_needs_extraction, NULL); -out_ntfs_umount: -#ifdef WITH_NTFS_3G - /* Unmount the NTFS volume */ - if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { - if (ntfs_umount(args.vol, FALSE) != 0) { - ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'", - args.target); - if (ret == 0) - ret = WIMLIB_ERR_NTFS_3G; + /* Named data streams will be extracted only if supported in the current + * extraction mode and volume, and to avoid complications, if not doing + * a linked extraction. 
*/ + if (ctx->supported_features.named_data_streams) { + for (u16 i = 0; i < inode->i_num_ads; i++) { + if (!ads_entry_is_named_stream(&inode->i_ads_entries[i])) + continue; + ret = ref_stream(inode->i_ads_entries[i].lte, i + 1, + dentry, ctx); + if (ret) + return ret; } } -#endif -out_free_target_lowlevel_path: -#ifdef __WIN32__ - FREE(args.target_lowlevel_path); -#endif -out: - return ret; + inode->i_visited = 1; + return 0; } -/* Validates a single wimlib_extract_command, mostly checking to make sure the - * extract flags make sense. */ +/* + * For each dentry to be extracted, iterate through the data streams of the + * corresponding inode. For each such stream that is not to be ignored due to + * the supported features or extraction flags, add it to the list of streams to + * be extracted (ctx->stream_list) if not already done so. + * + * Also builds a mapping from each stream to the inodes referencing it. + * + * This also initializes the extract progress info with byte and stream + * information. + * + * ctx->supported_features must be filled in. + * + * Possible error codes: WIMLIB_ERR_NOMEM. + */ static int -check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags) +dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx) { - int extract_flags; - bool is_entire_image = (cmd->wim_source_path[0] == T('\0')); + struct wim_dentry *dentry; + int ret; - /* Empty destination path? 
*/ - if (cmd->fs_dest_path[0] == T('\0')) - return WIMLIB_ERR_INVALID_PARAM; + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) { + ret = dentry_ref_streams(dentry, ctx); + if (ret) + return ret; + } + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) + dentry->d_inode->i_visited = 0; + return 0; +} - extract_flags = cmd->extract_flags; +static void +dentry_list_build_inode_alias_lists(struct list_head *dentry_list) +{ + struct wim_dentry *dentry; + struct wim_inode *inode; - /* Specified both symlink and hardlink modes? */ - if ((extract_flags & - (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) == (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) - return WIMLIB_ERR_INVALID_PARAM; + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) { + inode = dentry->d_inode; + if (!inode->i_visited) + INIT_LIST_HEAD(&inode->i_extraction_aliases); + list_add_tail(&dentry->d_extraction_alias_node, + &inode->i_extraction_aliases); + inode->i_visited = 1; + } + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) + dentry->d_inode->i_visited = 0; +} -#ifdef __WIN32__ - /* Wanted UNIX data on Windows? */ - if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) { - ERROR("Extracting UNIX data is not supported on Windows"); - return WIMLIB_ERR_INVALID_PARAM; +static void +inode_tally_features(const struct wim_inode *inode, + struct wim_features *features) +{ + if (inode->i_attributes & FILE_ATTRIBUTE_ARCHIVE) + features->archive_files++; + if (inode->i_attributes & FILE_ATTRIBUTE_HIDDEN) + features->hidden_files++; + if (inode->i_attributes & FILE_ATTRIBUTE_SYSTEM) + features->system_files++; + if (inode->i_attributes & FILE_ATTRIBUTE_COMPRESSED) + features->compressed_files++; + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { + if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) + features->encrypted_directories++; + else + features->encrypted_files++; } - /* Wanted linked extraction on Windows? 
(XXX This is possible, just not - * implemented yet.) */ - if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) - { - ERROR("Linked extraction modes are not supported on Windows"); - return WIMLIB_ERR_INVALID_PARAM; + if (inode->i_attributes & FILE_ATTRIBUTE_NOT_CONTENT_INDEXED) + features->not_context_indexed_files++; + if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) + features->sparse_files++; + if (inode_has_named_stream(inode)) + features->named_data_streams++; + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + features->reparse_points++; + if (inode_is_symlink(inode)) + features->symlink_reparse_points++; + else + features->other_reparse_points++; } -#endif + if (inode->i_security_id != -1) + features->security_descriptors++; + if (inode_has_unix_data(inode)) + features->unix_data++; +} - if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { - /* NTFS-3g extraction mode requested */ -#ifdef WITH_NTFS_3G - if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK))) { - ERROR("Cannot specify symlink or hardlink flags when applying\n" - " directly to a NTFS volume"); - return WIMLIB_ERR_INVALID_PARAM; - } - if (!is_entire_image && - (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS)) - { - ERROR("When applying directly to a NTFS volume you can " - "only extract a full image, not part of one"); - return WIMLIB_ERR_INVALID_PARAM; - } - if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) { - ERROR("Cannot restore UNIX-specific data in " - "the NTFS extraction mode"); - return WIMLIB_ERR_INVALID_PARAM; - } -#else - ERROR("wimlib was compiled without support for NTFS-3g, so"); - ERROR("we cannot apply a WIM image directly to a NTFS volume"); +/* Tally features necessary to extract a dentry and the corresponding inode. 
*/ +static void +dentry_tally_features(struct wim_dentry *dentry, struct wim_features *features) +{ + struct wim_inode *inode = dentry->d_inode; + + if (dentry_has_short_name(dentry)) + features->short_names++; + + if (inode->i_visited) { + features->hard_links++; + } else { + inode_tally_features(inode, features); + inode->i_visited = 1; + } +} + +/* Tally the features necessary to extract the specified dentries. */ +static void +dentry_list_get_features(struct list_head *dentry_list, + struct wim_features *features) +{ + struct wim_dentry *dentry; + + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) + dentry_tally_features(dentry, features); + + list_for_each_entry(dentry, dentry_list, d_extraction_list_node) + dentry->d_inode->i_visited = 0; +} + +static int +do_feature_check(const struct wim_features *required_features, + const struct wim_features *supported_features, + int extract_flags) +{ + /* File attributes. */ + if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) { + /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE. + * We're an archive program, so theoretically we can do what we + * want with it. 
*/ + + if (required_features->hidden_files && + !supported_features->hidden_files) + WARNING("Ignoring FILE_ATTRIBUTE_HIDDEN of %lu files", + required_features->hidden_files); + + if (required_features->system_files && + !supported_features->system_files) + WARNING("Ignoring FILE_ATTRIBUTE_SYSTEM of %lu files", + required_features->system_files); + + if (required_features->compressed_files && + !supported_features->compressed_files) + WARNING("Ignoring FILE_ATTRIBUTE_COMPRESSED of %lu files", + required_features->compressed_files); + + if (required_features->not_context_indexed_files && + !supported_features->not_context_indexed_files) + WARNING("Ignoring FILE_ATTRIBUTE_NOT_CONTENT_INDEXED of %lu files", + required_features->not_context_indexed_files); + + if (required_features->sparse_files && + !supported_features->sparse_files) + WARNING("Ignoring FILE_ATTRIBUTE_SPARSE_FILE of %lu files", + required_features->sparse_files); + + if (required_features->encrypted_directories && + !supported_features->encrypted_directories) + WARNING("Ignoring FILE_ATTRIBUTE_ENCRYPTED of %lu directories", + required_features->encrypted_directories); + } + + /* Encrypted files. */ + if (required_features->encrypted_files && + !supported_features->encrypted_files) + WARNING("Ignoring %lu encrypted files", + required_features->encrypted_files); + + /* Named data streams. */ + if (required_features->named_data_streams && + (!supported_features->named_data_streams)) + WARNING("Ignoring named data streams of %lu files", + required_features->named_data_streams); + + /* Hard links. */ + if (required_features->hard_links && !supported_features->hard_links) + WARNING("Extracting %lu hard links as independent files", + required_features->hard_links); + + /* Symbolic links and reparse points. 
*/ + if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SYMLINKS) && + required_features->symlink_reparse_points && + !supported_features->symlink_reparse_points && + !supported_features->reparse_points) + { + ERROR("Extraction backend does not support symbolic links!"); return WIMLIB_ERR_UNSUPPORTED; -#endif + } + if (required_features->reparse_points && + !supported_features->reparse_points) + { + if (supported_features->symlink_reparse_points) { + if (required_features->other_reparse_points) { + WARNING("Ignoring %lu non-symlink/junction " + "reparse point files", + required_features->other_reparse_points); + } + } else { + WARNING("Ignoring %lu reparse point files", + required_features->reparse_points); + } } - if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX | - WIMLIB_EXTRACT_FLAG_NORPFIX)) == - (WIMLIB_EXTRACT_FLAG_RPFIX | WIMLIB_EXTRACT_FLAG_NORPFIX)) + /* Security descriptors. */ + if (((extract_flags & (WIMLIB_EXTRACT_FLAG_STRICT_ACLS | + WIMLIB_EXTRACT_FLAG_UNIX_DATA)) + == WIMLIB_EXTRACT_FLAG_STRICT_ACLS) && + required_features->security_descriptors && + !supported_features->security_descriptors) { - ERROR("Cannot specify RPFIX and NORPFIX flags at the same time!"); - return WIMLIB_ERR_INVALID_PARAM; + ERROR("Extraction backend does not support security descriptors!"); + return WIMLIB_ERR_UNSUPPORTED; + } + if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ACLS) && + required_features->security_descriptors && + !supported_features->security_descriptors) + WARNING("Ignoring Windows NT security descriptors of %lu files", + required_features->security_descriptors); + + /* UNIX data. */ + if ((extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) && + required_features->unix_data && !supported_features->unix_data) + { + ERROR("Extraction backend does not support UNIX data!"); + return WIMLIB_ERR_UNSUPPORTED; } - if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX | - WIMLIB_EXTRACT_FLAG_NORPFIX)) == 0) + /* DOS Names. 
*/ + if (required_features->short_names && + !supported_features->short_names) { - /* Do reparse point fixups by default if the WIM header says - * they are enabled and we are extracting a full image. */ - if ((wim_header_flags & WIM_HDR_FLAG_RP_FIX) && is_entire_image) - extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX; + if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_SHORT_NAMES) { + ERROR("Extraction backend does not support DOS names!"); + return WIMLIB_ERR_UNSUPPORTED; + } + WARNING("Ignoring DOS names of %lu files", + required_features->short_names); } - if (!is_entire_image && (extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX)) { - ERROR("Cannot specify --rpfix when not extracting entire image"); - return WIMLIB_ERR_INVALID_PARAM; + /* Timestamps. */ + if ((extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS) && + !supported_features->timestamps) + { + ERROR("Extraction backend does not support timestamps!"); + return WIMLIB_ERR_UNSUPPORTED; } - cmd->extract_flags = extract_flags; return 0; } +static const struct apply_operations * +select_apply_operations(int extract_flags) +{ +#ifdef WITH_NTFS_3G + if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) + return &ntfs_3g_apply_ops; +#endif +#ifdef __WIN32__ + return &win32_apply_ops; +#else + return &unix_apply_ops; +#endif +} -/* Internal function to execute extraction commands for a WIM image. 
*/ static int -do_wimlib_extract_files(WIMStruct *wim, - int image, - struct wimlib_extract_command *cmds, - size_t num_cmds, - wimlib_progress_func_t progress_func) +extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, + const tchar *target, int extract_flags) { + const struct apply_operations *ops; + struct apply_ctx *ctx; int ret; - bool found_link_cmd = false; - bool found_nolink_cmd = false; + LIST_HEAD(dentry_list); - /* Select the image from which we are extracting files */ - ret = select_wim_image(wim, image); + if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) { + ret = extract_dentries_to_stdout(trees, num_trees, + wim->lookup_table); + goto out; + } + + num_trees = remove_duplicate_trees(trees, num_trees); + num_trees = remove_contained_trees(trees, num_trees); + + ops = select_apply_operations(extract_flags); + + if (num_trees > 1 && ops->single_tree_only) { + ERROR("Extracting multiple directory trees " + "at once is not supported in %s extraction mode!", + ops->name); + ret = WIMLIB_ERR_UNSUPPORTED; + goto out; + } + + ctx = CALLOC(1, ops->context_size); + if (!ctx) { + ret = WIMLIB_ERR_NOMEM; + goto out; + } + + ctx->wim = wim; + ctx->target = target; + ctx->target_nchars = tstrlen(target); + ctx->extract_flags = extract_flags; + if (ctx->wim->progfunc) { + ctx->progfunc = ctx->wim->progfunc; + ctx->progctx = ctx->wim->progctx; + ctx->progress.extract.image = wim->current_image; + ctx->progress.extract.extract_flags = (extract_flags & + WIMLIB_EXTRACT_MASK_PUBLIC); + ctx->progress.extract.wimfile_name = wim->filename; + ctx->progress.extract.image_name = wimlib_get_image_name(wim, + wim->current_image); + ctx->progress.extract.target = target; + } + INIT_LIST_HEAD(&ctx->stream_list); + + ret = (*ops->get_supported_features)(target, &ctx->supported_features); if (ret) - return ret; + goto out_cleanup; - /* Make sure there are no streams in the WIM that have not been - * checksummed yet. 
*/ - ret = wim_checksum_unhashed_streams(wim); + build_dentry_list(&dentry_list, trees, num_trees, + !(extract_flags & + WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)); + + dentry_list_get_features(&dentry_list, &ctx->required_features); + + ret = do_feature_check(&ctx->required_features, &ctx->supported_features, + ctx->extract_flags); if (ret) - return ret; + goto out_cleanup; - /* Check for problems with the extraction commands */ - for (size_t i = 0; i < num_cmds; i++) { - ret = check_extract_command(&cmds[i], wim->hdr.flags); + ret = dentry_list_calculate_extraction_names(&dentry_list, ctx); + if (ret) + goto out_cleanup; + + ret = dentry_list_resolve_streams(&dentry_list, ctx); + if (ret) + goto out_cleanup; + + ret = dentry_list_ref_streams(&dentry_list, ctx); + if (ret) + goto out_cleanup; + + dentry_list_build_inode_alias_lists(&dentry_list); + + if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { + /* When extracting from a pipe, the number of bytes of data to + * extract can't be determined in the normal way (examining the + * lookup table), since at this point all we have is a set of + * SHA1 message digests of streams that need to be extracted. + * However, we can get a reasonably accurate estimate by taking + * from the corresponding in the WIM XML + * data. This does assume that a full image is being extracted, + * but currently there is no API for doing otherwise. (Also, + * subtract from this if hard links are + * supported by the extraction mode.) */ + ctx->progress.extract.total_bytes = + wim_info_get_image_total_bytes(wim->wim_info, + wim->current_image); + if (ctx->supported_features.hard_links) { + ctx->progress.extract.total_bytes -= + wim_info_get_image_hard_link_bytes(wim->wim_info, + wim->current_image); + } + } + + ret = extract_progress(ctx, + ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ? 
+ WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_BEGIN : + WIMLIB_PROGRESS_MSG_EXTRACT_TREE_BEGIN)); + if (ret) + goto out_cleanup; + + ret = (*ops->extract)(&dentry_list, ctx); + if (ret) + goto out_cleanup; + + if (ctx->progress.extract.completed_bytes < + ctx->progress.extract.total_bytes) + { + ctx->progress.extract.completed_bytes = + ctx->progress.extract.total_bytes; + ret = extract_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS); if (ret) - return ret; - if (cmds[i].extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) { - found_link_cmd = true; + goto out_cleanup; + } + + ret = extract_progress(ctx, + ((extract_flags & WIMLIB_EXTRACT_FLAG_IMAGEMODE) ? + WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END : + WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END)); +out_cleanup: + destroy_stream_list(&ctx->stream_list); + destroy_dentry_list(&dentry_list); + FREE(ctx); +out: + return ret; +} + +static int +mkdir_if_needed(const tchar *target) +{ + struct stat stbuf; + if (tstat(target, &stbuf)) { + if (errno == ENOENT) { + if (tmkdir(target, 0755)) { + ERROR_WITH_ERRNO("Failed to create directory " + "\"%"TS"\"", target); + return WIMLIB_ERR_MKDIR; + } } else { - found_nolink_cmd = true; - } - if (found_link_cmd && found_nolink_cmd) { - ERROR("Symlink or hardlink extraction mode must " - "be set on all extraction commands"); - return WIMLIB_ERR_INVALID_PARAM; + ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target); + return WIMLIB_ERR_STAT; } + } else if (!S_ISDIR(stbuf.st_mode)) { + ERROR("\"%"TS"\" is not a directory", target); + return WIMLIB_ERR_NOTDIR; } + return 0; +} - /* Execute the extraction commands */ - for (size_t i = 0; i < num_cmds; i++) { - ret = extract_tree(wim, - cmds[i].wim_source_path, - cmds[i].fs_dest_path, - cmds[i].extract_flags, - progress_func); - if (ret) - return ret; +/* Make sure the extraction flags make sense, and update them if needed. 
*/ +static int +check_extract_flags(const WIMStruct *wim, int *extract_flags_p) +{ + int extract_flags = *extract_flags_p; + + /* Check for invalid flag combinations */ + + if ((extract_flags & + (WIMLIB_EXTRACT_FLAG_NO_ACLS | + WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS | + WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) + return WIMLIB_ERR_INVALID_PARAM; + + if ((extract_flags & + (WIMLIB_EXTRACT_FLAG_RPFIX | + WIMLIB_EXTRACT_FLAG_NORPFIX)) == (WIMLIB_EXTRACT_FLAG_RPFIX | + WIMLIB_EXTRACT_FLAG_NORPFIX)) + return WIMLIB_ERR_INVALID_PARAM; + +#ifndef WITH_NTFS_3G + if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { + ERROR("wimlib was compiled without support for NTFS-3g, so\n" + " it cannot apply a WIM image directly to an NTFS volume."); + return WIMLIB_ERR_UNSUPPORTED; + } +#endif + +#ifndef __WIN32__ + if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) { + ERROR("WIMBoot extraction is only supported on Windows!"); + return WIMLIB_ERR_UNSUPPORTED; } +#endif + + if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX | + WIMLIB_EXTRACT_FLAG_NORPFIX | + WIMLIB_EXTRACT_FLAG_IMAGEMODE)) == + WIMLIB_EXTRACT_FLAG_IMAGEMODE) + { + /* For full-image extraction, do reparse point fixups by default + * if the WIM header says they are enabled. */ + if (wim->hdr.flags & WIM_HDR_FLAG_RP_FIX) + extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX; + } + + *extract_flags_p = extract_flags; return 0; } -/* Extract files or directories from a WIM image. 
*/ -WIMLIBAPI int -wimlib_extract_files(WIMStruct *wim, - int image, - const struct wimlib_extract_command *cmds, - size_t num_cmds, - int default_extract_flags, - WIMStruct **additional_swms, - unsigned num_additional_swms, - wimlib_progress_func_t progress_func) +static u32 +get_wildcard_flags(int extract_flags) +{ + u32 wildcard_flags = 0; + + if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB) + wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH; + else + wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH; + + if (default_ignore_case) + wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE; + + return wildcard_flags; +} + +struct append_dentry_ctx { + struct wim_dentry **dentries; + size_t num_dentries; + size_t num_alloc_dentries; +}; + +static int +append_dentry_cb(struct wim_dentry *dentry, void *_ctx) +{ + struct append_dentry_ctx *ctx = _ctx; + + if (ctx->num_dentries == ctx->num_alloc_dentries) { + struct wim_dentry **new_dentries; + size_t new_length; + + new_length = max(ctx->num_alloc_dentries + 8, + ctx->num_alloc_dentries * 3 / 2); + new_dentries = REALLOC(ctx->dentries, + new_length * sizeof(ctx->dentries[0])); + if (new_dentries == NULL) + return WIMLIB_ERR_NOMEM; + ctx->dentries = new_dentries; + ctx->num_alloc_dentries = new_length; + } + ctx->dentries[ctx->num_dentries++] = dentry; + return 0; +} + +static int +do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target, + const tchar * const *paths, size_t num_paths, + int extract_flags) { int ret; - struct wimlib_extract_command *cmds_copy; - int all_flags = 0; + struct wim_dentry **trees; + size_t num_trees; - default_extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC; + if (wim == NULL || target == NULL || target[0] == T('\0') || + (num_paths != 0 && paths == NULL)) + return WIMLIB_ERR_INVALID_PARAM; - ret = verify_swm_set(wim, additional_swms, num_additional_swms); + ret = check_extract_flags(wim, &extract_flags); if (ret) - goto out; + return ret; - if (num_cmds == 0) - goto out; + ret = 
select_wim_image(wim, image); + if (ret) + return ret; - if (num_additional_swms) - merge_lookup_tables(wim, additional_swms, num_additional_swms); + ret = wim_checksum_unhashed_streams(wim); + if (ret) + return ret; - cmds_copy = CALLOC(num_cmds, sizeof(cmds[0])); - if (!cmds_copy) { - ret = WIMLIB_ERR_NOMEM; - goto out_restore_lookup_table; + if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NTFS | + WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) == + (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE)) + { + ret = mkdir_if_needed(target); + if (ret) + return ret; } - for (size_t i = 0; i < num_cmds; i++) { - cmds_copy[i].extract_flags = (default_extract_flags | - cmds[i].extract_flags) - & WIMLIB_EXTRACT_MASK_PUBLIC; - all_flags |= cmds_copy[i].extract_flags; + if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) { - cmds_copy[i].wim_source_path = canonicalize_wim_path(cmds[i].wim_source_path); - if (!cmds_copy[i].wim_source_path) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_cmds_copy; + struct append_dentry_ctx append_dentry_ctx = { + .dentries = NULL, + .num_dentries = 0, + .num_alloc_dentries = 0, + }; + + u32 wildcard_flags = get_wildcard_flags(extract_flags); + + for (size_t i = 0; i < num_paths; i++) { + tchar *path = canonicalize_wim_path(paths[i]); + if (path == NULL) { + ret = WIMLIB_ERR_NOMEM; + trees = append_dentry_ctx.dentries; + goto out_free_trees; + } + ret = expand_wildcard(wim, path, + append_dentry_cb, + &append_dentry_ctx, + wildcard_flags); + FREE(path); + if (ret) { + trees = append_dentry_ctx.dentries; + goto out_free_trees; + } } + trees = append_dentry_ctx.dentries; + num_trees = append_dentry_ctx.num_dentries; + } else { + trees = MALLOC(num_paths * sizeof(trees[0])); + if (trees == NULL) + return WIMLIB_ERR_NOMEM; - cmds_copy[i].fs_dest_path = canonicalize_fs_path(cmds[i].fs_dest_path); - if (!cmds_copy[i].fs_dest_path) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_cmds_copy; + for (size_t i = 0; i < num_paths; i++) { + + tchar *path = 
canonicalize_wim_path(paths[i]); + if (path == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_trees; + } + + trees[i] = get_dentry(wim, path, + WIMLIB_CASE_PLATFORM_DEFAULT); + FREE(path); + if (trees[i] == NULL) { + ERROR("Path \"%"TS"\" does not exist " + "in WIM image %d", + paths[i], wim->current_image); + ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST; + goto out_free_trees; + } } + num_trees = num_paths; + } + if (num_trees == 0) { + ret = 0; + goto out_free_trees; } - ret = do_wimlib_extract_files(wim, image, - cmds_copy, num_cmds, - progress_func); - if (all_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) - { - for_lookup_table_entry(wim->lookup_table, - lte_free_extracted_file, NULL); - } -out_free_cmds_copy: - for (size_t i = 0; i < num_cmds; i++) { - FREE(cmds_copy[i].wim_source_path); - FREE(cmds_copy[i].fs_dest_path); - } - FREE(cmds_copy); -out_restore_lookup_table: - if (num_additional_swms) - unmerge_lookup_table(wim); -out: + ret = extract_trees(wim, trees, num_trees, target, extract_flags); +out_free_trees: + FREE(trees); return ret; } -/* - * Extracts an image from a WIM file. - * - * @wim: WIMStruct for the WIM file. - * - * @image: Number of the single image to extract. - * - * @target: Directory or NTFS volume to extract the image to. - * - * @extract_flags: Bitwise or of WIMLIB_EXTRACT_FLAG_*. - * - * @progress_func: If non-NULL, a progress function to be called - * periodically. - * - * Returns 0 on success; nonzero on failure. 
- */ static int extract_single_image(WIMStruct *wim, int image, - const tchar *target, int extract_flags, - wimlib_progress_func_t progress_func) + const tchar *target, int extract_flags) { - int ret; - tchar *target_copy = canonicalize_fs_path(target); - if (!target_copy) - return WIMLIB_ERR_NOMEM; - struct wimlib_extract_command cmd = { - .wim_source_path = T(""), - .fs_dest_path = target_copy, - .extract_flags = extract_flags, - }; - ret = do_wimlib_extract_files(wim, image, &cmd, 1, progress_func); - FREE(target_copy); - return ret; + const tchar *path = WIMLIB_WIM_ROOT_PATH; + extract_flags |= WIMLIB_EXTRACT_FLAG_IMAGEMODE; + return do_wimlib_extract_paths(wim, image, target, &path, 1, extract_flags); } static const tchar * const filename_forbidden_chars = @@ -1146,10 +1553,7 @@ image_name_ok_as_dir(const tchar *image_name) /* Extracts all images from the WIM to the directory @target, with the images * placed in subdirectories named by their image names. */ static int -extract_all_images(WIMStruct *wim, - const tchar *target, - int extract_flags, - wimlib_progress_func_t progress_func) +extract_all_images(WIMStruct *wim, const tchar *target, int extract_flags) { size_t image_name_max_len = max(xml_get_max_image_name_len(wim), 20); size_t output_path_len = tstrlen(target); @@ -1157,28 +1561,19 @@ extract_all_images(WIMStruct *wim, int ret; int image; const tchar *image_name; - struct stat stbuf; - if (tstat(target, &stbuf)) { - if (errno == ENOENT) - { - if (tmkdir(target, S_IRWXU | S_IRGRP | S_IXGRP | - S_IROTH | S_IXOTH)) - { - ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target); - return WIMLIB_ERR_MKDIR; - } - } else { - ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target); - return WIMLIB_ERR_STAT; - } - } else if (!S_ISDIR(stbuf.st_mode)) { - ERROR("\"%"TS"\" is not a directory", target); - return WIMLIB_ERR_NOTDIR; + extract_flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE; + + if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) { + ERROR("Cannot extract 
multiple images in NTFS extraction mode."); + return WIMLIB_ERR_INVALID_PARAM; } + ret = mkdir_if_needed(target); + if (ret) + return ret; tmemcpy(buf, target, output_path_len); - buf[output_path_len] = T('/'); + buf[output_path_len] = OS_PREFERRED_PATH_SEPARATOR; for (image = 1; image <= wim->hdr.image_count; image++) { image_name = wimlib_get_image_name(wim, image); if (image_name_ok_as_dir(image_name)) { @@ -1188,53 +1583,247 @@ extract_all_images(WIMStruct *wim, * Use image number instead. */ tsprintf(buf + output_path_len + 1, T("%d"), image); } - ret = extract_single_image(wim, image, buf, extract_flags, - progress_func); + ret = extract_single_image(wim, image, buf, extract_flags); if (ret) return ret; } return 0; } -/* Extracts a single image or all images from a WIM file to a directory or NTFS - * volume. */ +static int +do_wimlib_extract_image(WIMStruct *wim, int image, const tchar *target, + int extract_flags) +{ + if (extract_flags & (WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE | + WIMLIB_EXTRACT_FLAG_TO_STDOUT | + WIMLIB_EXTRACT_FLAG_GLOB_PATHS)) + return WIMLIB_ERR_INVALID_PARAM; + + if (image == WIMLIB_ALL_IMAGES) + return extract_all_images(wim, target, extract_flags); + else + return extract_single_image(wim, image, target, extract_flags); +} + + +/**************************************************************************** + * Extraction API * + ****************************************************************************/ + WIMLIBAPI int -wimlib_extract_image(WIMStruct *wim, - int image, - const tchar *target, - int extract_flags, - WIMStruct **additional_swms, - unsigned num_additional_swms, - wimlib_progress_func_t progress_func) +wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target, + const tchar * const *paths, size_t num_paths, + int extract_flags) +{ + if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC) + return WIMLIB_ERR_INVALID_PARAM; + + return do_wimlib_extract_paths(wim, image, target, paths, num_paths, + extract_flags); 
+} + +WIMLIBAPI int +wimlib_extract_pathlist(WIMStruct *wim, int image, const tchar *target, + const tchar *path_list_file, int extract_flags) { int ret; + tchar **paths; + size_t num_paths; + void *mem; + + ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem); + if (ret) { + ERROR("Failed to read path list file \"%"TS"\"", + path_list_file); + return ret; + } - extract_flags &= WIMLIB_EXTRACT_MASK_PUBLIC; + ret = wimlib_extract_paths(wim, image, target, + (const tchar * const *)paths, num_paths, + extract_flags); + FREE(paths); + FREE(mem); + return ret; +} - ret = verify_swm_set(wim, additional_swms, num_additional_swms); +WIMLIBAPI int +wimlib_extract_image_from_pipe_with_progress(int pipe_fd, + const tchar *image_num_or_name, + const tchar *target, + int extract_flags, + wimlib_progress_func_t progfunc, + void *progctx) +{ + int ret; + WIMStruct *pwm; + struct filedes *in_fd; + int image; + unsigned i; + + if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC) + return WIMLIB_ERR_INVALID_PARAM; + + /* Read the WIM header from the pipe and get a WIMStruct to represent + * the pipable WIM. Caveats: Unlike getting a WIMStruct with + * wimlib_open_wim(), getting a WIMStruct in this way will result in + * an empty lookup table, no XML data read, and no filename set. */ + ret = open_wim_as_WIMStruct(&pipe_fd, WIMLIB_OPEN_FLAG_FROM_PIPE, &pwm, + progfunc, progctx); if (ret) return ret; - if (num_additional_swms) - merge_lookup_tables(wim, additional_swms, num_additional_swms); + /* Sanity check to make sure this is a pipable WIM. 
*/ + if (pwm->hdr.magic != PWM_MAGIC) { + ERROR("The WIM being read from file descriptor %d " + "is not pipable!", pipe_fd); + ret = WIMLIB_ERR_NOT_PIPABLE; + goto out_wimlib_free; + } - if (image == WIMLIB_ALL_IMAGES) { - ret = extract_all_images(wim, target, - extract_flags | WIMLIB_EXTRACT_FLAG_MULTI_IMAGE, - progress_func); - } else { - ret = extract_single_image(wim, image, target, extract_flags, - progress_func); + /* Sanity check to make sure the first part of a pipable split WIM is + * sent over the pipe first. */ + if (pwm->hdr.part_number != 1) { + ERROR("The first part of the split WIM must be " + "sent over the pipe first."); + ret = WIMLIB_ERR_INVALID_PIPABLE_WIM; + goto out_wimlib_free; } - if (extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | - WIMLIB_EXTRACT_FLAG_HARDLINK)) + in_fd = &pwm->in_fd; + wimlib_assert(in_fd->offset == WIM_HEADER_DISK_SIZE); + + /* As mentioned, the WIMStruct we created from the pipe does not have + * XML data yet. Fix this by reading the extra copy of the XML data + * that directly follows the header in pipable WIMs. (Note: see + * write_pipable_wim() for more details about the format of pipable + * WIMs.) 
*/ { - for_lookup_table_entry(wim->lookup_table, - lte_free_extracted_file, - NULL); + struct wim_lookup_table_entry xml_lte; + struct wim_resource_spec xml_rspec; + ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL); + if (ret) + goto out_wimlib_free; + + if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA)) + { + ERROR("Expected XML data, but found non-metadata " + "stream."); + ret = WIMLIB_ERR_INVALID_PIPABLE_WIM; + goto out_wimlib_free; + } + + wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr); + + ret = read_wim_xml_data(pwm); + if (ret) + goto out_wimlib_free; + + if (wim_info_get_num_images(pwm->wim_info) != pwm->hdr.image_count) { + ERROR("Image count in XML data is not the same as in WIM header."); + ret = WIMLIB_ERR_IMAGE_COUNT; + goto out_wimlib_free; + } + } + + /* Get image index (this may use the XML data that was just read to + * resolve an image name). */ + if (image_num_or_name) { + image = wimlib_resolve_image(pwm, image_num_or_name); + if (image == WIMLIB_NO_IMAGE) { + ERROR("\"%"TS"\" is not a valid image in the pipable WIM!", + image_num_or_name); + ret = WIMLIB_ERR_INVALID_IMAGE; + goto out_wimlib_free; + } else if (image == WIMLIB_ALL_IMAGES) { + ERROR("Applying all images from a pipe is not supported!"); + ret = WIMLIB_ERR_INVALID_IMAGE; + goto out_wimlib_free; + } + } else { + if (pwm->hdr.image_count != 1) { + ERROR("No image was specified, but the pipable WIM " + "did not contain exactly 1 image"); + ret = WIMLIB_ERR_INVALID_IMAGE; + goto out_wimlib_free; + } + image = 1; + } + + /* Load the needed metadata resource. 
*/ + for (i = 1; i <= pwm->hdr.image_count; i++) { + struct wim_lookup_table_entry *metadata_lte; + struct wim_image_metadata *imd; + struct wim_resource_spec *metadata_rspec; + + metadata_lte = new_lookup_table_entry(); + if (metadata_lte == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_wimlib_free; + } + metadata_rspec = MALLOC(sizeof(struct wim_resource_spec)); + if (metadata_rspec == NULL) { + ret = WIMLIB_ERR_NOMEM; + free_lookup_table_entry(metadata_lte); + goto out_wimlib_free; + } + + ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL); + imd = pwm->image_metadata[i - 1]; + imd->metadata_lte = metadata_lte; + if (ret) { + FREE(metadata_rspec); + goto out_wimlib_free; + } + + if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) { + ERROR("Expected metadata resource, but found " + "non-metadata stream."); + ret = WIMLIB_ERR_INVALID_PIPABLE_WIM; + goto out_wimlib_free; + } + + if (i == image) { + /* Metadata resource is for the image being extracted. + * Parse it and save the metadata in memory. */ + ret = read_metadata_resource(pwm, imd); + if (ret) + goto out_wimlib_free; + imd->modified = 1; + } else { + /* Metadata resource is not for the image being + * extracted. Skip over it. */ + ret = skip_wim_stream(metadata_lte); + if (ret) + goto out_wimlib_free; + } } - if (num_additional_swms) - unmerge_lookup_table(wim); + /* Extract the image. */ + extract_flags |= WIMLIB_EXTRACT_FLAG_FROM_PIPE; + ret = do_wimlib_extract_image(pwm, image, target, extract_flags); + /* Clean up and return. 
*/ +out_wimlib_free: + wimlib_free(pwm); return ret; } + + +WIMLIBAPI int +wimlib_extract_image_from_pipe(int pipe_fd, const tchar *image_num_or_name, + const tchar *target, int extract_flags) +{ + return wimlib_extract_image_from_pipe_with_progress(pipe_fd, + image_num_or_name, + target, + extract_flags, + NULL, + NULL); +} + +WIMLIBAPI int +wimlib_extract_image(WIMStruct *wim, int image, const tchar *target, + int extract_flags) +{ + if (extract_flags & ~WIMLIB_EXTRACT_MASK_PUBLIC) + return WIMLIB_ERR_INVALID_PARAM; + return do_wimlib_extract_image(wim, image, target, extract_flags); +}