src/lzx-decompress.c \
src/metadata_resource.c \
src/mount_image.c \
+ src/pathlist.c \
src/paths.c \
src/resource.c \
src/rbtree.c \
src/update_image.c \
src/util.c \
src/verify.c \
+ src/wildcard.c \
src/wim.c \
src/write.c \
src/xml.c \
include/wimlib/lzms.h \
include/wimlib/lzx.h \
include/wimlib/metadata.h \
+ include/wimlib/pathlist.h \
include/wimlib/paths.h \
include/wimlib/rbtree.h \
include/wimlib/reparse.h \
include/wimlib/types.h \
include/wimlib/util.h \
include/wimlib/version.h \
+ include/wimlib/wildcard.h \
include/wimlib/wim.h \
include/wimlib/write.h \
include/wimlib/xml.h \
@IMAGEX_PROGNAME@-extract \- Extract files or directories from a WIM image
.SH SYNOPSIS
\fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR [\fIPATH\fR...] [\fIOPTION\fR...]
+.br
+\fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR @\fILISTFILE\fR [\fIOPTION\fR...]
.SH DESCRIPTION
\fB@IMAGEX_PROGNAME@ extract\fR extracts one or more files or directory trees
from the specified \fIIMAGE\fR contained in the Windows Imaging (WIM) file
command to show what images a WIM file contains.
.PP
Each \fIPATH\fR specifies a file or directory tree within the WIM image to
-extract. See \fBPATH_SPECIFICATIONS\fR.
+extract. Alternatively, a single \fILISTFILE\fR beginning with the '@'
+character is taken as a file that itself contains a list of files or directory
+trees to extract. See \fBPATH_SPECIFICATIONS\fR.
.PP
By default, files and directories are extracted to the current directory. Use
\fB--dest-dir\fR to choose an alternate target directory. Alternatively, use
\fB@IMAGEX_PROGNAME@ extract\fR supports extracting files and directory trees
from stand-alone WIMs as well as split WIMs. See \fBSPLIT WIMS\fR.
.SH PATH SPECIFICATIONS
-Each \fIPATH\fR specifies a file or directory tree within the WIM image to
-extract. Each path must be specified as an absolute path starting from the root
-of the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1)
-command. However, path separators may be either forward or backward slashes,
-and the leading slash is optional; also, on Windows, the paths are treated
+Except when a single path is specified and prefixed by the '@' character, each
+\fIPATH\fR specifies a file or directory tree within the WIM image to extract.
+Each such path must be specified as an absolute path starting from the root of
+the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1) command.
+However, path separators may be either forward or backward slashes, and the
+leading slash is optional; also, on Windows, the paths are treated
case-insensitively, while on UNIX, paths are treated case-sensitively.
.PP
If no \fIPATH\fRs are provided, the default behavior is to extract the full
image, as if the path "/" had been provided.
+.PP
+If a single \fIPATH\fR is provided and is prefixed with the '@' character, it is
+interpreted as the path to a \fILISTFILE\fR which must be a UTF-8 text file that
+contains a list of paths (files or directories) to extract, one per line. In
+each line, leading and trailing whitespace is ignored, and lines beginning with
+the ';' character and otherwise empty lines are ignored. Each path must be
+unquoted and must specify a full path in the WIM image, as described above.
+However, unless \fB--no-wildcards\fR is specified, each path in the list file
+may also contain the wildcard characters '?' and '*', and therefore may expand
+to multiple actual files or directories. By default, paths or wildcards that
+match no files only produce a warning; use \fB--strict-wildcards\fR if you
+want an error instead. Also, when using a list file, files and directories not
+located at the root of the WIM image will be extracted to a corresponding
+subdirectory of the destination directory rather than directly to the
+destination directory itself.
.SH SPLIT WIMS
You may use \fB@IMAGEX_PROGNAME@ extract\fR to extract files or directory trees
from a split WIM. This uses the \fB--refs\fR="\fIGLOB\fR" option in the same
.TP
\fB--ref\fR="\fIGLOB\fR"
File glob of additional WIMs or split WIM parts to reference resources from.
-See \fBSPLIT_WIMS\fR. Note: since \fIGLOB\fR is listed in quotes because it is
+See \fBSPLIT_WIMS\fR. Note: \fIGLOB\fR is listed in quotes because it is
interpreted by \fB@IMAGEX_PROGNAME@\fR and may need to be quoted to protect
against shell expansion.
.TP
\fB--dest-dir\fR=\fIDIR\fR
Extract the files and directories to the directory \fIDIR\fR instead of to the
current working directory.
+.TP
+\fB--no-wildcards\fR
+Do not interpret wildcard characters in paths in the \fILISTFILE\fR.
+.TP
+\fB--strict-wildcards\fR
+Fail if any wildcards or paths in \fILISTFILE\fR do not match any files in the
+WIM image. The default behavior is to warn only.
+.TP
+\fB--case-insensitive-wildcards\fR
+Treat the wildcards or paths in \fILISTFILE\fR as case-insensitive. On Windows
+this is already the default behavior, but on UNIX-like systems it is not.
.SH NOTES
See the documentation \fB@IMAGEX_PROGNAME@ apply\fR (1) for documentation about
what data and metadata are extracted on UNIX-like systems versus on Windows.
.RE
.RE
.PP
+Extract files using a list file:
+.RS
+.PP
+@IMAGEX_PROGNAME@ extract install.wim 1 @files.txt
+.RE
+.PP
+ ... where files.txt could be something like:
+.PP
+.RS
+.RS
+.nf
+Windows\\System32\\*.*
+Windows\\System32\\??-??\\*.*
+Windows\\System32\\en-US\\*.*
+.RE
+.RE
+.fi
.SH SEE ALSO
.BR @IMAGEX_PROGNAME@ (1)
.BR @IMAGEX_PROGNAME@-apply (1)
* Another function, wimlib_extract_files(), is also provided. It can extract
* certain files or directories from a WIM image, instead of a full image.
*
- * A third function, wimlib_extract_image_from_pipe(), allows an image to be
- * extracted from a pipable WIM sent over a pipe; see @ref subsec_pipable_wims.
+ * wimlib_extract_paths() and wimlib_extract_pathlist() allow extracting a set
+ * of paths from a WIM image in a manner that may be easier to use than
+ * wimlib_extract_files(), and can also expand wildcard characters.
+ *
+ * wimlib_extract_image_from_pipe() allows an image to be extracted from a
+ * pipable WIM sent over a pipe; see @ref subsec_pipable_wims.
*
* Note that some details of how image extraction/application works are
* documented more fully in the manual pages for <b>wimlib-imagex apply</b> and
* performance. */
#define WIMLIB_EXTRACT_FLAG_FILE_ORDER 0x00020000
+/** For wimlib_extract_paths() and wimlib_extract_pathlist() only: Treat the
+ * paths to extract as globs which may contain the wildcard characters '?' and
+ * '*'. The '?' character matches any single character, whereas the '*'
+ * character matches zero or more characters in the same path component.
+ * Whether matching is case-insensitive depends on the platform default and on
+ * ::WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB. */
+#define WIMLIB_EXTRACT_FLAG_GLOB_PATHS 0x00040000
+
+/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes an error
+ * (::WIMLIB_ERR_PATH_DOES_NOT_EXIST) rather than a warning to be issued when
+ * one of the provided globs did not match a file. */
+#define WIMLIB_EXTRACT_FLAG_STRICT_GLOB 0x00080000
+
+/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes the globbing to
+ * be performed case insensitively. On Windows this is already the default
+ * behavior but on UNIX-like systems it is not. */
+#define WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB 0x00100000
+
/** @} */
/** @ingroup G_mounting_wim_images
* @{ */
const wimlib_tchar *target, int extract_flags,
wimlib_progress_func_t progress_func);
+/**
+ * Similar to wimlib_extract_paths(), but the paths to extract from the WIM
+ * image are specified in the UTF-8 text file @p path_list_file, which itself
+ * contains the list of paths to use, one per line. Leading and trailing
+ * whitespace, and otherwise empty lines and lines beginning with the ';'
+ * character are ignored. No quotes are needed as paths are otherwise delimited
+ * by the newline character.
+ *
+ * @param wim
+ *	The WIM to extract from.
+ * @param image
+ *	The image within @p wim that contains the paths.
+ * @param target
+ *	Directory beneath which the listed paths are extracted.
+ * @param path_list_file
+ *	Path to the UTF-8 text file that lists the paths to extract.
+ * @param extract_flags
+ *	Extraction flags; passed on unchanged to wimlib_extract_paths().
+ * @param progress_func
+ *	Progress callback; passed on unchanged to wimlib_extract_paths().
+ *
+ * @return 0 on success; otherwise the error code produced while reading
+ * @p path_list_file or the error code returned by wimlib_extract_paths().
+ */
+extern int
+wimlib_extract_pathlist(WIMStruct *wim, int image,
+ const wimlib_tchar *target,
+ const wimlib_tchar *path_list_file,
+ int extract_flags,
+ wimlib_progress_func_t progress_func);
+
+/**
+ * Similar to wimlib_extract_files(), but the files or directories to extract
+ * from the WIM image are specified as an array of paths. Each path will be
+ * extracted to a corresponding location in @p target based on its location in
+ * the WIM image.
+ *
+ * With ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS specified in @p extract_flags, this
+ * function additionally allows paths to be globs using the wildcard characters
+ * '*' and '?'.
+ *
+ * @param wim
+ *	The WIM to extract from.
+ * @param image
+ *	The image within @p wim that contains the paths.
+ * @param target
+ *	Directory beneath which each path is extracted; the destination of a
+ *	path is @p target, a path separator, and then the path itself.
+ * @param paths
+ *	Array of @p num_paths paths (or globs) within the WIM image.
+ * @param num_paths
+ *	Number of entries in @p paths.
+ * @param extract_flags
+ *	Extraction flags, optionally including
+ *	::WIMLIB_EXTRACT_FLAG_GLOB_PATHS,
+ *	::WIMLIB_EXTRACT_FLAG_STRICT_GLOB and
+ *	::WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB.
+ * @param progress_func
+ *	Progress callback; passed on to wimlib_extract_files().
+ *
+ * @return 0 on success; otherwise a nonzero error code, for example
+ * ::WIMLIB_ERR_NOMEM if memory could not be allocated, or
+ * ::WIMLIB_ERR_PATH_DOES_NOT_EXIST if ::WIMLIB_EXTRACT_FLAG_STRICT_GLOB was
+ * specified and a glob matched nothing.
+ */
+extern int
+wimlib_extract_paths(WIMStruct *wim,
+ int image,
+ const wimlib_tchar *target,
+ const wimlib_tchar * const *paths,
+ size_t num_paths,
+ int extract_flags,
+ wimlib_progress_func_t progress_func);
+
/**
* @ingroup G_wim_information
*
int (*visitor)(struct wim_dentry *, void *),
void *arg);
-static inline int
+extern int
for_dentry_child(const struct wim_dentry *dentry,
int (*visitor)(struct wim_dentry *, void *),
- void *arg)
-{
- return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node,
- visitor,
- arg);
-}
+ void *arg);
extern int
for_dentry_in_tree_depth(struct wim_dentry *root,
DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar);
#endif
+DECLARE_CHAR_CONVERSION_FUNCTIONS(utf8, tstr, char, tchar);
+DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf8, tchar, char);
+
extern int
utf8_to_tstr_simple(const char *utf8str, tchar **out);
--- /dev/null
+#ifndef _WIMLIB_PATHLIST_H
+#define _WIMLIB_PATHLIST_H
+
+#include "wimlib/types.h"
+
+/*
+ * Read the UTF-8 text file @listfile and split it into a list of paths, one
+ * per line.  Leading and trailing whitespace is stripped from each line, and
+ * empty lines and lines beginning with ';' are skipped.
+ *
+ * On success, returns 0, stores an array of path strings in *paths_ret and
+ * its length in *num_paths_ret, and stores in *mem_ret the buffer that the
+ * path strings point into.  The caller must release both the array and the
+ * buffer with FREE() when done.  On failure, returns a nonzero error code.
+ */
+extern int
+read_path_list_file(const tchar *listfile,
+ tchar ***paths_ret, size_t *num_paths_ret,
+ void **mem_ret);
+
+#endif /* _WIMLIB_PATHLIST_H */
--- /dev/null
+#ifndef _WIMLIB_WILDCARD_H
+#define _WIMLIB_WILDCARD_H
+
+#include "wimlib/types.h"
+
+/*
+ * Flags for expand_wildcard_wim_paths().
+ */
+
+/* If a wildcard path matches nothing, pass the wildcard path itself on as
+ * though it were an expanded path. */
+#define WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES	0x00000001
+
+/* Issue a warning for each wildcard path that matches nothing. */
+#define WILDCARD_FLAG_WARN_IF_NO_MATCH		0x00000002
+
+/* Fail with WIMLIB_ERR_PATH_DOES_NOT_EXIST if a wildcard path matches
+ * nothing. */
+#define WILDCARD_FLAG_ERROR_IF_NO_MATCH		0x00000004
+
+/* Match path components case-insensitively. */
+#define WILDCARD_FLAG_CASE_INSENSITIVE		0x00000008
+
+/*
+ * Expand each of the @num_wildcards wildcard paths in @wildcards against the
+ * currently selected image of @wim.
+ *
+ * On success, returns 0 and stores a newly allocated array of newly allocated
+ * path strings in *expanded_paths_ret and its length in
+ * *num_expanded_paths_ret.  On failure, returns a nonzero error code and
+ * stores nothing.
+ *
+ * The wildcards are tchar strings, matching the paths handed to
+ * wimlib_extract_paths().
+ */
+extern int
+expand_wildcard_wim_paths(WIMStruct *wim,
+			  const tchar * const *wildcards,
+			  size_t num_wildcards,
+			  tchar ***expanded_paths_ret,
+			  size_t *num_expanded_paths_ret,
+			  u32 flags);
+
+/*
+ * fnmatch() is provided by a replacement function on Windows and by the
+ * system's <fnmatch.h> elsewhere.  FNM_CASEFOLD is defined to 0 wherever it
+ * is unavailable so that callers can OR it in unconditionally.
+ * NOTE(review): presumably the Windows replacement already matches
+ * case-insensitively, which would make FNM_CASEFOLD redundant there; confirm.
+ */
+#ifdef __WIN32__
+extern int
+fnmatch(const tchar *pattern, const tchar *string, int flags);
+#  define FNM_CASEFOLD 0
+#else
+#  include <fnmatch.h>
+#  ifndef FNM_CASEFOLD
+#    warning "FNM_CASEFOLD not defined!"
+#    define FNM_CASEFOLD 0
+#  endif
+#endif
+
+#endif /* _WIMLIB_WILDCARD_H */
#ifndef _WIMLIB_WIM_H
#define _WIMLIB_WIM_H
+#include "wimlib.h"
#include "wimlib/header.h"
#include "wimlib/types.h"
#include "wimlib/file_io.h"
void *private;
struct wimlib_decompressor *decompressor;
- enum wimlib_compression_type decompressor_ctype;
+ u8 decompressor_ctype;
u32 decompressor_max_block_size;
struct list_head subwims;
enum {
IMAGEX_ALLOW_OTHER_OPTION,
IMAGEX_BOOT_OPTION,
+ IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION,
IMAGEX_CHECK_OPTION,
IMAGEX_CHUNK_SIZE_OPTION,
IMAGEX_COMMAND_OPTION,
IMAGEX_NORPFIX_OPTION,
IMAGEX_NOCHECK_OPTION,
IMAGEX_NO_ACLS_OPTION,
+ IMAGEX_NO_WILDCARDS_OPTION,
IMAGEX_NOT_PIPABLE_OPTION,
IMAGEX_PACK_STREAMS_OPTION,
IMAGEX_PATH_OPTION,
IMAGEX_STAGING_DIR_OPTION,
IMAGEX_STREAMS_INTERFACE_OPTION,
IMAGEX_STRICT_ACLS_OPTION,
+ IMAGEX_STRICT_WILDCARDS_OPTION,
IMAGEX_SYMLINK_OPTION,
IMAGEX_THREADS_OPTION,
IMAGEX_TO_STDOUT_OPTION,
{T("dest-dir"), required_argument, NULL, IMAGEX_DEST_DIR_OPTION},
{T("to-stdout"), no_argument, NULL, IMAGEX_TO_STDOUT_OPTION},
{T("include-invalid-names"), no_argument, NULL, IMAGEX_INCLUDE_INVALID_NAMES_OPTION},
+ {T("strict-wildcards"), no_argument, NULL, IMAGEX_STRICT_WILDCARDS_OPTION},
+ {T("no-wildcards"), no_argument, NULL, IMAGEX_NO_WILDCARDS_OPTION},
+ {T("case-insensitive-wildcards"), no_argument, NULL, IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION},
{NULL, 0, NULL, 0},
};
int ret;
const tchar *wimfile;
const tchar *image_num_or_name;
+ const tchar *pathlist;
tchar *dest_dir = T(".");
int extract_flags = WIMLIB_EXTRACT_FLAG_SEQUENTIAL | WIMLIB_EXTRACT_FLAG_NORPFIX;
+ int listfile_extract_flags = WIMLIB_EXTRACT_FLAG_GLOB_PATHS;
STRING_SET(refglobs);
extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES;
extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS;
break;
+ case IMAGEX_NO_WILDCARDS_OPTION:
+ listfile_extract_flags &= ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS;
+ break;
+ case IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION:
+ listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB;
+ break;
+ case IMAGEX_STRICT_WILDCARDS_OPTION:
+ listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_STRICT_GLOB;
+ break;
default:
goto out_usage;
}
argc -= 2;
argv += 2;
- cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir,
- &num_cmds);
- if (!cmds)
- goto out_err;
+ if (argc == 1 && argv[0][0] == T('@')) {
+ pathlist = argv[0] + 1;
+ cmds = NULL;
+ num_cmds = 0;
+ } else {
+ cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir,
+ &num_cmds);
+ if (cmds == NULL)
+ goto out_err;
+ pathlist = NULL;
+ }
ret = wimlib_open_wim(wimfile, open_flags, &wim, imagex_progress_func);
if (ret)
goto out_wimlib_free;
}
- ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0,
- imagex_progress_func);
+ ret = 0;
+ if (ret == 0 && cmds != NULL) {
+ ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0,
+ imagex_progress_func);
+ }
+ if (ret == 0 && pathlist != NULL) {
+ ret = wimlib_extract_pathlist(wim, image, dest_dir,
+ pathlist,
+ extract_flags | listfile_extract_flags,
+ imagex_progress_func);
+ }
if (ret == 0) {
if (!imagex_be_quiet)
imagex_printf(T("Done extracting files.\n"));
tfprintf(stderr, T("Note: You can use `%"TS"' to see what "
"files and directories\n"
" are in the WIM image.\n"),
- get_cmd_string(CMD_INFO, false));
+ get_cmd_string(CMD_DIR, false));
} else if (ret == WIMLIB_ERR_RESOURCE_NOT_FOUND) {
struct wimlib_wim_info info;
#include "wimlib/error.h"
#include "wimlib/lookup_table.h"
#include "wimlib/paths.h"
+#include "wimlib/wildcard.h"
-#ifdef __WIN32__
-# include "wimlib/win32.h" /* for fnmatch() equivalent */
-#else
-# include <fnmatch.h>
-#endif
#include <string.h>
-
static int
canonicalize_pattern(const tchar *pat, tchar **canonical_pat_ret)
{
return 0;
}
+/*
+ * Invoke @visitor once for each child of @dentry, handing it the child dentry
+ * together with the caller-supplied @arg.  The traversal itself is delegated
+ * to for_dentry_in_rbtree() on the root of the directory inode's tree of
+ * children, and that function's return value is passed back unchanged.
+ *
+ * Note: ALL child dentries are visited, even those that share the same
+ * case-insensitive name.
+ *
+ * Note: the tmp_list field of the child dentries is clobbered.
+ */
+int
+for_dentry_child(const struct wim_dentry *dentry,
+		 int (*visitor)(struct wim_dentry *, void *),
+		 void *arg)
+{
+	struct rb_node *children_root = dentry->d_inode->i_children.rb_node;
+
+	return for_dentry_in_rbtree(children_root, visitor, arg);
+}
+
/* Calls a function on all directory entries in a WIM dentry tree. Logically,
* this is a pre-order traversal (the function is called on a parent dentry
* before its children), but sibling dentries will be visited in order as well.
WIMLIB_ERR_INVALID_UTF16_STRING,
ERROR_WITH_ERRNO("Failed to convert UTF-16LE "
"string \"%"TS"\" to UTF-8 string!", in),
- static)
+ )
DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
tstr, "UTF-16LE", tchar,
WIMLIB_ERR_INVALID_UTF8_STRING,
ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
"to UTF-16LE string!"),
- static)
+ )
#else
/* UNIX */
ERROR("If the data you provided was UTF-8, please make sure "
"the character\n"
" encoding of your current locale is UTF-8."),
- static)
+ )
DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
tstr, "", tchar,
ERROR("This may be because the UTF-8 data "
"could not be represented\n"
" in your locale's character encoding."),
- static)
+ )
#endif
int
#include "wimlib/error.h"
#include "wimlib/lookup_table.h"
#include "wimlib/metadata.h"
+#include "wimlib/pathlist.h"
#include "wimlib/paths.h"
#include "wimlib/reparse.h"
#include "wimlib/resource.h"
# include "wimlib/win32.h" /* for realpath() equivalent */
#endif
#include "wimlib/xml.h"
+#include "wimlib/wildcard.h"
#include "wimlib/wim.h"
#include <errno.h>
WIMLIB_EXTRACT_FLAG_HARDLINK))
return WIMLIB_ERR_INVALID_PARAM;
+ if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS)
+ return WIMLIB_ERR_INVALID_PARAM;
+
if ((extract_flags &
(WIMLIB_EXTRACT_FLAG_NO_ACLS |
WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
return do_wimlib_extract_image(wim, image, target, extract_flags,
progress_func);
}
+
+/* API function documented in wimlib.h */
+WIMLIBAPI int
+wimlib_extract_pathlist(WIMStruct *wim, int image,
+			const tchar *target,
+			const tchar *path_list_file,
+			int extract_flags,
+			wimlib_progress_func_t progress_func)
+{
+	tchar **list;
+	size_t list_len;
+	void *backing_mem;
+	int ret;
+
+	/* Load the list file: 'list' is an array of strings that point into
+	 * 'backing_mem', so both must stay alive until extraction is done. */
+	ret = read_path_list_file(path_list_file, &list, &list_len,
+				  &backing_mem);
+	if (ret == 0) {
+		ret = wimlib_extract_paths(wim, image, target,
+					   (const tchar * const *)list,
+					   list_len,
+					   extract_flags, progress_func);
+		FREE(list);
+		FREE(backing_mem);
+	}
+	return ret;
+}
+
+/* API function documented in wimlib.h */
+WIMLIBAPI int
+wimlib_extract_paths(WIMStruct *wim,
+		     int image,
+		     const tchar *target,
+		     const tchar * const *paths,
+		     size_t num_paths,
+		     int extract_flags,
+		     wimlib_progress_func_t progress_func)
+{
+	int ret;
+	tchar **expanded_paths;
+	size_t num_expanded_paths;
+	struct wimlib_extract_command *cmds;
+	size_t target_len;
+
+	ret = select_wim_image(wim, image);
+	if (ret)
+		return ret;
+
+	if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
+		/* Translate the public extraction flags into wildcard
+		 * expansion flags, then expand every glob against the image.
+		 * The expanded list is owned by this function. */
+		int wildcard_flags = 0;
+
+		if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB)
+			wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH;
+		else
+			wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH;
+
+		if (extract_flags & WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB)
+			wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
+
+		ret = expand_wildcard_wim_paths(wim, paths, num_paths,
+						&expanded_paths,
+						&num_expanded_paths,
+						wildcard_flags);
+		if (ret)
+			return ret;
+	} else {
+		/* No globbing: use the caller's array directly.  It is not
+		 * owned by this function and is never modified or freed. */
+		expanded_paths = (tchar**)paths;
+		num_expanded_paths = num_paths;
+	}
+
+	/* Always request at least one element.  Glob expansion may
+	 * legitimately produce zero paths, and a zero-sized allocation may
+	 * return NULL (the C standard permits this for calloc()), which must
+	 * not be mistaken for running out of memory. */
+	cmds = CALLOC(num_expanded_paths ? num_expanded_paths : 1,
+		      sizeof(cmds[0]));
+	if (cmds == NULL) {
+		ret = WIMLIB_ERR_NOMEM;
+		goto out_free_expanded_paths;
+	}
+
+	target_len = tstrlen(target);
+
+	/* Build one extraction command per path.  The destination is
+	 * "<target><separator><path>", with every separator in the path
+	 * replaced by the OS-preferred one. */
+	for (size_t i = 0; i < num_expanded_paths; i++) {
+		const tchar *src_path = expanded_paths[i];
+		/* target + separator + path + null terminator */
+		size_t dest_len = target_len + 1 + tstrlen(src_path) + 1;
+		tchar *dest_path;
+		tchar *p;
+
+		cmds[i].wim_source_path = expanded_paths[i];
+		cmds[i].extract_flags = 0;
+
+		dest_path = MALLOC(dest_len * sizeof(tchar));
+		if (dest_path == NULL) {
+			ret = WIMLIB_ERR_NOMEM;
+			goto out_free_extraction_cmds;
+		}
+
+		p = tmempcpy(dest_path, target, target_len);
+		*p++ = OS_PREFERRED_PATH_SEPARATOR;
+		for (const tchar *path_p = src_path; *path_p != T('\0'); path_p++) {
+			if (is_any_path_separator(*path_p))
+				*p++ = OS_PREFERRED_PATH_SEPARATOR;
+			else
+				*p++ = *path_p;
+		}
+		*p++ = T('\0');
+		wimlib_assert((size_t)(p - dest_path) == dest_len);
+		cmds[i].fs_dest_path = dest_path;
+	}
+
+	ret = wimlib_extract_files(wim, image,
+				   cmds, num_expanded_paths,
+				   extract_flags & ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS,
+				   progress_func);
+out_free_extraction_cmds:
+	/* cmds was zero-initialized, so entries that were never filled in
+	 * hold NULL destination paths and are safe to pass to FREE(). */
+	for (size_t i = 0; i < num_expanded_paths; i++)
+		FREE(cmds[i].fs_dest_path);
+	FREE(cmds);
+out_free_expanded_paths:
+	/* Only the glob branch allocated the path list. */
+	if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
+		for (size_t i = 0; i < num_expanded_paths; i++)
+			FREE(expanded_paths[i]);
+		FREE(expanded_paths);
+	}
+	return ret;
+}
--- /dev/null
+/*
+ * pathlist.c
+ *
+ * Utility function for reading path list files.
+ */
+
+/*
+ * Copyright (C) 2013 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/file_io.h"
+#include "wimlib/pathlist.h"
+#include "wimlib/util.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Read the entire file at @path into a newly allocated buffer.
+ *
+ * On success, returns 0, stores the buffer in *buf_ret and its size in bytes
+ * in *bufsize_ret; the caller releases the buffer with FREE().  An empty file
+ * yields a valid buffer and a size of 0.
+ *
+ * On failure, returns WIMLIB_ERR_OPEN, WIMLIB_ERR_STAT, WIMLIB_ERR_NOMEM, or
+ * the error code from full_read(), and stores nothing.
+ */
+static int
+read_file_contents(const tchar *path, char **buf_ret, size_t *bufsize_ret)
+{
+	int raw_fd;
+	struct filedes fd;
+	struct stat st;
+	size_t size;
+	void *buf;
+	int ret;
+	int errno_save;
+
+	raw_fd = topen(path, O_RDONLY | O_BINARY);
+	if (raw_fd < 0) {
+		ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
+		return WIMLIB_ERR_OPEN;
+	}
+	if (fstat(raw_fd, &st)) {
+		ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
+		close(raw_fd);
+		return WIMLIB_ERR_STAT;
+	}
+
+	/* Reject files whose size does not fit in size_t.  Otherwise allocate
+	 * at least one byte, because a zero-sized allocation may return NULL
+	 * and an empty file would then be misreported as out of memory. */
+	size = (size_t)st.st_size;
+	if ((size_t)st.st_size != st.st_size ||
+	    (buf = MALLOC(size ? size : 1)) == NULL)
+	{
+		close(raw_fd);
+		ERROR("Not enough memory to read \"%"TS"\"", path);
+		return WIMLIB_ERR_NOMEM;
+	}
+
+	filedes_init(&fd, raw_fd);
+	ret = full_read(&fd, buf, size);
+	/* Closing the file may overwrite errno; keep the value from the read
+	 * so that the error message below reports the right cause. */
+	errno_save = errno;
+	filedes_close(&fd);
+	errno = errno_save;
+	if (ret) {
+		ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
+		FREE(buf);
+		return ret;
+	}
+
+	*buf_ret = buf;
+	*bufsize_ret = size;
+	return 0;
+}
+
+/*
+ * Read the UTF-8 file at @path and convert its contents to a tchar string.
+ *
+ * On success, returns 0, stores the converted buffer in *buf_ret and its
+ * length in tchars in *buflen_ret.  On failure, returns the error code from
+ * read_file_contents() or utf8_to_tstr() and stores nothing.
+ */
+static int
+read_utf8_file_contents(const tchar *path, tchar **buf_ret, size_t *buflen_ret)
+{
+	char *raw;
+	size_t raw_nbytes;
+	tchar *converted;
+	size_t converted_nbytes;
+	int ret;
+
+	ret = read_file_contents(path, &raw, &raw_nbytes);
+	if (ret)
+		return ret;
+
+	/* The raw bytes are not needed once the conversion has been tried. */
+	ret = utf8_to_tstr(raw, raw_nbytes, &converted, &converted_nbytes);
+	FREE(raw);
+
+	if (ret == 0) {
+		*buf_ret = converted;
+		/* The conversion reports a size in bytes; callers want tchars. */
+		*buflen_ret = converted_nbytes / sizeof(tchar);
+	}
+	return ret;
+}
+
+/*
+ * Split the @buflen tchars at @buf into newline-terminated lines and collect
+ * the non-empty, non-comment ones.
+ *
+ * Each line has leading and trailing whitespace stripped; lines that are then
+ * empty or that begin with ';' are skipped.  Kept lines are null-terminated in
+ * place, so the returned strings point into @buf.  Text after the last
+ * newline in the buffer is not treated as a line.
+ *
+ * On success, returns 0 and stores the array of lines in *paths_ret and its
+ * length in *num_paths_ret.  Returns WIMLIB_ERR_NOMEM if the array cannot be
+ * grown.
+ */
+static int
+parse_path_list_file(tchar *buf, size_t buflen,
+		     tchar ***paths_ret, size_t *num_paths_ret)
+{
+	tchar * const buf_end = buf + buflen;
+	tchar **list = NULL;
+	size_t count = 0;
+	size_t capacity = 0;
+	tchar *cursor = buf;
+
+	while (cursor != buf_end) {
+		tchar *newline = tmemchr(cursor, T('\n'), buf_end - cursor);
+		tchar *first;
+		tchar *last;
+
+		if (newline == NULL)
+			break;
+
+		first = cursor;
+		last = newline;
+		cursor = newline + 1;
+
+		/* Strip whitespace from both ends of the line. */
+		while (first < newline && istspace(*first))
+			first++;
+		while (last > first && istspace(*(last - 1)))
+			last--;
+
+		/* Skip empty lines and ';' comments. */
+		if (last == first || *first == T(';'))
+			continue;
+
+		/* Grow the array by at least 8 slots, or by half. */
+		if (count == capacity) {
+			size_t grown_capacity = max(capacity + 8,
+						    capacity * 3 / 2);
+			tchar **grown = REALLOC(list, grown_capacity *
+						      sizeof(list[0]));
+
+			if (grown == NULL) {
+				FREE(list);
+				return WIMLIB_ERR_NOMEM;
+			}
+			list = grown;
+			capacity = grown_capacity;
+		}
+
+		/* Terminate the line in place and record where it starts. */
+		*last = T('\0');
+		list[count] = first;
+		count++;
+	}
+
+	*paths_ret = list;
+	*num_paths_ret = count;
+	return 0;
+}
+
+/*
+ * Read the UTF-8 text file @listfile and split it into a list of paths, one
+ * per line.  Whitespace around each line is stripped, and empty lines and
+ * lines beginning with ';' are skipped.  The last line is honored even when
+ * the file does not end with a newline.
+ *
+ * On success, returns 0, stores the array of paths in *paths_ret and its
+ * length in *num_paths_ret, and stores in *mem_ret the buffer that the path
+ * strings point into.  The caller releases both the array and the buffer with
+ * FREE().  On failure, returns a nonzero error code and stores nothing.
+ */
+int
+read_path_list_file(const tchar *listfile,
+		    tchar ***paths_ret, size_t *num_paths_ret,
+		    void **mem_ret)
+{
+	int ret;
+	tchar *buf;
+	size_t buflen;
+
+	ret = read_utf8_file_contents(listfile, &buf, &buflen);
+	if (ret)
+		return ret;
+
+	/* The parser only recognizes newline-terminated lines, so a final
+	 * line without a trailing newline would be silently dropped.  Append
+	 * the missing newline so that the last path is not lost. */
+	if (buflen != 0 && buf[buflen - 1] != T('\n')) {
+		tchar *grown = REALLOC(buf, (buflen + 1) * sizeof(tchar));
+
+		if (grown == NULL) {
+			FREE(buf);
+			return WIMLIB_ERR_NOMEM;
+		}
+		buf = grown;
+		buf[buflen++] = T('\n');
+	}
+
+	ret = parse_path_list_file(buf, buflen, paths_ret, num_paths_ret);
+	if (ret) {
+		FREE(buf);
+		return ret;
+	}
+	*mem_ret = buf;
+	return 0;
+}
--- /dev/null
+/*
+ * wildcard.c
+ *
+ * Wildcard matching functions.
+ */
+
+/*
+ * Copyright (C) 2013 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "wimlib/dentry.h"
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/metadata.h"
+#include "wimlib/wildcard.h"
+
+/* State shared across the recursive expansion of a single wildcard path. */
+struct match_dentry_ctx {
+ /* Callback invoked with each fully expanded path, and its argument. */
+ int (*consume_path)(const tchar *, void *, bool);
+ void *consume_path_ctx;
+ /* Number of times consume_path has been invoked. */
+ size_t consume_path_count;
+ /* Path built so far from the names of matched dentries, its current
+ * length in tchars, and the number of tchars allocated for it. */
+ tchar *expanded_path;
+ size_t expanded_path_len;
+ size_t expanded_path_alloc_len;
+ /* Writable copy of the wildcard path being expanded. */
+ tchar *wildcard_path;
+ /* Offset and length, within wildcard_path, of the path component
+ * currently being matched. */
+ size_t cur_component_offset;
+ size_t cur_component_len;
+ /* Whether components are matched case-insensitively. */
+ bool case_insensitive;
+};
+
+/*
+ * Test whether @string matches the first @wildcard_len characters of
+ * @wildcard, interpreted as an fnmatch() pattern with backslash escaping
+ * disabled.
+ *
+ * @wildcard is temporarily null-terminated at @wildcard_len and restored
+ * before returning, so it must be writable and must have at least
+ * @wildcard_len + 1 accessible characters.
+ *
+ * Returns true if and only if fnmatch() reports a match.
+ */
+static bool
+match_wildcard(const tchar *string, tchar *wildcard,
+	       size_t wildcard_len, bool case_insensitive)
+{
+	/* Must be a tchar: a plain char would truncate the saved character
+	 * in builds where tchar is wider than char, corrupting the wildcard
+	 * path when it is restored. */
+	tchar orig;
+	int flags;
+	int ret;
+
+	orig = wildcard[wildcard_len];
+	wildcard[wildcard_len] = T('\0');
+
+	/* Warning: in Windows builds fnmatch() calls a replacement function.
+	 * Also, FNM_CASEFOLD is a GNU extension and it is defined to 0 if not
+	 * available.  */
+	flags = FNM_NOESCAPE;
+	if (case_insensitive)
+		flags |= FNM_CASEFOLD;
+	ret = fnmatch(wildcard, string, flags);
+
+	wildcard[wildcard_len] = orig;
+	return (ret == 0);
+}
+
+/* Forward declaration: match_dentry() and expand_wildcard_recursive() call
+ * each other. */
+static int
+expand_wildcard_recursive(struct wim_dentry *cur_dentry,
+ struct match_dentry_ctx *ctx);
+
+/* Classification of the part of a wildcard path that follows the component
+ * just matched; see wildcard_status(). */
+enum {
+ /* Nothing remains. */
+ WILDCARD_STATUS_DONE_FULLY,
+ /* Only path separators remain. */
+ WILDCARD_STATUS_DONE_TRAILING_SLASHES,
+ /* Something other than path separators remains. */
+ WILDCARD_STATUS_NOT_DONE,
+};
+
+/*
+ * Classify @wildcard, the remainder of a wildcard path:
+ *
+ *   - empty string:                      WILDCARD_STATUS_DONE_FULLY
+ *   - one or more path separators only:  WILDCARD_STATUS_DONE_TRAILING_SLASHES
+ *   - anything else:                     WILDCARD_STATUS_NOT_DONE
+ */
+static int
+wildcard_status(const tchar *wildcard)
+{
+	const tchar *rest = wildcard;
+
+	if (*rest == T('\0'))
+		return WILDCARD_STATUS_DONE_FULLY;
+
+	/* Skip any run of path separators. */
+	for (; is_any_path_separator(*rest); rest++)
+		;
+
+	return (*rest == T('\0')) ? WILDCARD_STATUS_DONE_TRAILING_SLASHES
+				  : WILDCARD_STATUS_NOT_DONE;
+}
+
+/*
+ * Visitor passed to for_dentry_child(): test one child dentry against the
+ * wildcard component currently selected in the context (@_ctx, a
+ * struct match_dentry_ctx).
+ *
+ * If the dentry's name matches, the name is appended to the expanded path.
+ * Then either the path is reported through the consume_path callback (when
+ * the wildcard is exhausted) or the expansion descends into the dentry's
+ * children for the next component.  The expanded path is rolled back to its
+ * previous length before returning.
+ *
+ * Returns 0 on success or when the dentry does not match; otherwise a nonzero
+ * error code, which may come from the name conversion, the callback, or the
+ * recursion.
+ */
+static int
+match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
+{
+ struct match_dentry_ctx *ctx = _ctx;
+ tchar *name;
+ size_t name_len;
+ int ret;
+
+ /* Dentries with an empty name are never matched. */
+ if (cur_dentry->file_name_nbytes == 0)
+ return 0;
+
+ /* Obtain the name as a tchar string: used directly when tchar is
+ * UTF-16LE, otherwise converted into a newly allocated buffer. */
+#if TCHAR_IS_UTF16LE
+ name = cur_dentry->file_name;
+ name_len = cur_dentry->file_name_nbytes;
+#else
+ ret = utf16le_to_tstr(cur_dentry->file_name,
+ cur_dentry->file_name_nbytes,
+ &name, &name_len);
+ if (ret)
+ return ret;
+#endif
+ /* Convert the length from bytes to tchars. */
+ name_len /= sizeof(tchar);
+
+ if (match_wildcard(name,
+ &ctx->wildcard_path[ctx->cur_component_offset],
+ ctx->cur_component_len,
+ ctx->case_insensitive))
+ {
+ /* Existing path + separator + name + null terminator. */
+ size_t len_needed = ctx->expanded_path_len + 1 + name_len + 1;
+ size_t expanded_path_len_save;
+
+ if (len_needed > ctx->expanded_path_alloc_len) {
+ tchar *expanded_path;
+
+ expanded_path = REALLOC(ctx->expanded_path,
+ len_needed * sizeof(ctx->expanded_path[0]));
+ if (expanded_path == NULL) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_free_name;
+ }
+ ctx->expanded_path = expanded_path;
+ ctx->expanded_path_alloc_len = len_needed;
+ }
+ /* Remember the current length so the append can be undone. */
+ expanded_path_len_save = ctx->expanded_path_len;
+
+ /* Append the separator and the matched name. */
+ ctx->expanded_path[ctx->expanded_path_len++] = WIM_PATH_SEPARATOR;
+ tmemcpy(&ctx->expanded_path[ctx->expanded_path_len],
+ name, name_len);
+ ctx->expanded_path_len += name_len;
+ ctx->expanded_path[ctx->expanded_path_len] = T('\0');
+
+ /* Decide what to do based on what follows this component in
+ * the wildcard path. */
+ switch (wildcard_status(&ctx->wildcard_path[
+ ctx->cur_component_offset +
+ ctx->cur_component_len]))
+ {
+ case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
+ /* Trailing separators only match directories. */
+ if (!dentry_is_directory(cur_dentry)) {
+ ret = 0;
+ break;
+ }
+ /* Fall through */
+ case WILDCARD_STATUS_DONE_FULLY:
+ /* Wildcard exhausted: report the expanded path.  The
+ * count is incremented whatever the callback returns. */
+ ret = (*ctx->consume_path)(ctx->expanded_path,
+ ctx->consume_path_ctx,
+ false);
+ ctx->consume_path_count++;
+ break;
+ case WILDCARD_STATUS_NOT_DONE:
+ /* More components remain: match them against this
+ * dentry's children. */
+ ret = expand_wildcard_recursive(cur_dentry, ctx);
+ break;
+ }
+ /* Undo the append so the caller's path prefix is unchanged. */
+ ctx->expanded_path_len = expanded_path_len_save;
+ ctx->expanded_path[expanded_path_len_save] = T('\0');
+ } else {
+ ret = 0;
+ }
+
+out_free_name:
+ /* The name was allocated above only when a conversion was needed. */
+#if !TCHAR_IS_UTF16LE
+ FREE(name);
+#endif
+ return ret;
+}
+
+/*
+ * Select the next component of ctx->wildcard_path (the one that follows the
+ * current component and any path separators after it) and match it against
+ * every child of @cur_dentry using match_dentry().  The previously selected
+ * component is restored before returning.
+ *
+ * Returns 0 if no further component exists; otherwise the value returned by
+ * the traversal of the children.
+ */
+static int
+expand_wildcard_recursive(struct wim_dentry *cur_dentry,
+			  struct match_dentry_ctx *ctx)
+{
+	const tchar *pattern = ctx->wildcard_path;
+	const size_t saved_offset = ctx->cur_component_offset;
+	const size_t saved_len = ctx->cur_component_len;
+	size_t start;
+	size_t stop;
+	int ret;
+
+	/* Skip past the current component and the separators after it. */
+	start = saved_offset + saved_len;
+	while (is_any_path_separator(pattern[start]))
+		start++;
+
+	/* Find the end of the next component. */
+	for (stop = start;
+	     pattern[stop] != T('\0') && !is_any_path_separator(pattern[stop]);
+	     stop++)
+		;
+
+	if (stop == start)
+		return 0;
+
+	ctx->cur_component_offset = start;
+	ctx->cur_component_len = stop - start;
+
+	ret = for_dentry_child(cur_dentry, match_dentry, ctx);
+
+	ctx->cur_component_len = saved_len;
+	ctx->cur_component_offset = saved_offset;
+
+	return ret;
+}
+
+/*
+ * Expand the single wildcard path @wildcard_path against the currently
+ * selected image of @wim, calling @consume_path (with @consume_path_ctx) for
+ * each path that matches.
+ *
+ * If the image has no root dentry, or the expansion succeeds without
+ * reporting any path, the "no match" policy selected by @flags applies:
+ * optionally pass the literal wildcard path to @consume_path, optionally warn,
+ * and optionally fail with WIMLIB_ERR_PATH_DOES_NOT_EXIST.
+ *
+ * Returns 0 on success or a nonzero error code.
+ */
+static int
+expand_wildcard(WIMStruct *wim,
+		const tchar *wildcard_path,
+		int (*consume_path)(const tchar *, void *, bool),
+		void *consume_path_ctx,
+		u32 flags)
+{
+	struct wim_dentry *root = wim_root_dentry(wim);
+	int ret;
+
+	if (root != NULL) {
+		struct match_dentry_ctx ctx = {
+			.consume_path = consume_path,
+			.consume_path_ctx = consume_path_ctx,
+			.consume_path_count = 0,
+			.expanded_path = MALLOC(256 * sizeof(ctx.expanded_path[0])),
+			.expanded_path_len = 0,
+			.expanded_path_alloc_len = 256,
+			.wildcard_path = TSTRDUP(wildcard_path),
+			.cur_component_offset = 0,
+			.cur_component_len = 0,
+			.case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
+		};
+
+		if (ctx.expanded_path == NULL || ctx.wildcard_path == NULL) {
+			FREE(ctx.expanded_path);
+			FREE(ctx.wildcard_path);
+			return WIMLIB_ERR_NOMEM;
+		}
+
+		ret = expand_wildcard_recursive(root, &ctx);
+		FREE(ctx.expanded_path);
+		FREE(ctx.wildcard_path);
+
+		/* An error, or at least one reported path, ends it here. */
+		if (ret != 0 || ctx.consume_path_count != 0)
+			return ret;
+	}
+
+	/* Nothing matched: apply the policy requested by the caller. */
+	ret = 0;
+	if (flags & WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES)
+		ret = (*consume_path)(wildcard_path, consume_path_ctx, true);
+
+	if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
+		WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
+
+	if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
+		ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
+		ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
+	}
+	return ret;
+}
+
+/* Growable array of expanded paths, filled in by append_path_cb(). */
+struct expanded_paths_ctx {
+ /* Array of path strings, each separately allocated. */
+ tchar **expanded_paths;
+ /* Number of entries of expanded_paths currently in use. */
+ size_t num_expanded_paths;
+ /* Number of entries allocated for expanded_paths. */
+ size_t alloc_length;
+};
+
+/*
+ * consume_path callback that appends a copy of @path to the growable array in
+ * @_ctx (a struct expanded_paths_ctx).
+ *
+ * If @may_need_trans is true, every path separator in the copy is replaced
+ * with WIM_PATH_SEPARATOR.
+ *
+ * Returns 0 on success or WIMLIB_ERR_NOMEM if memory cannot be allocated.
+ */
+static int
+append_path_cb(const tchar *path, void *_ctx, bool may_need_trans)
+{
+	struct expanded_paths_ctx *list = _ctx;
+	tchar *copy;
+
+	/* Make room first: grow by at least 8 slots, or by half. */
+	if (list->num_expanded_paths == list->alloc_length) {
+		size_t grown_length = max(list->alloc_length + 8,
+					  list->alloc_length * 3 / 2);
+		tchar **grown = REALLOC(list->expanded_paths,
+					grown_length *
+						sizeof(list->expanded_paths[0]));
+
+		if (grown == NULL)
+			return WIMLIB_ERR_NOMEM;
+		list->expanded_paths = grown;
+		list->alloc_length = grown_length;
+	}
+
+	copy = TSTRDUP(path);
+	if (copy == NULL)
+		return WIMLIB_ERR_NOMEM;
+
+	if (may_need_trans) {
+		tchar *c = copy;
+
+		while (*c) {
+			if (is_any_path_separator(*c))
+				*c = WIM_PATH_SEPARATOR;
+			c++;
+		}
+	}
+
+	list->expanded_paths[list->num_expanded_paths] = copy;
+	list->num_expanded_paths++;
+	return 0;
+}
+
+/*
+ * Expand each of the @num_wildcards wildcard paths in @wildcards against the
+ * currently selected image of @wim, in order.
+ *
+ * On success, returns 0 and stores a newly allocated array of newly allocated
+ * path strings in *expanded_paths_ret and its length in
+ * *num_expanded_paths_ret; the caller releases each string and then the array
+ * with FREE().  If nothing was expanded, the array pointer may be NULL and
+ * the length 0.
+ *
+ * On failure, frees everything collected so far, returns a nonzero error code
+ * and stores nothing.
+ *
+ * @flags is a combination of the WILDCARD_FLAG_* values.
+ */
+int
+expand_wildcard_wim_paths(WIMStruct *wim,
+			  const tchar * const *wildcards,
+			  size_t num_wildcards,
+			  tchar ***expanded_paths_ret,
+			  size_t *num_expanded_paths_ret,
+			  u32 flags)
+{
+	int ret;
+	struct expanded_paths_ctx ctx = {
+		.expanded_paths = NULL,
+		.num_expanded_paths = 0,
+		.alloc_length = 0,
+	};
+
+	/* The wildcards are tchar strings, as expand_wildcard() expects;
+	 * typing them as plain char would be wrong in builds where tchar is
+	 * not char. */
+	for (size_t i = 0; i < num_wildcards; i++) {
+		ret = expand_wildcard(wim, wildcards[i], append_path_cb, &ctx,
+				      flags);
+		if (ret)
+			goto out_free;
+	}
+	*expanded_paths_ret = ctx.expanded_paths;
+	*num_expanded_paths_ret = ctx.num_expanded_paths;
+	return 0;
+
+out_free:
+	for (size_t i = 0; i < ctx.num_expanded_paths; i++)
+		FREE(ctx.expanded_paths[i]);
+	FREE(ctx.expanded_paths);
+	return ret;
+}