From d9675dd5814394373d9871c6e9b7b35325b3d21d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 27 Dec 2013 18:29:31 -0600 Subject: [PATCH] Add support for extract list files --- Makefile.am | 4 + doc/imagex-extract.1.in | 62 ++++++- include/wimlib.h | 58 ++++++- include/wimlib/dentry.h | 9 +- include/wimlib/encoding.h | 3 + include/wimlib/pathlist.h | 11 ++ include/wimlib/wildcard.h | 31 ++++ include/wimlib/wim.h | 3 +- programs/imagex.c | 47 +++++- src/capture_common.c | 7 +- src/dentry.c | 18 ++ src/encoding.c | 8 +- src/extract.c | 124 ++++++++++++++ src/pathlist.c | 191 +++++++++++++++++++++ src/wildcard.c | 341 ++++++++++++++++++++++++++++++++++++++ 15 files changed, 883 insertions(+), 34 deletions(-) create mode 100644 include/wimlib/pathlist.h create mode 100644 include/wimlib/wildcard.h create mode 100644 src/pathlist.c create mode 100644 src/wildcard.c diff --git a/Makefile.am b/Makefile.am index 2602100f..b41b826d 100644 --- a/Makefile.am +++ b/Makefile.am @@ -52,6 +52,7 @@ libwim_la_SOURCES = \ src/lzx-decompress.c \ src/metadata_resource.c \ src/mount_image.c \ + src/pathlist.c \ src/paths.c \ src/resource.c \ src/rbtree.c \ @@ -63,6 +64,7 @@ libwim_la_SOURCES = \ src/update_image.c \ src/util.c \ src/verify.c \ + src/wildcard.c \ src/wim.c \ src/write.c \ src/xml.c \ @@ -91,6 +93,7 @@ libwim_la_SOURCES = \ include/wimlib/lzms.h \ include/wimlib/lzx.h \ include/wimlib/metadata.h \ + include/wimlib/pathlist.h \ include/wimlib/paths.h \ include/wimlib/rbtree.h \ include/wimlib/reparse.h \ @@ -102,6 +105,7 @@ libwim_la_SOURCES = \ include/wimlib/types.h \ include/wimlib/util.h \ include/wimlib/version.h \ + include/wimlib/wildcard.h \ include/wimlib/wim.h \ include/wimlib/write.h \ include/wimlib/xml.h \ diff --git a/doc/imagex-extract.1.in b/doc/imagex-extract.1.in index 0a1fca4d..6d21cee4 100644 --- a/doc/imagex-extract.1.in +++ b/doc/imagex-extract.1.in @@ -3,6 +3,8 @@ @IMAGEX_PROGNAME@-extract \- Extract files or directories from a WIM image .SH 
SYNOPSIS \fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR [\fIPATH\fR...] [\fIOPTION\fR...] +.br +\fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR @\fILISTFILE\fR [\fIOPTION\fR...] .SH DESCRIPTION \fB@IMAGEX_PROGNAME@ extract\fR extracts one or more files or directory trees from the specified \fIIMAGE\fR contained in the Windows Imaging (WIM) file @@ -20,7 +22,9 @@ the name of an image in the WIM. Use the \fB@IMAGEX_PROGNAME@ info\fR (1) command to show what images a WIM file contains. .PP Each \fIPATH\fR specifies a file or directory tree within the WIM image to -extract. See \fBPATH_SPECIFICATIONS\fR. +extract. Alternatively, a single \fILISTFILE\fR beginning with the '@' +character is taken as a file that itself contains a list of files or directory +trees to extract. See \fBPATH_SPECIFICATIONS\fR. .PP By default, files and directories are extracted to the current directory. Use \fB--dest-dir\fR to choose an alternate target directory. Alternatively, use @@ -30,15 +34,31 @@ program. \fB@IMAGEX_PROGNAME@ extract\fR supports extracting files and directory trees from stand-alone WIMs as well as split WIMs. See \fBSPLIT WIMS\fR. .SH PATH SPECIFICATIONS -Each \fIPATH\fR specifies a file or directory tree within the WIM image to -extract. Each path must be specified as an absolute path starting from the root -of the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1) -command. However, path separators may be either forward or backward slashes, -and the leading slash is optional; also, on Windows, the paths are treated +Except when a single path is specified and prefixed by the '@' character, each +\fIPATH\fR specifies a file or directory tree within the WIM image to extract. +Each such path must be specified as an absolute path starting from the root of +the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1) command. 
+However, path separators may be either forward or backward slashes, and the +leading slash is optional; also, on Windows, the paths are treated case-insensitively, while on UNIX, paths are treated case-sensitively. .PP If no \fIPATH\fRs are provided, the default behavior is to extract the full image, as if the path "/" had been provided. +.PP +If a single \fIPATH\fR is provided and is prefixed with the '@' character, it is +interpreted as the path to a \fILISTFILE\fR which must be a UTF-8 text file that +contains a list of paths (files or directories) to extract, one per line. In +each line, leading and trailing whitespace is ignored, and lines beginning with +the ';' character and otherwise empty lines are ignored. Each path must be +unquoted and must specify a full path in the WIM image, as described above. +However, unless \fB--no-wildcards\fR is specified, each path in the list file +may also contain the wildcard characters '?' and '*', and therefore may expand +to multiple actual files or directories. By default, paths or wildcards that +match no files only produce a warning; use \fB--strict-wildcards\fR if you +want an error instead. Also, when using a list file, files and directories not +located at the root of the WIM image will be extracted to a corresponding +subdirectory of the destination directory rather than directly to the +destination directory itself. .SH SPLIT WIMS You may use \fB@IMAGEX_PROGNAME@ extract\fR to extract files or directory trees from a split WIM. This uses the \fB--refs\fR="\fIGLOB\fR" option in the same @@ -52,7 +72,7 @@ present. .TP \fB--ref\fR="\fIGLOB\fR" File glob of additional WIMs or split WIM parts to reference resources from. -See \fBSPLIT_WIMS\fR. Note: since \fIGLOB\fR is listed in quotes because it is +See \fBSPLIT_WIMS\fR. Note: \fIGLOB\fR is listed in quotes because it is interpreted by \fB@IMAGEX_PROGNAME@\fR and may need to be quoted to protect against shell expansion. .TP @@ -77,6 +97,17 @@ extracted. 
\fB--dest-dir\fR=\fIDIR\fR Extract the files and directories to the directory \fIDIR\fR instead of to the current working directory. +.TP +\fB--no-wildcards\fR +Do not interpret wildcard characters in paths in the \fILISTFILE\fR. +.TP +\fB--strict-wildcards\fR +Fail if any wildcards or paths in \fILISTFILE\fR do not match any files in the +WIM image. The default behavior is to warn only. +.TP +\fB--case-insensitive-wildcards\fR +Treat the wildcards or paths in \fILISTFILE\fR as case-insensitive. On Windows +this is already the default behavior, but on UNIX-like systems it is not. .SH NOTES See the documentation \fB@IMAGEX_PROGNAME@ apply\fR (1) for documentation about what data and metadata are extracted on UNIX-like systems versus on Windows. @@ -143,6 +174,23 @@ Extract multiple files and directories in one command: .RE .RE .PP +Extract files using a list file: +.RS +.PP +@IMAGEX_PROGNAME@ extract install.wim 1 @files.txt +.RE +.PP + ... where files.txt could be something like: +.PP +.RS +.RS +.nf +Windows\\System32\\*.* +Windows\\System32\\??-??\\*.* +Windows\\System32\\en-US\\*.* +.RE +.RE +.fi .SH SEE ALSO .BR @IMAGEX_PROGNAME@ (1) .BR @IMAGEX_PROGNAME@-apply (1) diff --git a/include/wimlib.h b/include/wimlib.h index 93f7f359..512c6149 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -262,8 +262,12 @@ * Another function, wimlib_extract_files(), is also provided. It can extract * certain files or directories from a WIM image, instead of a full image. * - * A third function, wimlib_extract_image_from_pipe(), allows an image to be - * extracted from a pipable WIM sent over a pipe; see @ref subsec_pipable_wims. + * wimlib_extract_paths() and wimlib_extract_pathlist() allow extracting a set + * of paths from a WIM image in a manner that may be easier to use than + * wimlib_extract_files(), and can also accept wildcard characters. 
+ * + * wimlib_extract_image_from_pipe() allows an image to be extracted from a + * pipable WIM sent over a pipe; see @ref subsec_pipable_wims. * * Note that some details of how image extraction/application works are * documented more fully in the manual pages for wimlib-imagex apply and @@ -1382,6 +1386,22 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour * performance. */ #define WIMLIB_EXTRACT_FLAG_FILE_ORDER 0x00020000 +/** For wimlib_extract_paths() and wimlib_extract_pathlist() only: Treat the + * paths to extract as globs which may contain the wildcard characters + * '?' and '*'. The '?' character matches any single character, whereas the '*' + * character matches zero or more characters in the same path component. */ +#define WIMLIB_EXTRACT_FLAG_GLOB_PATHS 0x00040000 + +/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes an error + * (::WIMLIB_ERR_PATH_DOES_NOT_EXIST) rather than a warning to be issued when + * one of the provided globs did not match a file. */ +#define WIMLIB_EXTRACT_FLAG_STRICT_GLOB 0x00080000 + +/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes the globbing to + * be performed case insensitively. On Windows this is already the default + * behavior but on UNIX-like systems it is not. */ +#define WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB 0x00100000 + /** @} */ /** @ingroup G_mounting_wim_images * @{ */ @@ -2328,6 +2348,40 @@ wimlib_extract_image_from_pipe(int pipe_fd, const wimlib_tchar *target, int extract_flags, wimlib_progress_func_t progress_func); +/** + * Similar to wimlib_extract_paths(), but the paths to extract from the WIM + * image are specified in the UTF-8 text file @p path_list_file which itself + * contains the list of paths to use, one per line. Leading and trailing + * whitespace, and otherwise empty lines and lines beginning with the ';' + * character are ignored. No quotes are needed as paths are otherwise delimited + * by the newline character. 
+ */ +extern int +wimlib_extract_pathlist(WIMStruct *wim, int image, + const wimlib_tchar *target, + const wimlib_tchar *path_list_file, + int extract_flags, + wimlib_progress_func_t progress_func); + +/** + * Similar to wimlib_extract_files(), but the files or directories to extract + * from the WIM image are specified as an array of paths. Each path will be + * extracted to a corresponding location in @p target based on its location in + * the WIM image. + * + * With ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS specified in @p extract_flags, this + * function additionally allows paths to be globs using the wildcard characters + * '*' and '?'. + */ +extern int +wimlib_extract_paths(WIMStruct *wim, + int image, + const wimlib_tchar *target, + const wimlib_tchar * const *paths, + size_t num_paths, + int extract_flags, + wimlib_progress_func_t progress_func); + /** * @ingroup G_wim_information * diff --git a/include/wimlib/dentry.h b/include/wimlib/dentry.h index a2735e5e..f5140281 100644 --- a/include/wimlib/dentry.h +++ b/include/wimlib/dentry.h @@ -432,15 +432,10 @@ for_dentry_in_rbtree(struct rb_node *node, int (*visitor)(struct wim_dentry *, void *), void *arg); -static inline int +extern int for_dentry_child(const struct wim_dentry *dentry, int (*visitor)(struct wim_dentry *, void *), - void *arg) -{ - return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node, - visitor, - arg); -} + void *arg); extern int for_dentry_in_tree_depth(struct wim_dentry *root, diff --git a/include/wimlib/encoding.h b/include/wimlib/encoding.h index ff3c2ae6..a621581c 100644 --- a/include/wimlib/encoding.h +++ b/include/wimlib/encoding.h @@ -30,6 +30,9 @@ DECLARE_CHAR_CONVERSION_FUNCTIONS(utf16le, tstr, utf16lechar, tchar); DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar); #endif +DECLARE_CHAR_CONVERSION_FUNCTIONS(utf8, tstr, char, tchar); +DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf8, tchar, char); + extern int utf8_to_tstr_simple(const char *utf8str, tchar **out); 
diff --git a/include/wimlib/pathlist.h b/include/wimlib/pathlist.h new file mode 100644 index 00000000..71878bf7 --- /dev/null +++ b/include/wimlib/pathlist.h @@ -0,0 +1,11 @@ +#ifndef _WIMLIB_PATHLIST_H +#define _WIMLIB_PATHLIST_H + +#include "wimlib/types.h" + +extern int +read_path_list_file(const tchar *listfile, + tchar ***paths_ret, size_t *num_paths_ret, + void **mem_ret); + +#endif /* _WIMLIB_PATHLIST_H */ diff --git a/include/wimlib/wildcard.h b/include/wimlib/wildcard.h new file mode 100644 index 00000000..764f4786 --- /dev/null +++ b/include/wimlib/wildcard.h @@ -0,0 +1,31 @@ +#ifndef _WIMLIB_WILDCARD_H +#define _WIMLIB_WILDCARD_H + +#include + +#define WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES 0x00000001 +#define WILDCARD_FLAG_WARN_IF_NO_MATCH 0x00000002 +#define WILDCARD_FLAG_ERROR_IF_NO_MATCH 0x00000004 +#define WILDCARD_FLAG_CASE_INSENSITIVE 0x00000008 + +extern int +expand_wildcard_wim_paths(WIMStruct *wim, + const char * const *wildcards, + size_t num_wildcards, + tchar ***expanded_paths_ret, + size_t *num_expanded_paths_ret, + u32 flags); + +#ifdef __WIN32__ +extern int +fnmatch(const tchar *pattern, const tchar *string, int flags); +# define FNM_CASEFOLD 0 +#else +# include +# ifndef FNM_CASEFOLD +# warning "FNM_CASEFOLD not defined!" 
+# define FNM_CASEFOLD 0 +# endif +#endif + +#endif /* _WIMLIB_WILDCARD_H */ diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h index 2e780343..f045c885 100644 --- a/include/wimlib/wim.h +++ b/include/wimlib/wim.h @@ -1,6 +1,7 @@ #ifndef _WIMLIB_WIM_H #define _WIMLIB_WIM_H +#include "wimlib.h" #include "wimlib/header.h" #include "wimlib/types.h" #include "wimlib/file_io.h" @@ -43,7 +44,7 @@ struct WIMStruct { void *private; struct wimlib_decompressor *decompressor; - enum wimlib_compression_type decompressor_ctype; + u8 decompressor_ctype; u32 decompressor_max_block_size; struct list_head subwims; diff --git a/programs/imagex.c b/programs/imagex.c index 4800633e..ce5ac8d5 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -119,6 +119,7 @@ static FILE *imagex_info_file; enum { IMAGEX_ALLOW_OTHER_OPTION, IMAGEX_BOOT_OPTION, + IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION, IMAGEX_CHECK_OPTION, IMAGEX_CHUNK_SIZE_OPTION, IMAGEX_COMMAND_OPTION, @@ -142,6 +143,7 @@ enum { IMAGEX_NORPFIX_OPTION, IMAGEX_NOCHECK_OPTION, IMAGEX_NO_ACLS_OPTION, + IMAGEX_NO_WILDCARDS_OPTION, IMAGEX_NOT_PIPABLE_OPTION, IMAGEX_PACK_STREAMS_OPTION, IMAGEX_PATH_OPTION, @@ -157,6 +159,7 @@ enum { IMAGEX_STAGING_DIR_OPTION, IMAGEX_STREAMS_INTERFACE_OPTION, IMAGEX_STRICT_ACLS_OPTION, + IMAGEX_STRICT_WILDCARDS_OPTION, IMAGEX_SYMLINK_OPTION, IMAGEX_THREADS_OPTION, IMAGEX_TO_STDOUT_OPTION, @@ -252,6 +255,9 @@ static const struct option extract_options[] = { {T("dest-dir"), required_argument, NULL, IMAGEX_DEST_DIR_OPTION}, {T("to-stdout"), no_argument, NULL, IMAGEX_TO_STDOUT_OPTION}, {T("include-invalid-names"), no_argument, NULL, IMAGEX_INCLUDE_INVALID_NAMES_OPTION}, + {T("strict-wildcards"), no_argument, NULL, IMAGEX_STRICT_WILDCARDS_OPTION}, + {T("no-wildcards"), no_argument, NULL, IMAGEX_NO_WILDCARDS_OPTION}, + {T("case-insensitive-wildcards"), no_argument, NULL, IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION}, {NULL, 0, NULL, 0}, }; @@ -2697,8 +2703,10 @@ imagex_extract(int argc, tchar **argv, int 
cmd) int ret; const tchar *wimfile; const tchar *image_num_or_name; + const tchar *pathlist; tchar *dest_dir = T("."); int extract_flags = WIMLIB_EXTRACT_FLAG_SEQUENTIAL | WIMLIB_EXTRACT_FLAG_NORPFIX; + int listfile_extract_flags = WIMLIB_EXTRACT_FLAG_GLOB_PATHS; STRING_SET(refglobs); @@ -2739,6 +2747,15 @@ imagex_extract(int argc, tchar **argv, int cmd) extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES; extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS; break; + case IMAGEX_NO_WILDCARDS_OPTION: + listfile_extract_flags &= ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS; + break; + case IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION: + listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB; + break; + case IMAGEX_STRICT_WILDCARDS_OPTION: + listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_STRICT_GLOB; + break; default: goto out_usage; } @@ -2755,10 +2772,17 @@ imagex_extract(int argc, tchar **argv, int cmd) argc -= 2; argv += 2; - cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir, - &num_cmds); - if (!cmds) - goto out_err; + if (argc == 1 && argv[0][0] == T('@')) { + pathlist = argv[0] + 1; + cmds = NULL; + num_cmds = 0; + } else { + cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir, + &num_cmds); + if (cmds == NULL) + goto out_err; + pathlist = NULL; + } ret = wimlib_open_wim(wimfile, open_flags, &wim, imagex_progress_func); if (ret) @@ -2777,8 +2801,17 @@ imagex_extract(int argc, tchar **argv, int cmd) goto out_wimlib_free; } - ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0, - imagex_progress_func); + ret = 0; + if (ret == 0 && cmds != NULL) { + ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0, + imagex_progress_func); + } + if (ret == 0 && pathlist != NULL) { + ret = wimlib_extract_pathlist(wim, image, dest_dir, + pathlist, + extract_flags | listfile_extract_flags, + imagex_progress_func); + } if (ret == 0) { if (!imagex_be_quiet) imagex_printf(T("Done extracting files.\n")); @@ -2786,7 +2819,7 @@ 
imagex_extract(int argc, tchar **argv, int cmd) tfprintf(stderr, T("Note: You can use `%"TS"' to see what " "files and directories\n" " are in the WIM image.\n"), - get_cmd_string(CMD_INFO, false)); + get_cmd_string(CMD_DIR, false)); } else if (ret == WIMLIB_ERR_RESOURCE_NOT_FOUND) { struct wimlib_wim_info info; diff --git a/src/capture_common.c b/src/capture_common.c index 66f6411f..f5866b49 100644 --- a/src/capture_common.c +++ b/src/capture_common.c @@ -31,15 +31,10 @@ #include "wimlib/error.h" #include "wimlib/lookup_table.h" #include "wimlib/paths.h" +#include "wimlib/wildcard.h" -#ifdef __WIN32__ -# include "wimlib/win32.h" /* for fnmatch() equivalent */ -#else -# include -#endif #include - static int canonicalize_pattern(const tchar *pat, tchar **canonical_pat_ret) { diff --git a/src/dentry.c b/src/dentry.c index 8670d274..53a44cf1 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -435,6 +435,24 @@ for_dentry_tree_in_rbtree(struct rb_node *node, return 0; } +/* + * Iterate over all children of @dentry, calling the function @visitor, passing + * it a child dentry and the extra argument @arg. + * + * Note: this function iterates over ALL child dentries, even those with the + * same case-insensitive name. + * + * Note: this function clobbers the tmp_list field of the child dentries. */ +int +for_dentry_child(const struct wim_dentry *dentry, + int (*visitor)(struct wim_dentry *, void *), + void *arg) +{ + return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node, + visitor, + arg); +} + /* Calls a function on all directory entries in a WIM dentry tree. Logically, * this is a pre-order traversal (the function is called on a parent dentry * before its children), but sibling dentries will be visited in order as well. 
diff --git a/src/encoding.c b/src/encoding.c index d4f4f399..42f7e95f 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -298,7 +298,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar, WIMLIB_ERR_INVALID_UTF16_STRING, ERROR_WITH_ERRNO("Failed to convert UTF-16LE " "string \"%"TS"\" to UTF-8 string!", in), - static) + ) DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, tstr, "UTF-16LE", tchar, @@ -308,7 +308,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, WIMLIB_ERR_INVALID_UTF8_STRING, ERROR_WITH_ERRNO("Failed to convert UTF-8 string " "to UTF-16LE string!"), - static) + ) #else /* UNIX */ @@ -324,7 +324,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, ERROR("If the data you provided was UTF-8, please make sure " "the character\n" " encoding of your current locale is UTF-8."), - static) + ) DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, tstr, "", tchar, @@ -337,7 +337,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, ERROR("This may be because the UTF-8 data " "could not be represented\n" " in your locale's character encoding."), - static) + ) #endif int diff --git a/src/extract.c b/src/extract.c index 467452dc..aba92865 100644 --- a/src/extract.c +++ b/src/extract.c @@ -49,6 +49,7 @@ #include "wimlib/error.h" #include "wimlib/lookup_table.h" #include "wimlib/metadata.h" +#include "wimlib/pathlist.h" #include "wimlib/paths.h" #include "wimlib/reparse.h" #include "wimlib/resource.h" @@ -57,6 +58,7 @@ # include "wimlib/win32.h" /* for realpath() equivalent */ #endif #include "wimlib/xml.h" +#include "wimlib/wildcard.h" #include "wimlib/wim.h" #include @@ -2545,6 +2547,9 @@ check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags) WIMLIB_EXTRACT_FLAG_HARDLINK)) return WIMLIB_ERR_INVALID_PARAM; + if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) + return WIMLIB_ERR_INVALID_PARAM; + if ((extract_flags & (WIMLIB_EXTRACT_FLAG_NO_ACLS | WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS 
| @@ -3032,3 +3037,122 @@ wimlib_extract_image(WIMStruct *wim, return do_wimlib_extract_image(wim, image, target, extract_flags, progress_func); } + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_extract_pathlist(WIMStruct *wim, int image, + const tchar *target, + const tchar *path_list_file, + int extract_flags, + wimlib_progress_func_t progress_func) +{ + int ret; + tchar **paths; + size_t num_paths; + void *mem; + + ret = read_path_list_file(path_list_file, &paths, &num_paths, &mem); + if (ret) + return ret; + + ret = wimlib_extract_paths(wim, image, target, + (const tchar * const *)paths, num_paths, + extract_flags, progress_func); + FREE(paths); + FREE(mem); + return ret; +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_extract_paths(WIMStruct *wim, + int image, + const tchar *target, + const tchar * const *paths, + size_t num_paths, + int extract_flags, + wimlib_progress_func_t progress_func) +{ + int ret; + tchar **expanded_paths; + size_t num_expanded_paths; + struct wimlib_extract_command *cmds; + + ret = select_wim_image(wim, image); + if (ret) + return ret; + + if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) { + int wildcard_flags = 0; + + if (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB) + wildcard_flags |= WILDCARD_FLAG_ERROR_IF_NO_MATCH; + else + wildcard_flags |= WILDCARD_FLAG_WARN_IF_NO_MATCH; + + if (extract_flags & WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB) + wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE; + + ret = expand_wildcard_wim_paths(wim, paths, num_paths, + &expanded_paths, + &num_expanded_paths, + wildcard_flags); + if (ret) + return ret; + } else { + expanded_paths = (tchar**)paths; + num_expanded_paths = num_paths; + } + + cmds = CALLOC(num_expanded_paths, sizeof(cmds[0])); + if (cmds == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_expanded_paths; + } + + for (size_t i = 0; i < num_expanded_paths; i++) { + cmds[i].wim_source_path = expanded_paths[i]; + cmds[i].extract_flags = 0; + 
+ tchar *dest_path; + size_t dest_len = 0; + dest_len += tstrlen(target); + dest_len += 1; + dest_len += tstrlen(expanded_paths[i]); + dest_len += 1; + + dest_path = MALLOC(dest_len * sizeof(tchar)); + if (dest_path == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_extraction_cmds; + } + tchar *p = dest_path; + p = tmempcpy(p, target, tstrlen(target)); + *p++ = OS_PREFERRED_PATH_SEPARATOR; + for (tchar *path_p = expanded_paths[i]; *path_p != '\0'; path_p++) { + if (is_any_path_separator(*path_p)) + *p++ = OS_PREFERRED_PATH_SEPARATOR; + else + *p++ = *path_p; + } + *p++ = T('\0'); + wimlib_assert(p - dest_path == dest_len); + cmds[i].fs_dest_path = dest_path; + } + + ret = wimlib_extract_files(wim, image, + cmds, num_expanded_paths, + extract_flags & ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS, + progress_func); +out_free_extraction_cmds: + for (size_t i = 0; i < num_expanded_paths; i++) + FREE(cmds[i].fs_dest_path); + FREE(cmds); +out_free_expanded_paths: + if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) { + for (size_t i = 0; i < num_expanded_paths; i++) + FREE(expanded_paths[i]); + FREE(expanded_paths); + } + return ret; +} diff --git a/src/pathlist.c b/src/pathlist.c new file mode 100644 index 00000000..3bfec2ef --- /dev/null +++ b/src/pathlist.c @@ -0,0 +1,191 @@ +/* + * pathlist.c + * + * Utility function for reading path list files. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/encoding.h" +#include "wimlib/error.h" +#include "wimlib/file_io.h" +#include "wimlib/pathlist.h" +#include "wimlib/util.h" + +#include +#include +#include +#include +#include +#include + +static int +read_file_contents(const tchar *path, char **buf_ret, size_t *bufsize_ret) +{ + int raw_fd; + struct filedes fd; + struct stat st; + void *buf; + int ret; + int errno_save; + + raw_fd = topen(path, O_RDONLY | O_BINARY); + if (raw_fd < 0) { + ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path); + return WIMLIB_ERR_OPEN; + } + if (fstat(raw_fd, &st)) { + ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path); + close(raw_fd); + return WIMLIB_ERR_STAT; + } + if ((size_t)st.st_size != st.st_size || + (buf = MALLOC(st.st_size)) == NULL) + { + close(raw_fd); + ERROR("Not enough memory to read \"%"TS"\"", path); + return WIMLIB_ERR_NOMEM; + } + + filedes_init(&fd, raw_fd); + ret = full_read(&fd, buf, st.st_size); + errno_save = errno; + filedes_close(&fd); + errno = errno_save; + if (ret) { + ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path); + FREE(buf); + return ret; + } + + *buf_ret = buf; + *bufsize_ret = st.st_size; + return 0; +} + +static int +read_utf8_file_contents(const tchar *path, tchar **buf_ret, size_t *buflen_ret) +{ + int ret; + char *buf_utf8; + size_t bufsize_utf8; + tchar *buf_tstr; + size_t bufsize_tstr; + + ret = read_file_contents(path, &buf_utf8, &bufsize_utf8); + if (ret) + return ret; + + ret = utf8_to_tstr(buf_utf8, bufsize_utf8, &buf_tstr, &bufsize_tstr); + FREE(buf_utf8); + if (ret) + return ret; + + *buf_ret = buf_tstr; + *buflen_ret = bufsize_tstr / sizeof(tchar); + return 0; +} + +static int +parse_path_list_file(tchar *buf, size_t buflen, + tchar ***paths_ret, size_t *num_paths_ret) +{ + tchar **paths = NULL; + size_t num_paths = 0; + size_t 
num_alloc_paths = 0; + tchar *nl; + tchar *p; + + for (p = buf; p != buf + buflen; p = nl + 1) { + tchar *line_begin, *line_end; + size_t line_len; + + nl = tmemchr(p, T('\n'), buf + buflen - p); + if (nl == NULL) + break; + + line_begin = p; + line_end = nl; + + /* Ignore leading whitespace. */ + while (line_begin < nl && istspace(*line_begin)) + line_begin++; + + /* Ignore trailing whitespace. */ + while (line_end > line_begin && istspace(*(line_end - 1))) + line_end--; + + line_len = line_end - line_begin; + + /* Ignore comments and empty lines. */ + if (line_len == 0 || *line_begin == T(';')) + continue; + + if (num_paths == num_alloc_paths) { + tchar **new_paths; + size_t new_num_alloc_paths = max(num_alloc_paths + 8, + num_alloc_paths * 3 / 2); + + new_paths = REALLOC(paths, new_num_alloc_paths * + sizeof(paths[0])); + if (new_paths == NULL) + goto oom; + paths = new_paths; + num_alloc_paths = new_num_alloc_paths; + } + + *line_end = T('\0'); + paths[num_paths++] = line_begin; + } + + *paths_ret = paths; + *num_paths_ret = num_paths; + return 0; + +oom: + FREE(paths); + return WIMLIB_ERR_NOMEM; +} + +int +read_path_list_file(const tchar *listfile, + tchar ***paths_ret, size_t *num_paths_ret, + void **mem_ret) +{ + int ret; + tchar *buf; + size_t buflen; + + ret = read_utf8_file_contents(listfile, &buf, &buflen); + if (ret) + return ret; + + ret = parse_path_list_file(buf, buflen, paths_ret, num_paths_ret); + if (ret) { + FREE(buf); + return ret; + } + *mem_ret = buf; + return 0; +} diff --git a/src/wildcard.c b/src/wildcard.c new file mode 100644 index 00000000..9c878e21 --- /dev/null +++ b/src/wildcard.c @@ -0,0 +1,341 @@ +/* + * wildcard.c + * + * Wildcard matching functions. + */ + +/* + * Copyright (C) 2013 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. 
+ * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. + * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib/dentry.h" +#include "wimlib/encoding.h" +#include "wimlib/error.h" +#include "wimlib/metadata.h" +#include "wimlib/wildcard.h" + +struct match_dentry_ctx { + int (*consume_path)(const tchar *, void *, bool); + void *consume_path_ctx; + size_t consume_path_count; + tchar *expanded_path; + size_t expanded_path_len; + size_t expanded_path_alloc_len; + tchar *wildcard_path; + size_t cur_component_offset; + size_t cur_component_len; + bool case_insensitive; +}; + +static bool +match_wildcard(const tchar *string, tchar *wildcard, + size_t wildcard_len, bool case_insensitive) +{ + tchar orig; + int flags; + int ret; + + orig = wildcard[wildcard_len]; + wildcard[wildcard_len] = T('\0'); + + /* Warning: in Windows builds fnmatch() calls a replacement function. + * Also, FNM_CASEFOLD is a GNU extension and it is defined to 0 if not + * available. 
*/ + flags = FNM_NOESCAPE; + if (case_insensitive) + flags |= FNM_CASEFOLD; + ret = fnmatch(wildcard, string, flags); + + wildcard[wildcard_len] = orig; + return (ret == 0); +} + +static int +expand_wildcard_recursive(struct wim_dentry *cur_dentry, + struct match_dentry_ctx *ctx); + +enum { + WILDCARD_STATUS_DONE_FULLY, + WILDCARD_STATUS_DONE_TRAILING_SLASHES, + WILDCARD_STATUS_NOT_DONE, +}; + +static int +wildcard_status(const tchar *wildcard) +{ + if (*wildcard == T('\0')) + return WILDCARD_STATUS_DONE_FULLY; + while (is_any_path_separator(*wildcard)) + wildcard++; + if (*wildcard == T('\0')) + return WILDCARD_STATUS_DONE_TRAILING_SLASHES; + + return WILDCARD_STATUS_NOT_DONE; +} + +static int +match_dentry(struct wim_dentry *cur_dentry, void *_ctx) +{ + struct match_dentry_ctx *ctx = _ctx; + tchar *name; + size_t name_len; + int ret; + + if (cur_dentry->file_name_nbytes == 0) + return 0; + +#if TCHAR_IS_UTF16LE + name = cur_dentry->file_name; + name_len = cur_dentry->file_name_nbytes; +#else + ret = utf16le_to_tstr(cur_dentry->file_name, + cur_dentry->file_name_nbytes, + &name, &name_len); + if (ret) + return ret; +#endif + name_len /= sizeof(tchar); + + if (match_wildcard(name, + &ctx->wildcard_path[ctx->cur_component_offset], + ctx->cur_component_len, + ctx->case_insensitive)) + { + size_t len_needed = ctx->expanded_path_len + 1 + name_len + 1; + size_t expanded_path_len_save; + + if (len_needed > ctx->expanded_path_alloc_len) { + tchar *expanded_path; + + expanded_path = REALLOC(ctx->expanded_path, + len_needed * sizeof(ctx->expanded_path[0])); + if (expanded_path == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_name; + } + ctx->expanded_path = expanded_path; + ctx->expanded_path_alloc_len = len_needed; + } + expanded_path_len_save = ctx->expanded_path_len; + + ctx->expanded_path[ctx->expanded_path_len++] = WIM_PATH_SEPARATOR; + tmemcpy(&ctx->expanded_path[ctx->expanded_path_len], + name, name_len); + ctx->expanded_path_len += name_len; + 
ctx->expanded_path[ctx->expanded_path_len] = T('\0'); + + switch (wildcard_status(&ctx->wildcard_path[ + ctx->cur_component_offset + + ctx->cur_component_len])) + { + case WILDCARD_STATUS_DONE_TRAILING_SLASHES: + if (!dentry_is_directory(cur_dentry)) { + ret = 0; + break; + } + /* Fall through */ + case WILDCARD_STATUS_DONE_FULLY: + ret = (*ctx->consume_path)(ctx->expanded_path, + ctx->consume_path_ctx, + false); + ctx->consume_path_count++; + break; + case WILDCARD_STATUS_NOT_DONE: + ret = expand_wildcard_recursive(cur_dentry, ctx); + break; + } + ctx->expanded_path_len = expanded_path_len_save; + ctx->expanded_path[expanded_path_len_save] = T('\0'); + } else { + ret = 0; + } + +out_free_name: +#if !TCHAR_IS_UTF16LE + FREE(name); +#endif + return ret; +} + +static int +expand_wildcard_recursive(struct wim_dentry *cur_dentry, + struct match_dentry_ctx *ctx) +{ + tchar *w; + size_t begin; + size_t end; + size_t len; + size_t offset_save; + size_t len_save; + int ret; + + w = ctx->wildcard_path; + + begin = ctx->cur_component_offset + ctx->cur_component_len; + while (is_any_path_separator(w[begin])) + begin++; + + end = begin; + + while (w[end] != T('\0') && !is_any_path_separator(w[end])) + end++; + + len = end - begin; + + if (len == 0) + return 0; + + offset_save = ctx->cur_component_offset; + len_save = ctx->cur_component_len; + + ctx->cur_component_offset = begin; + ctx->cur_component_len = len; + + ret = for_dentry_child(cur_dentry, match_dentry, ctx); + + ctx->cur_component_len = len_save; + ctx->cur_component_offset = offset_save; + + return ret; +} + +static int +expand_wildcard(WIMStruct *wim, + const tchar *wildcard_path, + int (*consume_path)(const tchar *, void *, bool), + void *consume_path_ctx, + u32 flags) +{ + struct wim_dentry *root; + int ret; + + root = wim_root_dentry(wim); + if (root == NULL) + goto no_match; + + struct match_dentry_ctx ctx = { + .consume_path = consume_path, + .consume_path_ctx = consume_path_ctx, + .consume_path_count = 0, + 
.expanded_path = MALLOC(256 * sizeof(ctx.expanded_path[0])), + .expanded_path_len = 0, + .expanded_path_alloc_len = 256, + .wildcard_path = TSTRDUP(wildcard_path), + .cur_component_offset = 0, + .cur_component_len = 0, + .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0), + }; + + if (ctx.expanded_path == NULL || ctx.wildcard_path == NULL) { + FREE(ctx.expanded_path); + FREE(ctx.wildcard_path); + return WIMLIB_ERR_NOMEM; + } + + ret = expand_wildcard_recursive(root, &ctx); + FREE(ctx.expanded_path); + FREE(ctx.wildcard_path); + if (ret == 0 && ctx.consume_path_count == 0) + goto no_match; + return ret; + +no_match: + ret = 0; + if (flags & WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES) + ret = (*consume_path)(wildcard_path, consume_path_ctx, true); + + if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH) + WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path); + + if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) { + ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path); + ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST; + } + return ret; +} + +struct expanded_paths_ctx { + tchar **expanded_paths; + size_t num_expanded_paths; + size_t alloc_length; +}; + +static int +append_path_cb(const tchar *path, void *_ctx, bool may_need_trans) +{ + struct expanded_paths_ctx *ctx = _ctx; + tchar *path_dup; + + if (ctx->num_expanded_paths == ctx->alloc_length) { + tchar **new_paths; + size_t new_alloc_length = max(ctx->alloc_length + 8, + ctx->alloc_length * 3 / 2); + + new_paths = REALLOC(ctx->expanded_paths, + new_alloc_length * sizeof(new_paths[0])); + if (new_paths == NULL) + return WIMLIB_ERR_NOMEM; + ctx->expanded_paths = new_paths; + ctx->alloc_length = new_alloc_length; + } + path_dup = TSTRDUP(path); + if (path_dup == NULL) + return WIMLIB_ERR_NOMEM; + if (may_need_trans) { + for (tchar *p = path_dup; *p; p++) + if (is_any_path_separator(*p)) + *p = WIM_PATH_SEPARATOR; + } + ctx->expanded_paths[ctx->num_expanded_paths++] = path_dup; + return 0; +} + +int 
+expand_wildcard_wim_paths(WIMStruct *wim, + const char * const *wildcards, + size_t num_wildcards, + tchar ***expanded_paths_ret, + size_t *num_expanded_paths_ret, + u32 flags) +{ + int ret; + struct expanded_paths_ctx ctx = { + .expanded_paths = NULL, + .num_expanded_paths = 0, + .alloc_length = 0, + }; + for (size_t i = 0; i < num_wildcards; i++) { + ret = expand_wildcard(wim, wildcards[i], append_path_cb, &ctx, + flags); + if (ret) + goto out_free; + } + *expanded_paths_ret = ctx.expanded_paths; + *num_expanded_paths_ret = ctx.num_expanded_paths; + return 0; + +out_free: + for (size_t i = 0; i < ctx.num_expanded_paths; i++) + FREE(ctx.expanded_paths[i]); + FREE(ctx.expanded_paths); + return ret; +} -- 2.43.0