Add support for extract list files
authorEric Biggers <ebiggers3@gmail.com>
Sat, 28 Dec 2013 00:29:31 +0000 (18:29 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Sat, 28 Dec 2013 00:29:31 +0000 (18:29 -0600)
15 files changed:
Makefile.am
doc/imagex-extract.1.in
include/wimlib.h
include/wimlib/dentry.h
include/wimlib/encoding.h
include/wimlib/pathlist.h [new file with mode: 0644]
include/wimlib/wildcard.h [new file with mode: 0644]
include/wimlib/wim.h
programs/imagex.c
src/capture_common.c
src/dentry.c
src/encoding.c
src/extract.c
src/pathlist.c [new file with mode: 0644]
src/wildcard.c [new file with mode: 0644]

index 2602100..b41b826 100644 (file)
@@ -52,6 +52,7 @@ libwim_la_SOURCES =           \
        src/lzx-decompress.c    \
        src/metadata_resource.c \
        src/mount_image.c       \
+       src/pathlist.c          \
        src/paths.c             \
        src/resource.c          \
        src/rbtree.c            \
@@ -63,6 +64,7 @@ libwim_la_SOURCES =           \
        src/update_image.c      \
        src/util.c              \
        src/verify.c            \
+       src/wildcard.c          \
        src/wim.c               \
        src/write.c             \
        src/xml.c               \
@@ -91,6 +93,7 @@ libwim_la_SOURCES =           \
        include/wimlib/lzms.h           \
        include/wimlib/lzx.h            \
        include/wimlib/metadata.h       \
+       include/wimlib/pathlist.h       \
        include/wimlib/paths.h          \
        include/wimlib/rbtree.h         \
        include/wimlib/reparse.h        \
@@ -102,6 +105,7 @@ libwim_la_SOURCES =         \
        include/wimlib/types.h          \
        include/wimlib/util.h           \
        include/wimlib/version.h        \
+       include/wimlib/wildcard.h       \
        include/wimlib/wim.h            \
        include/wimlib/write.h          \
        include/wimlib/xml.h            \
index 0a1fca4..6d21cee 100644 (file)
@@ -3,6 +3,8 @@
 @IMAGEX_PROGNAME@-extract \- Extract files or directories from a WIM image
 .SH SYNOPSIS
 \fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR [\fIPATH\fR...]  [\fIOPTION\fR...]
+.br
+\fB@IMAGEX_PROGNAME@ extract\fR \fIWIMFILE\fR \fIIMAGE\fR @\fILISTFILE\fR  [\fIOPTION\fR...]
 .SH DESCRIPTION
 \fB@IMAGEX_PROGNAME@ extract\fR extracts one or more files or directory trees
 from the specified \fIIMAGE\fR contained in the Windows Imaging (WIM) file
@@ -20,7 +22,9 @@ the name of an image in the WIM.  Use the \fB@IMAGEX_PROGNAME@ info\fR (1)
 command to show what images a WIM file contains.
 .PP
 Each \fIPATH\fR specifies a file or directory tree within the WIM image to
-extract.  See \fBPATH_SPECIFICATIONS\fR.
+extract.  Alternatively, a single \fILISTFILE\fR beginning with the '@'
+character is taken as a file that itself contains a list of files or directory
+trees to extract.  See \fBPATH_SPECIFICATIONS\fR.
 .PP
 By default, files and directories are extracted to the current directory.  Use
 \fB--dest-dir\fR to choose an alternate target directory.  Alternatively, use
@@ -30,15 +34,31 @@ program.
 \fB@IMAGEX_PROGNAME@ extract\fR supports extracting files and directory trees
 from stand-alone WIMs as well as split WIMs.  See \fBSPLIT WIMS\fR.
 .SH PATH SPECIFICATIONS
-Each \fIPATH\fR specifies a file or directory tree within the WIM image to
-extract.  Each path must be specified as an absolute path starting from the root
-of the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1)
-command.  However, path separators may be either forward or backward slashes,
-and the leading slash is optional; also, on Windows, the paths are treated
+Except when a single path is specified and prefixed by the '@' character, each
+\fIPATH\fR specifies a file or directory tree within the WIM image to extract.
+Each such path must be specified as an absolute path starting from the root of
+the WIM image, like those output by the \fB@IMAGEX_PROGNAME@ dir\fR (1) command.
+However, path separators may be either forward or backward slashes, and the
+leading slash is optional; also, on Windows, the paths are treated
 case-insensitively, while on UNIX, paths are treated case-sensitively.
 .PP
 If no \fIPATH\fRs are provided, the default behavior is to extract the full
 image, as if the path "/" had been provided.
+.PP
+If a single \fIPATH\fR is provided and is prefixed with the '@' character, it is
+interpreted as the path to a \fILISTFILE\fR which must be a UTF-8 text file that
+contains a list of paths (files or directories) to extract, one per line.  In
+each line, leading and trailing whitespace is ignored, and lines beginning with
+the ';' character and otherwise empty lines are ignored.  Each path must be
+unquoted and must specify a full path in the WIM image, as described above.
+However, unless \fB--no-wildcards\fR is specified, each path in the list file
+may also contain the wildcard characters '?' and '*', and therefore may expand
+to multiple actual files or directories.  By default, paths or wildcards that
+match no files only produce a warning; use \fB--strict-wildcards\fR if you
+want an error instead.  Also, when using a list file, files and directories not
+located at the root of the WIM image will be extracted to a corresponding
+subdirectory of the destination directory rather than directly to the
+destination directory itself.
 .SH SPLIT WIMS
 You may use \fB@IMAGEX_PROGNAME@ extract\fR to extract files or directory trees
 from a split WIM.  This uses the \fB--refs\fR="\fIGLOB\fR" option in the same
@@ -52,7 +72,7 @@ present.
 .TP
 \fB--ref\fR="\fIGLOB\fR"
 File glob of additional WIMs or split WIM parts to reference resources from.
-See \fBSPLIT_WIMS\fR.  Note: since \fIGLOB\fR is listed in quotes because it is
+See \fBSPLIT_WIMS\fR.  Note: \fIGLOB\fR is listed in quotes because it is
 interpreted by \fB@IMAGEX_PROGNAME@\fR and may need to be quoted to protect
 against shell expansion.
 .TP
@@ -77,6 +97,17 @@ extracted.
 \fB--dest-dir\fR=\fIDIR\fR
 Extract the files and directories to the directory \fIDIR\fR instead of to the
 current working directory.
+.TP
+\fB--no-wildcards\fR
+Do not interpret wildcard characters in paths in the \fILISTFILE\fR.
+.TP
+\fB--strict-wildcards\fR
+Fail if any wildcards or paths in \fILISTFILE\fR do not match any files in the
+WIM image.  The default behavior is to warn only.
+.TP
+\fB--case-insensitive-wildcards\fR
+Treat the wildcards or paths in \fILISTFILE\fR as case-insensitive.  On Windows
+this is already the default behavior, but on UNIX-like systems it is not.
 .SH NOTES
 See the documentation \fB@IMAGEX_PROGNAME@ apply\fR (1) for documentation about
 what data and metadata are extracted on UNIX-like systems versus on Windows.
@@ -143,6 +174,23 @@ Extract multiple files and directories in one command:
 .RE
 .RE
 .PP
+Extract files using a list file:
+.RS
+.PP
+@IMAGEX_PROGNAME@ extract install.wim 1 @files.txt
+.RE
+.PP
+ ...  where files.txt could be something like:
+.PP
+.RS
+.RS
+.nf
+Windows\\System32\\*.*
+Windows\\System32\\??-??\\*.*
+Windows\\System32\\en-US\\*.*
+.RE
+.RE
+.fi
 .SH SEE ALSO
 .BR @IMAGEX_PROGNAME@ (1)
 .BR @IMAGEX_PROGNAME@-apply (1)
index 93f7f35..512c614 100644 (file)
  * Another function, wimlib_extract_files(), is also provided.  It can extract
  * certain files or directories from a WIM image, instead of a full image.
  *
- * A third function, wimlib_extract_image_from_pipe(), allows an image to be
- * extracted from a pipable WIM sent over a pipe; see @ref subsec_pipable_wims.
+ * wimlib_extract_paths() and wimlib_extract_pathlist() allow extracting a set
+ * of paths from a WIM image in a manner that may be easier to use than
+ * wimlib_extract_files(), and also can handle wildcard characters.
+ *
+ * wimlib_extract_image_from_pipe() allows an image to be extracted from a
+ * pipable WIM sent over a pipe; see @ref subsec_pipable_wims.
  *
  * Note that some details of how image extraction/application works are
  * documented more fully in the manual pages for <b>wimlib-imagex apply</b> and
@@ -1382,6 +1386,22 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour
  * performance.  */
 #define WIMLIB_EXTRACT_FLAG_FILE_ORDER                 0x00020000
 
+/** For wimlib_extract_paths() and wimlib_extract_pathlist() only:  Treat the
+ * paths to extract as globs which may contain the wildcard characters '?' and
+ * '*'.  The '?' character matches any single character, whereas the '*'
+ * character matches zero or more characters in the same path component.  */
+#define WIMLIB_EXTRACT_FLAG_GLOB_PATHS                 0x00040000
+
+/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes an error
+ * (::WIMLIB_ERR_PATH_DOES_NOT_EXIST) rather than a warning to be issued when
+ * one of the provided globs did not match a file.  */
+#define WIMLIB_EXTRACT_FLAG_STRICT_GLOB                        0x00080000
+
+/** In combination with ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS, causes the globbing to
+ * be performed case insensitively.  On Windows this is already the default
+ * behavior but on UNIX-like systems it is not.  */
+#define WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB      0x00100000
+
 /** @} */
 /** @ingroup G_mounting_wim_images
  * @{ */
@@ -2329,6 +2349,40 @@ wimlib_extract_image_from_pipe(int pipe_fd,
                               wimlib_progress_func_t progress_func);
 
 /**
+ * Similar to wimlib_extract_paths(), but the paths to extract from the WIM
+ * image are specified in the UTF-8 text file @p path_list_file, which itself
+ * contains the list of paths to use, one per line.  Leading and trailing
+ * whitespace, and otherwise empty lines and lines beginning with the ';'
+ * character are ignored.  No quotes are needed as paths are otherwise delimited
+ * by the newline character.
+ */
+extern int
+wimlib_extract_pathlist(WIMStruct *wim, int image,
+                       const wimlib_tchar *target,
+                       const wimlib_tchar *path_list_file,
+                       int extract_flags,
+                       wimlib_progress_func_t progress_func);
+
+/**
+ * Similar to wimlib_extract_files(), but the files or directories to extract
+ * from the WIM image are specified as an array of paths.  Each path will be
+ * extracted to a corresponding location in @p target based on its location in
+ * the WIM image.
+ *
+ * With ::WIMLIB_EXTRACT_FLAG_GLOB_PATHS specified in @p extract_flags, this
+ * function additionally allows paths to be globs using the wildcard characters
+ * '*' and '?'.
+ */
+extern int
+wimlib_extract_paths(WIMStruct *wim,
+                    int image,
+                    const wimlib_tchar *target,
+                    const wimlib_tchar * const *paths,
+                    size_t num_paths,
+                    int extract_flags,
+                    wimlib_progress_func_t progress_func);
+
+/**
  * @ingroup G_wim_information
  *
  * Extracts the XML data of a WIM file to a file stream.  Every WIM file
index a2735e5..f514028 100644 (file)
@@ -432,15 +432,10 @@ for_dentry_in_rbtree(struct rb_node *node,
                     int (*visitor)(struct wim_dentry *, void *),
                     void *arg);
 
-static inline int
+extern int
 for_dentry_child(const struct wim_dentry *dentry,
                 int (*visitor)(struct wim_dentry *, void *),
-                void *arg)
-{
-       return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node,
-                                   visitor,
-                                   arg);
-}
+                void *arg);
 
 extern int
 for_dentry_in_tree_depth(struct wim_dentry *root,
index ff3c2ae..a621581 100644 (file)
@@ -30,6 +30,9 @@ DECLARE_CHAR_CONVERSION_FUNCTIONS(utf16le, tstr, utf16lechar, tchar);
 DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar);
 #endif
 
+DECLARE_CHAR_CONVERSION_FUNCTIONS(utf8, tstr, char, tchar);
+DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf8, tchar, char);
+
 extern int
 utf8_to_tstr_simple(const char *utf8str, tchar **out);
 
diff --git a/include/wimlib/pathlist.h b/include/wimlib/pathlist.h
new file mode 100644 (file)
index 0000000..71878bf
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef _WIMLIB_PATHLIST_H
+#define _WIMLIB_PATHLIST_H
+
+#include "wimlib/types.h"
+
+/*
+ * Read the UTF-8 text file @listfile and split it into a list of paths, one
+ * per line.
+ *
+ * On success, returns 0 and sets:
+ *   @paths_ret      to an allocated array of pointers to the path strings,
+ *   @num_paths_ret  to the number of entries in that array,
+ *   @mem_ret        to the single buffer that holds all the path strings.
+ *
+ * The strings are not allocated individually; they point into the buffer
+ * returned in @mem_ret.  The caller releases the array and the buffer with
+ * FREE() once the paths are no longer needed.
+ *
+ * On failure, returns a nonzero WIMLIB_ERR_* code.
+ */
+extern int
+read_path_list_file(const tchar *listfile,
+                   tchar ***paths_ret, size_t *num_paths_ret,
+                   void **mem_ret);
+
+#endif /* _WIMLIB_PATHLIST_H */
diff --git a/include/wimlib/wildcard.h b/include/wimlib/wildcard.h
new file mode 100644 (file)
index 0000000..764f478
--- /dev/null
@@ -0,0 +1,31 @@
+#ifndef _WIMLIB_WILDCARD_H
+#define _WIMLIB_WILDCARD_H
+
+#include <wimlib/types.h>
+
+#define WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES        0x00000001
+#define WILDCARD_FLAG_WARN_IF_NO_MATCH         0x00000002
+#define WILDCARD_FLAG_ERROR_IF_NO_MATCH                0x00000004
+#define WILDCARD_FLAG_CASE_INSENSITIVE         0x00000008
+
+/*
+ * Expand the @num_wildcards patterns in @wildcards into a list of paths
+ * (presumably the paths in the currently selected image of @wim that match;
+ * the definition is in wildcard.c -- confirm there).
+ *
+ * @flags is a bitmask of the WILDCARD_FLAG_* values defined above.
+ *
+ * Returns 0 on success, in which case @expanded_paths_ret and
+ * @num_expanded_paths_ret describe the resulting array.  The caller in
+ * extract.c frees each returned string and then the array itself with FREE().
+ * Any nonzero return value is treated by that caller as an error code.
+ *
+ * NOTE(review): @wildcards is declared with 'char' whereas the caller passes
+ * a 'const tchar * const *'.  These are different types wherever tchar is not
+ * char -- confirm against the definition whether this should be 'tchar'.
+ */
+extern int
+expand_wildcard_wim_paths(WIMStruct *wim,
+                         const char * const *wildcards,
+                         size_t num_wildcards,
+                         tchar ***expanded_paths_ret,
+                         size_t *num_expanded_paths_ret,
+                         u32 flags);
+
+#ifdef __WIN32__
+extern int
+fnmatch(const tchar *pattern, const tchar *string, int flags);
+#  define FNM_CASEFOLD 0
+#else
+#  include <fnmatch.h>
+#  ifndef FNM_CASEFOLD
+#    warning "FNM_CASEFOLD not defined!"
+#    define FNM_CASEFOLD 0
+#  endif
+#endif
+
+#endif /* _WIMLIB_WILDCARD_H  */
index 2e78034..f045c88 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef _WIMLIB_WIM_H
 #define _WIMLIB_WIM_H
 
+#include "wimlib.h"
 #include "wimlib/header.h"
 #include "wimlib/types.h"
 #include "wimlib/file_io.h"
@@ -43,7 +44,7 @@ struct WIMStruct {
        void *private;
 
        struct wimlib_decompressor *decompressor;
-       enum wimlib_compression_type decompressor_ctype;
+       u8 decompressor_ctype;
        u32 decompressor_max_block_size;
 
        struct list_head subwims;
index 4800633..ce5ac8d 100644 (file)
@@ -119,6 +119,7 @@ static FILE *imagex_info_file;
 enum {
        IMAGEX_ALLOW_OTHER_OPTION,
        IMAGEX_BOOT_OPTION,
+       IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION,
        IMAGEX_CHECK_OPTION,
        IMAGEX_CHUNK_SIZE_OPTION,
        IMAGEX_COMMAND_OPTION,
@@ -142,6 +143,7 @@ enum {
        IMAGEX_NORPFIX_OPTION,
        IMAGEX_NOCHECK_OPTION,
        IMAGEX_NO_ACLS_OPTION,
+       IMAGEX_NO_WILDCARDS_OPTION,
        IMAGEX_NOT_PIPABLE_OPTION,
        IMAGEX_PACK_STREAMS_OPTION,
        IMAGEX_PATH_OPTION,
@@ -157,6 +159,7 @@ enum {
        IMAGEX_STAGING_DIR_OPTION,
        IMAGEX_STREAMS_INTERFACE_OPTION,
        IMAGEX_STRICT_ACLS_OPTION,
+       IMAGEX_STRICT_WILDCARDS_OPTION,
        IMAGEX_SYMLINK_OPTION,
        IMAGEX_THREADS_OPTION,
        IMAGEX_TO_STDOUT_OPTION,
@@ -252,6 +255,9 @@ static const struct option extract_options[] = {
        {T("dest-dir"),    required_argument, NULL, IMAGEX_DEST_DIR_OPTION},
        {T("to-stdout"),   no_argument,       NULL, IMAGEX_TO_STDOUT_OPTION},
        {T("include-invalid-names"), no_argument, NULL, IMAGEX_INCLUDE_INVALID_NAMES_OPTION},
+       {T("strict-wildcards"), no_argument,  NULL, IMAGEX_STRICT_WILDCARDS_OPTION},
+       {T("no-wildcards"), no_argument,      NULL, IMAGEX_NO_WILDCARDS_OPTION},
+       {T("case-insensitive-wildcards"), no_argument, NULL, IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION},
        {NULL, 0, NULL, 0},
 };
 
@@ -2697,8 +2703,10 @@ imagex_extract(int argc, tchar **argv, int cmd)
        int ret;
        const tchar *wimfile;
        const tchar *image_num_or_name;
+       const tchar *pathlist;
        tchar *dest_dir = T(".");
        int extract_flags = WIMLIB_EXTRACT_FLAG_SEQUENTIAL | WIMLIB_EXTRACT_FLAG_NORPFIX;
+       int listfile_extract_flags = WIMLIB_EXTRACT_FLAG_GLOB_PATHS;
 
        STRING_SET(refglobs);
 
@@ -2739,6 +2747,15 @@ imagex_extract(int argc, tchar **argv, int cmd)
                        extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES;
                        extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS;
                        break;
+               case IMAGEX_NO_WILDCARDS_OPTION:
+                       listfile_extract_flags &= ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS;
+                       break;
+               case IMAGEX_CASE_INSENSITIVE_WILDCARDS_OPTION:
+                       listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB;
+                       break;
+               case IMAGEX_STRICT_WILDCARDS_OPTION:
+                       listfile_extract_flags |= WIMLIB_EXTRACT_FLAG_STRICT_GLOB;
+                       break;
                default:
                        goto out_usage;
                }
@@ -2755,10 +2772,17 @@ imagex_extract(int argc, tchar **argv, int cmd)
        argc -= 2;
        argv += 2;
 
-       cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir,
-                                       &num_cmds);
-       if (!cmds)
-               goto out_err;
+       if (argc == 1 && argv[0][0] == T('@')) {
+               pathlist = argv[0] + 1;
+               cmds = NULL;
+               num_cmds = 0;
+       } else {
+               cmds = prepare_extract_commands(argv, argc, extract_flags, dest_dir,
+                                               &num_cmds);
+               if (cmds == NULL)
+                       goto out_err;
+               pathlist = NULL;
+       }
 
        ret = wimlib_open_wim(wimfile, open_flags, &wim, imagex_progress_func);
        if (ret)
@@ -2777,8 +2801,17 @@ imagex_extract(int argc, tchar **argv, int cmd)
                        goto out_wimlib_free;
        }
 
-       ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0,
-                                  imagex_progress_func);
+       ret = 0;
+       if (ret == 0 && cmds != NULL) {
+               ret = wimlib_extract_files(wim, image, cmds, num_cmds, 0,
+                                          imagex_progress_func);
+       }
+       if (ret == 0 && pathlist != NULL) {
+               ret = wimlib_extract_pathlist(wim, image, dest_dir,
+                                             pathlist,
+                                             extract_flags | listfile_extract_flags,
+                                             imagex_progress_func);
+       }
        if (ret == 0) {
                if (!imagex_be_quiet)
                        imagex_printf(T("Done extracting files.\n"));
@@ -2786,7 +2819,7 @@ imagex_extract(int argc, tchar **argv, int cmd)
                tfprintf(stderr, T("Note: You can use `%"TS"' to see what "
                                   "files and directories\n"
                                   "      are in the WIM image.\n"),
-                               get_cmd_string(CMD_INFO, false));
+                               get_cmd_string(CMD_DIR, false));
        } else if (ret == WIMLIB_ERR_RESOURCE_NOT_FOUND) {
                struct wimlib_wim_info info;
 
index 66f6411..f5866b4 100644 (file)
 #include "wimlib/error.h"
 #include "wimlib/lookup_table.h"
 #include "wimlib/paths.h"
+#include "wimlib/wildcard.h"
 
-#ifdef __WIN32__
-#  include "wimlib/win32.h" /* for fnmatch() equivalent */
-#else
-#  include <fnmatch.h>
-#endif
 #include <string.h>
 
-
 static int
 canonicalize_pattern(const tchar *pat, tchar **canonical_pat_ret)
 {
index 8670d27..53a44cf 100644 (file)
@@ -435,6 +435,24 @@ for_dentry_tree_in_rbtree(struct rb_node *node,
        return 0;
 }
 
+/*
+ * Iterate over all children of @dentry, calling the function @visitor, passing
+ * it a child dentry and the extra argument @arg.
+ *
+ * Note: this function iterates over ALL child dentries, even those with the
+ * same case-insensitive name.
+ *
+ * Note: this function clobbers the tmp_list field of the child dentries.  */
+int
+for_dentry_child(const struct wim_dentry *dentry,
+                int (*visitor)(struct wim_dentry *, void *),
+                void *arg)
+{
+       /* The children of @dentry are kept in a red-black tree that hangs off
+        * its inode; hand the root of that tree to the generic rbtree walker,
+        * and return whatever the walker returns.  */
+       struct rb_node *children_root = dentry->d_inode->i_children.rb_node;
+
+       return for_dentry_in_rbtree(children_root, visitor, arg);
+}
+
 /* Calls a function on all directory entries in a WIM dentry tree.  Logically,
  * this is a pre-order traversal (the function is called on a parent dentry
  * before its children), but sibling dentries will be visited in order as well.
index d4f4f39..42f7e95 100644 (file)
@@ -298,7 +298,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar,
                                 WIMLIB_ERR_INVALID_UTF16_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert UTF-16LE "
                                                  "string \"%"TS"\" to UTF-8 string!", in),
-                                static)
+                                )
 
 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 tstr, "UTF-16LE", tchar,
@@ -308,7 +308,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 WIMLIB_ERR_INVALID_UTF8_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
                                                  "to UTF-16LE string!"),
-                                static)
+                                )
 #else
 
 /* UNIX */
@@ -324,7 +324,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
                                 ERROR("If the data you provided was UTF-8, please make sure "
                                       "the character\n"
                                       "        encoding of your current locale is UTF-8."),
-                                static)
+                                )
 
 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 tstr, "", tchar,
@@ -337,7 +337,7 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 ERROR("This may be because the UTF-8 data "
                                       "could not be represented\n"
                                       "        in your locale's character encoding."),
-                                static)
+                                )
 #endif
 
 int
index 467452d..aba9286 100644 (file)
@@ -49,6 +49,7 @@
 #include "wimlib/error.h"
 #include "wimlib/lookup_table.h"
 #include "wimlib/metadata.h"
+#include "wimlib/pathlist.h"
 #include "wimlib/paths.h"
 #include "wimlib/reparse.h"
 #include "wimlib/resource.h"
@@ -57,6 +58,7 @@
 #  include "wimlib/win32.h" /* for realpath() equivalent */
 #endif
 #include "wimlib/xml.h"
+#include "wimlib/wildcard.h"
 #include "wimlib/wim.h"
 
 #include <errno.h>
@@ -2545,6 +2547,9 @@ check_extract_command(struct wimlib_extract_command *cmd, int wim_header_flags)
                                                 WIMLIB_EXTRACT_FLAG_HARDLINK))
                return WIMLIB_ERR_INVALID_PARAM;
 
+       if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS)
+               return WIMLIB_ERR_INVALID_PARAM;
+
        if ((extract_flags &
             (WIMLIB_EXTRACT_FLAG_NO_ACLS |
              WIMLIB_EXTRACT_FLAG_STRICT_ACLS)) == (WIMLIB_EXTRACT_FLAG_NO_ACLS |
@@ -3032,3 +3037,122 @@ wimlib_extract_image(WIMStruct *wim,
        return do_wimlib_extract_image(wim, image, target, extract_flags,
                                       progress_func);
 }
+
+/* API function documented in wimlib.h  */
+WIMLIBAPI int
+wimlib_extract_pathlist(WIMStruct *wim, int image,
+                       const tchar *target,
+                       const tchar *path_list_file,
+                       int extract_flags,
+                       wimlib_progress_func_t progress_func)
+{
+       tchar **list_paths;
+       size_t list_len;
+       void *list_mem;
+       int status;
+
+       /* Load the list file.  On success, 'list_paths' is an array whose
+        * entries point into the single buffer 'list_mem'.  */
+       status = read_path_list_file(path_list_file, &list_paths, &list_len,
+                                    &list_mem);
+       if (status != 0)
+               return status;
+
+       /* The actual extraction is delegated to wimlib_extract_paths().  */
+       status = wimlib_extract_paths(wim, image, target,
+                                     (const tchar * const *)list_paths,
+                                     list_len, extract_flags, progress_func);
+
+       /* The list is released whether or not the extraction succeeded.  */
+       FREE(list_mem);
+       FREE(list_paths);
+       return status;
+}
+
+/* API function documented in wimlib.h  */
+WIMLIBAPI int
+wimlib_extract_paths(WIMStruct *wim,
+                    int image,
+                    const tchar *target,
+                    const tchar * const *paths,
+                    size_t num_paths,
+                    int extract_flags,
+                    wimlib_progress_func_t progress_func)
+{
+       struct wimlib_extract_command *cmds;
+       tchar **src_paths;
+       size_t num_src_paths;
+       size_t target_len;
+       size_t i;
+       int ret;
+
+       ret = select_wim_image(wim, image);
+       if (ret != 0)
+               return ret;
+
+       if (!(extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS)) {
+               /* No globbing: use the caller's array as-is.  It is only
+                * borrowed, so it is not freed at the end.  */
+               src_paths = (tchar **)paths;
+               num_src_paths = num_paths;
+       } else {
+               /* Globbing: translate the public extract flags into wildcard
+                * flags and expand into a newly allocated array.  */
+               int wildcard_flags;
+
+               wildcard_flags =
+                       (extract_flags & WIMLIB_EXTRACT_FLAG_STRICT_GLOB) ?
+                               WILDCARD_FLAG_ERROR_IF_NO_MATCH :
+                               WILDCARD_FLAG_WARN_IF_NO_MATCH;
+
+               if (extract_flags & WIMLIB_EXTRACT_FLAG_CASE_INSENSITIVE_GLOB)
+                       wildcard_flags |= WILDCARD_FLAG_CASE_INSENSITIVE;
+
+               ret = expand_wildcard_wim_paths(wim, paths, num_paths,
+                                               &src_paths, &num_src_paths,
+                                               wildcard_flags);
+               if (ret != 0)
+                       return ret;
+       }
+
+       /* One extraction command per path.  The array is zeroed, so every
+        * fs_dest_path starts out NULL and the cleanup loop below is safe
+        * even if we bail out part way through.  */
+       cmds = CALLOC(num_src_paths, sizeof(cmds[0]));
+       if (cmds == NULL) {
+               ret = WIMLIB_ERR_NOMEM;
+               goto out_free_expanded_paths;
+       }
+
+       target_len = tstrlen(target);
+
+       for (i = 0; i < num_src_paths; i++) {
+               const tchar *in = src_paths[i];
+               /* target + separator + WIM path + terminating null  */
+               size_t dest_len = target_len + 1 + tstrlen(in) + 1;
+               tchar *dest_path;
+               tchar *out;
+
+               cmds[i].wim_source_path = src_paths[i];
+               cmds[i].extract_flags = 0;
+
+               dest_path = MALLOC(dest_len * sizeof(tchar));
+               if (dest_path == NULL) {
+                       ret = WIMLIB_ERR_NOMEM;
+                       goto out_free_extraction_cmds;
+               }
+
+               /* Build "<target><sep><path>", rewriting every path separator
+                * found in the WIM path to the OS-preferred one.  */
+               out = tmempcpy(dest_path, target, target_len);
+               *out++ = OS_PREFERRED_PATH_SEPARATOR;
+               for (; *in != T('\0'); in++, out++) {
+                       *out = is_any_path_separator(*in) ?
+                                       OS_PREFERRED_PATH_SEPARATOR : *in;
+               }
+               *out++ = T('\0');
+               wimlib_assert(out - dest_path == dest_len);
+
+               cmds[i].fs_dest_path = dest_path;
+       }
+
+       /* The glob flag has already been handled here, so it is masked out of
+        * the default flags passed on to wimlib_extract_files().  */
+       ret = wimlib_extract_files(wim, image,
+                                  cmds, num_src_paths,
+                                  extract_flags & ~WIMLIB_EXTRACT_FLAG_GLOB_PATHS,
+                                  progress_func);
+
+out_free_extraction_cmds:
+       for (i = 0; i < num_src_paths; i++)
+               FREE(cmds[i].fs_dest_path);
+       FREE(cmds);
+out_free_expanded_paths:
+       /* Only the expanded array is owned by this function.  */
+       if (extract_flags & WIMLIB_EXTRACT_FLAG_GLOB_PATHS) {
+               for (i = 0; i < num_src_paths; i++)
+                       FREE(src_paths[i]);
+               FREE(src_paths);
+       }
+       return ret;
+}
diff --git a/src/pathlist.c b/src/pathlist.c
new file mode 100644 (file)
index 0000000..3bfec2e
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * pathlist.c
+ *
+ * Utility function for reading path list files.
+ */
+
+/*
+ * Copyright (C) 2013 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/file_io.h"
+#include "wimlib/pathlist.h"
+#include "wimlib/util.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Read the entire file at @path into a newly allocated buffer.
+ *
+ * On success, returns 0, stores the buffer in @buf_ret and its size in bytes
+ * in @bufsize_ret; the caller releases the buffer with FREE().  On failure,
+ * an error message is issued and a nonzero error code is returned.
+ */
+static int
+read_file_contents(const tchar *path, char **buf_ret, size_t *bufsize_ret)
+{
+       struct filedes fd;
+       struct stat stbuf;
+       void *contents = NULL;
+       int os_fd;
+       int read_status;
+       int saved_errno;
+
+       os_fd = topen(path, O_RDONLY | O_BINARY);
+       if (os_fd < 0) {
+               ERROR_WITH_ERRNO("Can't open \"%"TS"\"", path);
+               return WIMLIB_ERR_OPEN;
+       }
+
+       if (fstat(os_fd, &stbuf) != 0) {
+               ERROR_WITH_ERRNO("Can't stat \"%"TS"\"", path);
+               close(os_fd);
+               return WIMLIB_ERR_STAT;
+       }
+
+       /* Only attempt the allocation when the file size survives the
+        * conversion to size_t; otherwise treat it like a failed allocation. */
+       if ((size_t)stbuf.st_size == stbuf.st_size)
+               contents = MALLOC(stbuf.st_size);
+       if (contents == NULL) {
+               close(os_fd);
+               ERROR("Not enough memory to read \"%"TS"\"", path);
+               return WIMLIB_ERR_NOMEM;
+       }
+
+       filedes_init(&fd, os_fd);
+       read_status = full_read(&fd, contents, stbuf.st_size);
+
+       /* Preserve the errno left by the read across the close, because the
+        * error message below reports it.  */
+       saved_errno = errno;
+       filedes_close(&fd);
+       errno = saved_errno;
+
+       if (read_status != 0) {
+               ERROR_WITH_ERRNO("Error reading \"%"TS"\"", path);
+               FREE(contents);
+               return read_status;
+       }
+
+       *buf_ret = contents;
+       *bufsize_ret = stbuf.st_size;
+       return 0;
+}
+
+/*
+ * Read the UTF-8 encoded text file at @path and convert its contents to a
+ * buffer of 'tchar's.
+ *
+ * On success, returns 0, stores the newly allocated buffer in @buf_ret and
+ * the number of tchars it contains in @buflen_ret.  The caller releases the
+ * buffer with FREE().  On failure, returns a nonzero error code.
+ *
+ * A non-empty buffer is guaranteed to end with a newline character: if the
+ * file's last line is not newline-terminated, one is appended.  The path list
+ * parser only recognizes lines that end in '\n', so without this the final
+ * path of such a file would be silently dropped.
+ */
+static int
+read_utf8_file_contents(const tchar *path, tchar **buf_ret, size_t *buflen_ret)
+{
+       int ret;
+       char *buf_utf8;
+       size_t bufsize_utf8;
+       tchar *buf_tstr;
+       size_t bufsize_tstr;
+       size_t buflen;
+
+       ret = read_file_contents(path, &buf_utf8, &bufsize_utf8);
+       if (ret)
+               return ret;
+
+       ret = utf8_to_tstr(buf_utf8, bufsize_utf8, &buf_tstr, &bufsize_tstr);
+       FREE(buf_utf8);
+       if (ret)
+               return ret;
+
+       buflen = bufsize_tstr / sizeof(tchar);
+
+       if (buflen != 0 && buf_tstr[buflen - 1] != T('\n')) {
+               /* Grow by one character to make room for the newline.  On
+                * failure the original buffer is still valid and must be
+                * freed here.  */
+               tchar *new_buf = REALLOC(buf_tstr,
+                                        (buflen + 1) * sizeof(tchar));
+               if (new_buf == NULL) {
+                       FREE(buf_tstr);
+                       ERROR("Not enough memory to read \"%"TS"\"", path);
+                       return WIMLIB_ERR_NOMEM;
+               }
+               buf_tstr = new_buf;
+               buf_tstr[buflen++] = T('\n');
+       }
+
+       *buf_ret = buf_tstr;
+       *buflen_ret = buflen;
+       return 0;
+}
+
+/*
+ * Split the text in @buf (@buflen characters) into paths, one per line.
+ *
+ * The buffer is modified in place: each accepted line has its leading and
+ * trailing whitespace excluded and is null-terminated inside @buf, and the
+ * returned pointers refer directly into @buf.  Lines that are empty after
+ * trimming, and lines whose first non-whitespace character is ';', are
+ * skipped.
+ *
+ * Only lines terminated by '\n' are processed; any text after the last
+ * newline in the buffer is not examined.
+ *
+ * On success, returns 0 and stores the allocated pointer array (NULL if no
+ * paths were found) in @paths_ret and its length in @num_paths_ret.  Returns
+ * WIMLIB_ERR_NOMEM if the array cannot be grown.
+ */
+static int
+parse_path_list_file(tchar *buf, size_t buflen,
+                    tchar ***paths_ret, size_t *num_paths_ret)
+{
+       tchar * const end = buf + buflen;
+       tchar *cur = buf;
+       tchar **paths = NULL;
+       size_t num_paths = 0;
+       size_t num_alloc_paths = 0;
+
+       while (cur != end) {
+               tchar *nl = tmemchr(cur, T('\n'), end - cur);
+               tchar *first;
+               tchar *last;
+
+               if (nl == NULL)
+                       break;
+
+               /* The line occupies [first, last); advance the cursor now so
+                * that skipped lines can simply 'continue'.  */
+               first = cur;
+               last = nl;
+               cur = nl + 1;
+
+               /* Trim whitespace from the front, then from the back.  */
+               while (first < nl && istspace(*first))
+                       first++;
+               while (last > first && istspace(*(last - 1)))
+                       last--;
+
+               /* Skip blank lines and ';' comments.  */
+               if (last == first || *first == T(';'))
+                       continue;
+
+               /* Grow the pointer array when it is full.  */
+               if (num_paths == num_alloc_paths) {
+                       size_t grown = max(num_alloc_paths + 8,
+                                          num_alloc_paths * 3 / 2);
+                       tchar **new_paths = REALLOC(paths,
+                                                   grown * sizeof(paths[0]));
+
+                       if (new_paths == NULL) {
+                               FREE(paths);
+                               return WIMLIB_ERR_NOMEM;
+                       }
+                       paths = new_paths;
+                       num_alloc_paths = grown;
+               }
+
+               /* Terminate the path in place and record it.  */
+               *last = T('\0');
+               paths[num_paths++] = first;
+       }
+
+       *paths_ret = paths;
+       *num_paths_ret = num_paths;
+       return 0;
+}
+
+/*
+ * Read the path list file @listfile and parse it into an array of paths.
+ *
+ * On success, returns 0; @paths_ret and @num_paths_ret receive the array and
+ * its length, and @mem_ret receives the buffer that the path strings point
+ * into.  On failure, returns a nonzero error code and leaves nothing for the
+ * caller to free.
+ */
+int
+read_path_list_file(const tchar *listfile,
+                   tchar ***paths_ret, size_t *num_paths_ret,
+                   void **mem_ret)
+{
+       tchar *contents;
+       size_t num_chars;
+       int status;
+
+       status = read_utf8_file_contents(listfile, &contents, &num_chars);
+       if (status != 0)
+               return status;
+
+       status = parse_path_list_file(contents, num_chars,
+                                     paths_ret, num_paths_ret);
+       if (status == 0) {
+               /* The parsed paths point into 'contents', so ownership of the
+                * buffer passes to the caller.  */
+               *mem_ret = contents;
+               return 0;
+       }
+
+       FREE(contents);
+       return status;
+}
diff --git a/src/wildcard.c b/src/wildcard.c
new file mode 100644 (file)
index 0000000..9c878e2
--- /dev/null
@@ -0,0 +1,341 @@
+/*
+ * wildcard.c
+ *
+ * Wildcard matching functions.
+ */
+
+/*
+ * Copyright (C) 2013 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib/dentry.h"
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/metadata.h"
+#include "wimlib/wildcard.h"
+
+/* State shared by the recursive expansion of one wildcard path.  */
+struct match_dentry_ctx {
+       /* Callback invoked with each expanded path, with consume_path_ctx as
+        * its second argument.  */
+       int (*consume_path)(const tchar *, void *, bool);
+       void *consume_path_ctx;
+
+       /* Number of times consume_path has been called during the expansion.  */
+       size_t consume_path_count;
+
+       /* Buffer in which the path being expanded is built up, one matched
+        * name at a time.  */
+       tchar *expanded_path;
+
+       /* Number of characters currently in expanded_path, not counting the
+        * null terminator.  */
+       size_t expanded_path_len;
+
+       /* Number of characters expanded_path has room for.  */
+       size_t expanded_path_alloc_len;
+
+       /* Writable copy of the wildcard path being expanded; match_wildcard()
+        * temporarily null-terminates components of it in place.  */
+       tchar *wildcard_path;
+
+       /* Offset and length, in characters, of the component of wildcard_path
+        * currently being matched.  */
+       size_t cur_component_offset;
+       size_t cur_component_len;
+
+       /* Whether names are to be matched case-insensitively.  */
+       bool case_insensitive;
+};
+
+/*
+ * Test whether @string matches the first @wildcard_len characters of
+ * @wildcard, interpreted as an fnmatch() pattern.
+ *
+ * @wildcard is temporarily null-terminated at index @wildcard_len and restored
+ * before returning, so it must be writable and @wildcard[@wildcard_len] must be
+ * a valid element.
+ *
+ * If @case_insensitive is true, FNM_CASEFOLD is requested.
+ *
+ * Returns true if and only if fnmatch() reported a match.
+ */
+static bool
+match_wildcard(const tchar *string, tchar *wildcard,
+              size_t wildcard_len, bool case_insensitive)
+{
+       /* Must be a tchar, not a char: the saved character has the pattern's
+        * own character type, which may be wider than char.  */
+       tchar orig;
+       int flags;
+       int ret;
+
+       orig = wildcard[wildcard_len];
+       wildcard[wildcard_len] = T('\0');
+
+       /* Warning: in Windows builds fnmatch() calls a replacement function.
+        * Also, FNM_CASEFOLD is a GNU extension and it is defined to 0 if not
+        * available.  */
+       flags = FNM_NOESCAPE;
+       if (case_insensitive)
+               flags |= FNM_CASEFOLD;
+       ret = fnmatch(wildcard, string, flags);
+
+       wildcard[wildcard_len] = orig;
+       return (ret == 0);
+}
+
+static int
+expand_wildcard_recursive(struct wim_dentry *cur_dentry,
+                         struct match_dentry_ctx *ctx);
+
+/* Return values of wildcard_status().  */
+enum {
+       /* Nothing at all remains of the wildcard path.  */
+       WILDCARD_STATUS_DONE_FULLY,
+
+       /* Only path separators remain of the wildcard path.  */
+       WILDCARD_STATUS_DONE_TRAILING_SLASHES,
+
+       /* Something other than path separators remains.  */
+       WILDCARD_STATUS_NOT_DONE,
+};
+
+/*
+ * Classify what is left of a wildcard path, starting at @wildcard:
+ *
+ * - WILDCARD_STATUS_DONE_FULLY if it is the empty string;
+ * - WILDCARD_STATUS_DONE_TRAILING_SLASHES if it consists only of path
+ *   separators;
+ * - WILDCARD_STATUS_NOT_DONE otherwise.
+ */
+static int
+wildcard_status(const tchar *wildcard)
+{
+       const tchar *p;
+
+       if (wildcard[0] == T('\0'))
+               return WILDCARD_STATUS_DONE_FULLY;
+
+       /* Skip over any run of path separators.  */
+       for (p = wildcard; is_any_path_separator(*p); p++)
+               ;
+
+       return (*p != T('\0')) ? WILDCARD_STATUS_NOT_DONE
+                              : WILDCARD_STATUS_DONE_TRAILING_SLASHES;
+}
+
+/*
+ * Callback passed to for_dentry_child() by expand_wildcard_recursive(): test
+ * one child dentry against the current component of the wildcard path.
+ *
+ * Dentries with an empty name are skipped.  If the dentry's name matches, it is
+ * appended to ctx->expanded_path after a WIM_PATH_SEPARATOR.  Then, depending
+ * on what follows the current component in the wildcard path, either the
+ * expanded path is passed to ctx->consume_path or the expansion recurses into
+ * the dentry's children.  The expanded path is truncated back to its previous
+ * length before returning.
+ *
+ * Returns 0 if the dentry did not match or was handled successfully;
+ * WIMLIB_ERR_NOMEM if the expanded path buffer could not be enlarged; otherwise
+ * the nonzero value returned by the name conversion, the callback, or the
+ * recursive expansion.
+ */
+static int
+match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
+{
+       struct match_dentry_ctx *ctx = _ctx;
+       tchar *name;
+       size_t name_len;
+       int ret;
+
+       if (cur_dentry->file_name_nbytes == 0)
+               return 0;
+
+#if TCHAR_IS_UTF16LE
+       /* The dentry's name is used directly; it is not freed below.  */
+       name = cur_dentry->file_name;
+       name_len = cur_dentry->file_name_nbytes;
+#else
+       /* The dentry's name is converted; the result is freed below.  */
+       ret = utf16le_to_tstr(cur_dentry->file_name,
+                             cur_dentry->file_name_nbytes,
+                             &name, &name_len);
+       if (ret)
+               return ret;
+#endif
+       /* Convert the length from bytes to characters.  */
+       name_len /= sizeof(tchar);
+
+       if (match_wildcard(name,
+                          &ctx->wildcard_path[ctx->cur_component_offset],
+                          ctx->cur_component_len,
+                          ctx->case_insensitive))
+       {
+               /* Room for the existing path, a separator, the name, and a null
+                * terminator.  */
+               size_t len_needed = ctx->expanded_path_len + 1 + name_len + 1;
+               size_t expanded_path_len_save;
+
+               if (len_needed > ctx->expanded_path_alloc_len) {
+                       tchar *expanded_path;
+
+                       expanded_path = REALLOC(ctx->expanded_path,
+                                               len_needed * sizeof(ctx->expanded_path[0]));
+                       if (expanded_path == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_name;
+                       }
+                       ctx->expanded_path = expanded_path;
+                       ctx->expanded_path_alloc_len = len_needed;
+               }
+               /* Remember where this name starts so it can be removed again.  */
+               expanded_path_len_save = ctx->expanded_path_len;
+
+               ctx->expanded_path[ctx->expanded_path_len++] = WIM_PATH_SEPARATOR;
+               tmemcpy(&ctx->expanded_path[ctx->expanded_path_len],
+                       name, name_len);
+               ctx->expanded_path_len += name_len;
+               ctx->expanded_path[ctx->expanded_path_len] = T('\0');
+
+               /* Look at what follows the current component in the wildcard
+                * path.  */
+               switch (wildcard_status(&ctx->wildcard_path[
+                               ctx->cur_component_offset +
+                               ctx->cur_component_len]))
+               {
+               case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
+                       /* Only path separators follow: non-directories are
+                        * skipped.  */
+                       if (!dentry_is_directory(cur_dentry)) {
+                               ret = 0;
+                               break;
+                       }
+                       /* Fall through  */
+               case WILDCARD_STATUS_DONE_FULLY:
+                       /* The count is incremented whether or not the callback
+                        * succeeded.  */
+                       ret = (*ctx->consume_path)(ctx->expanded_path,
+                                                  ctx->consume_path_ctx,
+                                                  false);
+                       ctx->consume_path_count++;
+                       break;
+               case WILDCARD_STATUS_NOT_DONE:
+                       /* More components follow: match them against this
+                        * dentry's children.  */
+                       ret = expand_wildcard_recursive(cur_dentry, ctx);
+                       break;
+               }
+               /* Remove this name from the expanded path again.  */
+               ctx->expanded_path_len = expanded_path_len_save;
+               ctx->expanded_path[expanded_path_len_save] = T('\0');
+       } else {
+               ret = 0;
+       }
+
+out_free_name:
+#if !TCHAR_IS_UTF16LE
+       FREE(name);
+#endif
+       return ret;
+}
+
+/*
+ * Match the next component of the wildcard path against the children of
+ * @cur_dentry.
+ *
+ * The next component begins after the current component (as recorded in @ctx)
+ * and any path separators following it, and extends up to the next path
+ * separator or the end of the wildcard path.  If it is empty, 0 is returned
+ * without doing anything.  Otherwise it temporarily becomes the current
+ * component in @ctx while match_dentry() is called on each child through
+ * for_dentry_child(), whose return value is returned.
+ */
+static int
+expand_wildcard_recursive(struct wim_dentry *cur_dentry,
+                         struct match_dentry_ctx *ctx)
+{
+       const tchar *pattern = ctx->wildcard_path;
+       const size_t saved_offset = ctx->cur_component_offset;
+       const size_t saved_len = ctx->cur_component_len;
+       size_t start;
+       size_t stop;
+       int ret;
+
+       /* Skip the separators that follow the current component.  */
+       start = saved_offset + saved_len;
+       while (is_any_path_separator(pattern[start]))
+               start++;
+
+       /* Find the end of the next component.  */
+       for (stop = start;
+            pattern[stop] != T('\0') && !is_any_path_separator(pattern[stop]);
+            stop++)
+               ;
+
+       if (stop == start)
+               return 0;
+
+       ctx->cur_component_offset = start;
+       ctx->cur_component_len = stop - start;
+
+       ret = for_dentry_child(cur_dentry, match_dentry, ctx);
+
+       /* Restore the caller's current component.  */
+       ctx->cur_component_len = saved_len;
+       ctx->cur_component_offset = saved_offset;
+
+       return ret;
+}
+
+/*
+ * Expand one wildcard path against the image whose root dentry is returned by
+ * wim_root_dentry(@wim), calling @consume_path (with @consume_path_ctx and
+ * 'false') for every matching path.
+ *
+ * If the image has no root dentry, or the expansion succeeded without the
+ * callback ever being called, the result is determined by @flags:
+ *
+ * - WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES: @consume_path is called with
+ *   @wildcard_path itself and 'true';
+ * - WILDCARD_FLAG_WARN_IF_NO_MATCH: a warning is issued;
+ * - WILDCARD_FLAG_ERROR_IF_NO_MATCH: an error is issued and
+ *   WIMLIB_ERR_PATH_DOES_NOT_EXIST is returned.
+ *
+ * Returns 0 on success, WIMLIB_ERR_NOMEM if the working buffers could not be
+ * allocated, or otherwise a nonzero error code as described above or as
+ * returned by the expansion.
+ */
+static int
+expand_wildcard(WIMStruct *wim,
+               const tchar *wildcard_path,
+               int (*consume_path)(const tchar *, void *, bool),
+               void *consume_path_ctx,
+               u32 flags)
+{
+       struct wim_dentry *root = wim_root_dentry(wim);
+       int ret;
+
+       if (root != NULL) {
+               const size_t initial_alloc_len = 256;
+               struct match_dentry_ctx ctx = {
+                       .consume_path = consume_path,
+                       .consume_path_ctx = consume_path_ctx,
+                       .consume_path_count = 0,
+                       .expanded_path = MALLOC(initial_alloc_len * sizeof(tchar)),
+                       .expanded_path_len = 0,
+                       .expanded_path_alloc_len = initial_alloc_len,
+                       .wildcard_path = TSTRDUP(wildcard_path),
+                       .cur_component_offset = 0,
+                       .cur_component_len = 0,
+                       .case_insensitive =
+                               ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
+               };
+
+               if (ctx.expanded_path == NULL || ctx.wildcard_path == NULL) {
+                       FREE(ctx.expanded_path);
+                       FREE(ctx.wildcard_path);
+                       return WIMLIB_ERR_NOMEM;
+               }
+
+               ret = expand_wildcard_recursive(root, &ctx);
+               FREE(ctx.expanded_path);
+               FREE(ctx.wildcard_path);
+
+               /* Finished, unless the expansion succeeded without producing
+                * any path.  */
+               if (ret != 0 || ctx.consume_path_count != 0)
+                       return ret;
+       }
+
+       /* Nothing matched.  */
+       ret = (flags & WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES) ?
+               (*consume_path)(wildcard_path, consume_path_ctx, true) : 0;
+
+       if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
+               WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
+
+       if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
+               ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
+               ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
+       }
+       return ret;
+}
+
+/* Growable array of path strings, filled in by append_path_cb().  */
+struct expanded_paths_ctx {
+       /* The array of path strings.  */
+       tchar **expanded_paths;
+
+       /* Number of entries of expanded_paths that are in use.  */
+       size_t num_expanded_paths;
+
+       /* Number of entries expanded_paths has room for.  */
+       size_t alloc_length;
+};
+
+/*
+ * Path-consuming callback that appends a newly allocated copy of @path to the
+ * array in the 'struct expanded_paths_ctx' pointed to by @_ctx, enlarging the
+ * array first if it is full.
+ *
+ * If @may_need_trans is true, every path separator in the copy is replaced with
+ * WIM_PATH_SEPARATOR.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if the array could not be enlarged
+ * or the path could not be duplicated.
+ */
+static int
+append_path_cb(const tchar *path, void *_ctx, bool may_need_trans)
+{
+       struct expanded_paths_ctx *list = _ctx;
+       tchar *copy;
+
+       if (list->num_expanded_paths == list->alloc_length) {
+               /* Grow by half, but by at least 8 entries.  */
+               size_t grown_length = max(list->alloc_length + 8,
+                                         list->alloc_length * 3 / 2);
+               tchar **grown;
+
+               grown = REALLOC(list->expanded_paths,
+                               grown_length * sizeof(list->expanded_paths[0]));
+               if (grown == NULL)
+                       return WIMLIB_ERR_NOMEM;
+               list->expanded_paths = grown;
+               list->alloc_length = grown_length;
+       }
+
+       copy = TSTRDUP(path);
+       if (copy == NULL)
+               return WIMLIB_ERR_NOMEM;
+
+       if (may_need_trans) {
+               tchar *p = copy;
+
+               while (*p != T('\0')) {
+                       if (is_any_path_separator(*p))
+                               *p = WIM_PATH_SEPARATOR;
+                       p++;
+               }
+       }
+
+       list->expanded_paths[list->num_expanded_paths] = copy;
+       list->num_expanded_paths++;
+       return 0;
+}
+
+/*
+ * Expand each of the @num_wildcards wildcard paths in @wildcards against @wim
+ * and collect the results into one array.
+ *
+ * @flags is passed unchanged to the expansion of each wildcard path.
+ *
+ * On success, returns 0 and stores the array of expanded paths in
+ * @expanded_paths_ret and its length in @num_expanded_paths_ret.  The array and
+ * each string in it are separately allocated.  The array pointer is NULL if no
+ * paths were produced.
+ *
+ * On failure, returns the nonzero error code from the first wildcard path that
+ * could not be expanded; everything collected so far is freed and the output
+ * parameters are not written.
+ *
+ * NOTE(review): @wildcards is declared with 'char' although each element is
+ * passed on as a 'const tchar *' --- confirm this is intended for builds where
+ * tchar is not char.
+ */
+int
+expand_wildcard_wim_paths(WIMStruct *wim,
+                         const char * const *wildcards,
+                         size_t num_wildcards,
+                         tchar ***expanded_paths_ret,
+                         size_t *num_expanded_paths_ret,
+                         u32 flags)
+{
+       struct expanded_paths_ctx ctx = {
+               .expanded_paths = NULL,
+               .num_expanded_paths = 0,
+               .alloc_length = 0,
+       };
+       int ret = 0;
+       size_t i;
+
+       /* Stop at the first wildcard path that fails to expand.  */
+       for (i = 0; i < num_wildcards && ret == 0; i++)
+               ret = expand_wildcard(wim, wildcards[i], append_path_cb, &ctx,
+                                     flags);
+
+       if (ret == 0) {
+               *expanded_paths_ret = ctx.expanded_paths;
+               *num_expanded_paths_ret = ctx.num_expanded_paths;
+               return 0;
+       }
+
+       for (i = 0; i < ctx.num_expanded_paths; i++)
+               FREE(ctx.expanded_paths[i]);
+       FREE(ctx.expanded_paths);
+       return ret;
+}