Adjust path matching
author Eric Biggers <ebiggers3@gmail.com>
Tue, 29 Apr 2014 19:13:41 +0000 (14:13 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Tue, 29 Apr 2014 19:17:32 +0000 (14:17 -0500)
- To get consistent behavior, always use own implementation in
  wildcard.c, not fnmatch() or PathMatchSpec()
- Don't store prefix in capture config
- Don't allow non-rooted paths with multiple directory components (these
  could cause confusion)
- win32_apply.c: Match [PrepopulateList] patterns recursively

12 files changed:
include/wimlib/capture.h
include/wimlib/paths.h
include/wimlib/wildcard.h
src/capture_common.c
src/ntfs-3g_capture.c
src/paths.c
src/unix_capture.c
src/update_image.c
src/wildcard.c
src/win32_apply.c
src/win32_capture.c
src/win32_replacements.c

index aea3e3c..8d79542 100644 (file)
@@ -15,8 +15,6 @@ struct wim_inode;
 struct capture_config {
        struct string_set exclusion_pats;
        struct string_set exclusion_exception_pats;
-       tchar *prefix;
-       size_t prefix_num_tchars;
        void *buf;
 };
 
@@ -47,14 +45,17 @@ struct add_image_params {
         * libntfs-3g capture.  */
        void *extra_arg;
 
-       u64 capture_root_ino;
-       u64 capture_root_dev;
 
        /* If non-NULL, the user-supplied progress function. */
        wimlib_progress_func_t progress_func;
 
        /* Progress data.  */
        union wimlib_progress_info progress;
+
+       /* Can be used by the capture implementation.  */
+       u64 capture_root_ino;
+       u64 capture_root_dev;
+       size_t capture_root_nchars;
 };
 
 
@@ -75,15 +76,12 @@ extern void
 destroy_capture_config(struct capture_config *config);
 
 extern bool
-match_pattern(const tchar *path,
-             const tchar *path_basename,
-             const struct string_set *list);
+match_pattern_list(const tchar *path, size_t path_nchars,
+                  const struct string_set *list);
 
 extern bool
 exclude_path(const tchar *path, size_t path_len,
-            const struct capture_config *config,
-            bool exclude_prefix);
-
+            const struct capture_config *config);
 
 typedef int (*capture_tree_t)(struct wim_dentry **, const tchar *,
                              struct add_image_params *);
index 59c1a86..bad8e74 100644 (file)
@@ -13,6 +13,9 @@ path_basename_with_len(const tchar *path, size_t len);
 extern const tchar *
 path_stream_name(const tchar *path);
 
+extern void
+do_canonicalize_path(const tchar *in, tchar *out);
+
 extern tchar *
 canonicalize_wim_path(const tchar *wim_path) _malloc_attribute;
 
index 5bae113..edd9d84 100644 (file)
@@ -16,19 +16,8 @@ expand_wildcard(WIMStruct *wim,
                void *consume_dentry_ctx,
                u32 flags);
 
-#ifdef __WIN32__
-extern int
-fnmatch(const tchar *pattern, const tchar *string, int flags);
-#  define FNM_CASEFOLD 0x1
-#  define FNM_PATHNAME 0x2
-#  define FNM_NOESCAPE 0x4
-#  define FNM_NOMATCH 1
-#else
-#  include <fnmatch.h>
-#  ifndef FNM_CASEFOLD
-#    warning "FNM_CASEFOLD not defined!"
-#    define FNM_CASEFOLD 0
-#  endif
-#endif
+extern bool
+match_path(const tchar *path, size_t path_nchars,
+          const tchar *wildcard, tchar path_sep, bool prefix_ok);
 
 #endif /* _WIMLIB_WILDCARD_H  */
index c74c94e..99d6af6 100644 (file)
@@ -81,10 +81,11 @@ mangle_pat(tchar *pat, const tchar *path, unsigned long line_no)
                         * relative to the current working directory on the c:
                         * drive.  We require paths with drive letters to be
                         * absolute. */
-                       ERROR("%"TS":%lu: Invalid path \"%"TS"\"; paths including "
-                             "drive letters must be absolute!\n"
-                             "        Maybe try \"%"TC":\\%"TS"\"?",
-                             path, line_no, pat, pat[0], &pat[2]);
+                       ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
+                             "        Patterns including drive letters must be absolute!\n"
+                             "        Maybe try \"%"TC":%"TC"%"TS"\"?\n",
+                             path, line_no, pat,
+                             pat[0], OS_PREFERRED_PATH_SEPARATOR, &pat[2]);
                        return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
                }
 
@@ -96,11 +97,24 @@ mangle_pat(tchar *pat, const tchar *path, unsigned long line_no)
                tmemmove(pat, pat + 2, tstrlen(pat + 2) + 1);
        }
 
-       /* Translate all possible path separators into the operating system's
-        * preferred path separator.  */
-       for (tchar *p = pat; *p; p++)
-               if (is_any_path_separator(*p))
-                       *p = OS_PREFERRED_PATH_SEPARATOR;
+       /* Collapse and translate path separators.
+        *
+        * Note: we require that this works for filesystem paths and WIM paths,
+        * so the desired path separators must be the same.  */
+       BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR);
+       do_canonicalize_path(pat, pat);
+
+       /* Relative patterns can only match file names.  */
+       if (pat[0] != OS_PREFERRED_PATH_SEPARATOR &&
+           tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR))
+       {
+               ERROR("%"TS":%lu: Invalid path \"%"TS"\":\n"
+                     "        Relative patterns can only include one path component!\n"
+                     "        Maybe try \"%"TC"%"TS"\"?",
+                     path, line_no, pat, OS_PREFERRED_PATH_SEPARATOR, pat);
+               return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
+       }
+
        return 0;
 }
 
@@ -109,7 +123,12 @@ do_read_capture_config_file(const tchar *config_file, const void *buf,
                            size_t bufsize, struct capture_config *config)
 {
        int ret;
+
+       /* [PrepopulateList] is used for apply, not capture.  But since we do
+        * understand it, recognize it (avoiding unrecognized section warning)
+        * and discard the strings.  */
        STRING_SET(prepopulate_pats);
+
        struct text_file_section sections[] = {
                {T("ExclusionList"),
                        &config->exclusion_pats},
@@ -141,68 +160,48 @@ destroy_capture_config(struct capture_config *config)
 }
 
 bool
-match_pattern(const tchar *path,
-             const tchar *path_basename,
-             const struct string_set *list)
+match_pattern_list(const tchar *path, size_t path_len,
+                  const struct string_set *list)
 {
-       for (size_t i = 0; i < list->num_strings; i++) {
-
-               const tchar *pat = list->strings[i];
-               const tchar *string;
-
-               if (*pat == OS_PREFERRED_PATH_SEPARATOR) {
-                       /* Absolute path from root of capture */
-                       string = path;
-               } else {
-                       if (tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR))
-                               /* Relative path from root of capture */
-                               string = path + 1;
-                       else
-                               /* A file name pattern */
-                               string = path_basename;
-               }
-
-               /* Warning: on Windows native builds, fnmatch() calls the
-                * replacement function in win32.c. */
-               if (fnmatch(pat, string, FNM_PATHNAME | FNM_NOESCAPE
-                               #ifdef FNM_CASEFOLD
-                                       | FNM_CASEFOLD
-                               #endif
-                           ) == 0)
-               {
-                       DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
-                             string, pat);
+       for (size_t i = 0; i < list->num_strings; i++)
+               if (match_path(path, path_len, list->strings[i],
+                              OS_PREFERRED_PATH_SEPARATOR, true))
                        return true;
-               }
-       }
        return false;
 }
 
-/* Return true if the image capture configuration file indicates we should
+/*
+ * Return true if the image capture configuration file indicates we should
  * exclude the filename @path from capture.
  *
- * If @exclude_prefix is %true, the part of the path up and including the name
- * of the directory being captured is not included in the path for matching
- * purposes.  This allows, for example, a pattern like /hiberfil.sys to match a
- * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
- * directory.
+ * The passed in @path must be given relative to the root of the capture, but
+ * with a leading path separator.  For example, if the file "in/file" is being
+ * tested and the library user ran wimlib_add_image(wim, "in", ...), then the
+ * directory "in" is the root of the capture and the path should be specified as
+ * "/file".
+ *
+ * Also, all path separators in @path must be OS_PREFERRED_PATH_SEPARATOR, and
+ * there cannot be trailing slashes.
+ *
+ * As a special case, the empty string will be interpreted as a single path
+ * separator.
  */
 bool
-exclude_path(const tchar *path, size_t path_len,
-            const struct capture_config *config, bool exclude_prefix)
+exclude_path(const tchar *path, size_t path_nchars,
+            const struct capture_config *config)
 {
+       tchar dummy[2];
+
        if (!config)
                return false;
-       const tchar *basename = path_basename_with_len(path, path_len);
-       if (exclude_prefix) {
-               wimlib_assert(path_len >= config->prefix_num_tchars);
-               if (!tmemcmp(config->prefix, path, config->prefix_num_tchars) &&
-                   path[config->prefix_num_tchars] == OS_PREFERRED_PATH_SEPARATOR)
-               {
-                       path += config->prefix_num_tchars;
-               }
+
+       if (!*path) {
+               dummy[0] = OS_PREFERRED_PATH_SEPARATOR;
+               dummy[1] = T('\0');
+               path = dummy;
        }
-       return match_pattern(path, basename, &config->exclusion_pats) &&
-               !match_pattern(path, basename, &config->exclusion_exception_pats);
+
+       return match_pattern_list(path, path_nchars, &config->exclusion_pats) &&
+             !match_pattern_list(path, path_nchars, &config->exclusion_exception_pats);
 
 }
index ee30147..335eb6d 100644 (file)
@@ -536,7 +536,7 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret,
        struct wim_dentry *root = NULL;
        struct wim_inode *inode = NULL;
 
-       if (exclude_path(path, path_len, params->config, false)) {
+       if (exclude_path(path, path_len, params->config)) {
                /* Exclude a file or directory tree based on the capture
                 * configuration file.  */
                ret = 0;
index 675b9f2..62347c9 100644 (file)
@@ -83,6 +83,36 @@ canonicalize_fs_path(const tchar *fs_path)
        return TSTRDUP(fs_path);
 }
 
+/* Collapse and translate path separators, and strip trailing slashes.  Doesn't
+ * add or delete a leading slash.
+ *
+ * @in may alias @out.
+ */
+void
+do_canonicalize_path(const tchar *in, tchar *out)
+{
+       tchar *orig_out = out;
+
+       while (*in) {
+               if (is_any_path_separator(*in)) {
+                       /* Collapse multiple path separators into one  */
+                       *out++ = WIM_PATH_SEPARATOR;
+                       do {
+                               in++;
+                       } while (is_any_path_separator(*in));
+               } else {
+                       /* Copy non-path-separator character  */
+                       *out++ = *in++;
+               }
+       }
+
+       /* Remove trailing slash if existent  */
+       if (out - orig_out > 1 && *(out - 1) == WIM_PATH_SEPARATOR)
+               --out;
+
+       *out = T('\0');
+}
+
 /*
  * canonicalize_wim_path() - Given a user-provided path to a file within a WIM
  * image, translate it into a "canonical" path.
@@ -126,24 +156,7 @@ canonicalize_wim_path(const tchar *wim_path)
        if (!is_any_path_separator(*in))
                *out++ = WIM_PATH_SEPARATOR;
 
-       while (*in) {
-               if (is_any_path_separator(*in)) {
-                       /* Collapse multiple path separators into one  */
-                       *out++ = WIM_PATH_SEPARATOR;
-                       do {
-                               in++;
-                       } while (is_any_path_separator(*in));
-               } else {
-                       /* Copy non-path-separator character  */
-                       *out++ = *in++;
-               }
-       }
-
-       /* Remove trailing slash if existent  */
-       if (*(out - 1) == WIM_PATH_SEPARATOR && (out - 1) != result)
-               --out;
-
-       *out = T('\0');
+       do_canonicalize_path(in, out);
 
        return result;
 }
index 27f493a..c0aa195 100644 (file)
@@ -209,7 +209,10 @@ unix_build_dentry_tree_recursive(struct wim_dentry **root_ret,
        struct wim_inode *inode = NULL;
        struct stat stbuf;
 
-       if (exclude_path(path, path_len, params->config, true)) {
+       if (exclude_path(path + params->capture_root_nchars,
+                        path_len - params->capture_root_nchars,
+                        params->config))
+       {
                ret = 0;
                goto out_progress;
        }
@@ -357,6 +360,8 @@ unix_build_dentry_tree(struct wim_dentry **root_ret,
                return WIMLIB_ERR_NOMEM;
        memcpy(path_buf, root_disk_path, path_len + 1);
 
+       params->capture_root_nchars = path_len;
+
        ret = unix_build_dentry_tree_recursive(root_ret, path_buf,
                                               path_len, params);
        FREE(path_buf);
index 37d6593..a2589f0 100644 (file)
@@ -849,9 +849,6 @@ execute_add_command(struct update_command_journal *j,
        if (progress_func)
                progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &params.progress);
 
-       config.prefix = fs_source_path;
-       config.prefix_num_tchars = tstrlen(fs_source_path);
-
        if (WIMLIB_IS_WIM_ROOT_PATH(wim_target_path))
                params.add_flags |= WIMLIB_ADD_FLAG_ROOT;
        ret = (*capture_tree)(&branch, fs_source_path, &params);
index 7898690..742b564 100644 (file)
@@ -32,6 +32,7 @@
 #include "wimlib/encoding.h"
 #include "wimlib/error.h"
 #include "wimlib/metadata.h"
+#include "wimlib/paths.h"
 #include "wimlib/wildcard.h"
 
 struct match_dentry_ctx {
@@ -44,16 +45,6 @@ struct match_dentry_ctx {
        bool case_insensitive;
 };
 
-#define PLATFORM_SUPPORTS_FNMATCH
-
-#ifdef __WIN32__
-/* PathMatchSpec() could provide a fnmatch() alternative, but it isn't
- * documented properly, nor does it work properly.  For example, it returns that
- * any name matches *.* even if that name doesn't actually contain a period.  */
-#  undef PLATFORM_SUPPORTS_FNMATCH
-#endif
-
-#ifndef PLATFORM_SUPPORTS_FNMATCH
 static bool
 do_match_wildcard(const tchar *string, size_t string_len,
                  const tchar *wildcard, size_t wildcard_len,
@@ -88,30 +79,86 @@ do_match_wildcard(const tchar *string, size_t string_len,
                }
        }
 }
-#endif /* ! PLATFORM_SUPPORTS_FNMATCH  */
 
 static bool
-match_wildcard(const tchar *string, tchar *wildcard,
+match_wildcard(const tchar *string, const tchar *wildcard,
               size_t wildcard_len, bool ignore_case)
 {
-#ifdef PLATFORM_SUPPORTS_FNMATCH
-       char orig;
-       int ret;
-       int flags = FNM_NOESCAPE;
-       if (ignore_case)
-               flags |= FNM_CASEFOLD;
-
-       orig = wildcard[wildcard_len];
-       wildcard[wildcard_len] = T('\0');
-
-       ret = fnmatch(wildcard, string, flags);
-
-       wildcard[wildcard_len] = orig;
-       return (ret == 0);
-#else
        return do_match_wildcard(string, tstrlen(string),
                                 wildcard, wildcard_len, ignore_case);
-#endif
+}
+
+/*
+ * Determines whether a path matches a wildcard pattern.
+ *
+ * @path
+ *     The path to match.  Assumptions:  All path separators must be @path_sep,
+ *     there cannot be consecutive path separators, there cannot be a trailing
+ *     path separator, and there must be exactly one leading path separator.
+ *
+ * @path_nchars
+ *     Number of characters in @path.
+ *
+ * @wildcard
+ *     The wildcard pattern to match.  It can contain the wildcard characters
+ *     '*' and '?'.  The former matches zero or more characters except
+ *     @path_sep, and the latter matches any character except @path_sep.  All
+ *     path separators in the pattern must be @path_sep, and there cannot be
+ *     consecutive path separators, and there cannot be a trailing path
+ *     separator.  If there is no leading path separator, the match is
+ *     attempted with the filename only; otherwise, the match is attempted with
+ *     the whole path.
+ *
+ * @path_sep
+ *     Path separator character in @path and @wildcard.
+ *
+ * @prefix_ok
+ *     If %true, allow a prefix of @path, terminated by a path separator, to
+ *     match, in addition to @path itself.  I.e., also return true if the
+ *     wildcard actually matches one of the ancestor directories of @path.
+ *
+ * Returns %true if there was a match; %false if there was not.
+ */
+bool
+match_path(const tchar *path, size_t path_nchars,
+          const tchar *wildcard, tchar path_sep, bool prefix_ok)
+{
+       if (*wildcard != path_sep) {
+               /* Pattern doesn't begin with path separator.  Try to match the
+                * file name only.  */
+               return match_wildcard(path_basename_with_len(path, path_nchars),
+                                     wildcard, tstrlen(wildcard),
+                                     default_ignore_case);
+       } else {
+               /* Pattern begins with path separator.  Try to match the whole
+                * path.  */
+               do {
+                       if (!*wildcard) {
+                               /* Path has more components than pattern  */
+                               return prefix_ok;
+                       }
+
+                       size_t path_component_len = 0;
+                       size_t wildcard_component_len = 0;
+
+                       do {
+                               path_component_len++;
+                       } while (path[path_component_len] != path_sep &&
+                                path[path_component_len] != T('\0'));
+                       do {
+                               wildcard_component_len++;
+                       } while (wildcard[wildcard_component_len] != path_sep &&
+                                wildcard[wildcard_component_len] != T('\0'));
+                       if (!do_match_wildcard(path, path_component_len,
+                                              wildcard, wildcard_component_len,
+                                              default_ignore_case))
+                               return false;
+                       path += path_component_len;
+                       wildcard += wildcard_component_len;
+               } while (*path);
+
+               return (*wildcard == '\0');
+       }
 }
 
 static int
index 48ff5d8..c78683b 100644 (file)
 #include "wimlib/win32_common.h"
 
 #include "wimlib/apply.h"
-#include "wimlib/capture.h"
+#include "wimlib/capture.h" /* for mangle_pat()  */
 #include "wimlib/dentry.h"
 #include "wimlib/error.h"
 #include "wimlib/lookup_table.h"
-#include "wimlib/paths.h"
 #include "wimlib/resource.h"
 #include "wimlib/textfile.h"
 #include "wimlib/xml.h"
+#include "wimlib/wildcard.h"
 #include "wimlib/wim.h"
 #include "wimlib/wimboot.h"
 
@@ -114,6 +114,7 @@ load_prepopulate_pats(struct apply_ctx *ctx)
                                LOAD_TEXT_FILE_REMOVE_QUOTES |
                                        LOAD_TEXT_FILE_NO_WARNINGS,
                                mangle_pat);
+       BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR);
        FREE(buf);
        if (ret) {
                FREE(s);
@@ -129,15 +130,24 @@ in_prepopulate_list(struct wim_dentry *dentry, struct apply_ctx *ctx)
 {
        struct string_set *pats;
        const tchar *path;
+       size_t path_nchars;
 
        pats = get_private_data(ctx)->prepopulate_pats;
-       if (!pats)
+       if (!pats || !pats->num_strings)
                return false;
+
        path = dentry_full_path(dentry);
        if (!path)
                return false;
 
-       return match_pattern(path, path_basename(path), pats);
+       path_nchars = tstrlen(path);
+
+       for (size_t i = 0; i < pats->num_strings; i++)
+               if (match_path(path, path_nchars, pats->strings[i],
+                              OS_PREFERRED_PATH_SEPARATOR, true))
+                       return true;
+
+       return false;
 }
 
 static int
index 71c9cb4..745ee77 100644 (file)
@@ -1141,12 +1141,10 @@ win32_build_dentry_tree_recursive(struct wim_dentry **root_ret,
        DWORD desiredAccess;
 
 
-       if (exclude_path(path, path_num_chars, params->config, true)) {
-               if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) {
-                       ERROR("Cannot exclude the root directory from capture");
-                       ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
-                       goto out;
-               }
+       if (exclude_path(path + params->capture_root_nchars,
+                        path_num_chars - params->capture_root_nchars,
+                        params->config))
+       {
                ret = 0;
                goto out_progress;
        }
@@ -1413,32 +1411,12 @@ win32_build_dentry_tree(struct wim_dentry **root_ret,
                wmemcpy(path, root_disk_path, path_nchars + 1);
        }
 
-       /* Strip trailing slashes.  */
-       while (path_nchars >= 2 &&
-              is_any_path_separator(path[path_nchars - 1]) &&
-              path[path_nchars - 2] != L':')
-       {
-               path[--path_nchars] = L'\0';
-       }
-
-       /* Update pattern prefix.  */
-       if (params->config != NULL)
-       {
-               params->config->prefix = TSTRDUP(path);
-               params->config->prefix_num_tchars = path_nchars;
-               if (params->config->prefix == NULL)
-               {
-                       ret = WIMLIB_ERR_NOMEM;
-                       goto out_free_path;
-               }
-       }
+       params->capture_root_nchars = path_nchars;
 
        memset(&state, 0, sizeof(state));
        ret = win32_build_dentry_tree_recursive(root_ret, path,
                                                path_nchars, params,
                                                &state, vol_flags);
-       if (params->config != NULL)
-               FREE(params->config->prefix);
 out_free_path:
        FREE(path);
        if (ret == 0)
index 0f19e89..0e1023f 100644 (file)
@@ -189,16 +189,6 @@ err_set_errno:
        return -1;
 }
 
-/* Replacement for POSIX fnmatch() (partial functionality only) */
-int
-fnmatch(const wchar_t *pattern, const wchar_t *string, int flags)
-{
-       if (PathMatchSpecW(string, pattern))
-               return 0;
-       else
-               return FNM_NOMATCH;
-}
-
 /* truncate() replacement */
 int
 win32_truncate_replacement(const wchar_t *path, off_t size)