From eeb96703721a2ba6c64ec63f2c330515e8992b4f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 29 Apr 2014 14:13:41 -0500 Subject: [PATCH] Adjust path matching - To get consistent behavior, always use own implementation in wildcard.c, not fnmatch() or PathMatchSpec() - Don't store prefix in capture config - Don't allow non-rooted paths with multiple directory components (these could cause confusion) - win32_apply.c: Match [PrepopulateList] patterns recursively --- include/wimlib/capture.h | 18 +++--- include/wimlib/paths.h | 3 + include/wimlib/wildcard.h | 17 +----- src/capture_common.c | 117 +++++++++++++++++++------------------- src/ntfs-3g_capture.c | 2 +- src/paths.c | 49 ++++++++++------ src/unix_capture.c | 7 ++- src/update_image.c | 3 - src/wildcard.c | 103 ++++++++++++++++++++++++--------- src/win32_apply.c | 18 ++++-- src/win32_capture.c | 32 ++--------- src/win32_replacements.c | 10 ---- 12 files changed, 204 insertions(+), 175 deletions(-) diff --git a/include/wimlib/capture.h b/include/wimlib/capture.h index aea3e3c9..8d79542f 100644 --- a/include/wimlib/capture.h +++ b/include/wimlib/capture.h @@ -15,8 +15,6 @@ struct wim_inode; struct capture_config { struct string_set exclusion_pats; struct string_set exclusion_exception_pats; - tchar *prefix; - size_t prefix_num_tchars; void *buf; }; @@ -47,14 +45,17 @@ struct add_image_params { * libntfs-3g capture. */ void *extra_arg; - u64 capture_root_ino; - u64 capture_root_dev; /* If non-NULL, the user-supplied progress function. */ wimlib_progress_func_t progress_func; /* Progress data. */ union wimlib_progress_info progress; + + /* Can be used by the capture implementation. 
*/ + u64 capture_root_ino; + u64 capture_root_dev; + size_t capture_root_nchars; }; @@ -75,15 +76,12 @@ extern void destroy_capture_config(struct capture_config *config); extern bool -match_pattern(const tchar *path, - const tchar *path_basename, - const struct string_set *list); +match_pattern_list(const tchar *path, size_t path_nchars, + const struct string_set *list); extern bool exclude_path(const tchar *path, size_t path_len, - const struct capture_config *config, - bool exclude_prefix); - + const struct capture_config *config); typedef int (*capture_tree_t)(struct wim_dentry **, const tchar *, struct add_image_params *); diff --git a/include/wimlib/paths.h b/include/wimlib/paths.h index 59c1a867..bad8e74c 100644 --- a/include/wimlib/paths.h +++ b/include/wimlib/paths.h @@ -13,6 +13,9 @@ path_basename_with_len(const tchar *path, size_t len); extern const tchar * path_stream_name(const tchar *path); +extern void +do_canonicalize_path(const tchar *in, tchar *out); + extern tchar * canonicalize_wim_path(const tchar *wim_path) _malloc_attribute; diff --git a/include/wimlib/wildcard.h b/include/wimlib/wildcard.h index 5bae113d..edd9d84b 100644 --- a/include/wimlib/wildcard.h +++ b/include/wimlib/wildcard.h @@ -16,19 +16,8 @@ expand_wildcard(WIMStruct *wim, void *consume_dentry_ctx, u32 flags); -#ifdef __WIN32__ -extern int -fnmatch(const tchar *pattern, const tchar *string, int flags); -# define FNM_CASEFOLD 0x1 -# define FNM_PATHNAME 0x2 -# define FNM_NOESCAPE 0x4 -# define FNM_NOMATCH 1 -#else -# include <fnmatch.h> -# ifndef FNM_CASEFOLD -# warning "FNM_CASEFOLD not defined!" 
-# define FNM_CASEFOLD 0 -# endif -#endif +extern bool +match_path(const tchar *path, size_t path_nchars, + const tchar *wildcard, tchar path_sep, bool prefix_ok); #endif /* _WIMLIB_WILDCARD_H */ diff --git a/src/capture_common.c b/src/capture_common.c index c74c94e0..99d6af66 100644 --- a/src/capture_common.c +++ b/src/capture_common.c @@ -81,10 +81,11 @@ mangle_pat(tchar *pat, const tchar *path, unsigned long line_no) * relative to the current working directory on the c: * drive. We require paths with drive letters to be * absolute. */ - ERROR("%"TS":%lu: Invalid path \"%"TS"\"; paths including " - "drive letters must be absolute!\n" - " Maybe try \"%"TC":\\%"TS"\"?", - path, line_no, pat, pat[0], &pat[2]); + ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n" + " Patterns including drive letters must be absolute!\n" + " Maybe try \"%"TC":%"TC"%"TS"\"?\n", + path, line_no, pat, + pat[0], OS_PREFERRED_PATH_SEPARATOR, &pat[2]); return WIMLIB_ERR_INVALID_CAPTURE_CONFIG; } @@ -96,11 +97,24 @@ mangle_pat(tchar *pat, const tchar *path, unsigned long line_no) tmemmove(pat, pat + 2, tstrlen(pat + 2) + 1); } - /* Translate all possible path separators into the operating system's - * preferred path separator. */ - for (tchar *p = pat; *p; p++) - if (is_any_path_separator(*p)) - *p = OS_PREFERRED_PATH_SEPARATOR; + /* Collapse and translate path separators. + * + * Note: we require that this works for filesystem paths and WIM paths, + * so the desired path separators must be the same. */ + BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR); + do_canonicalize_path(pat, pat); + + /* Relative patterns can only match file names. 
*/ + if (pat[0] != OS_PREFERRED_PATH_SEPARATOR && + tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR)) + { + ERROR("%"TS":%lu: Invalid path \"%"TS"\":\n" + " Relative patterns can only include one path component!\n" + " Maybe try \"%"TC"%"TS"\"?", + path, line_no, pat, OS_PREFERRED_PATH_SEPARATOR, pat); + return WIMLIB_ERR_INVALID_CAPTURE_CONFIG; + } + return 0; } @@ -109,7 +123,12 @@ do_read_capture_config_file(const tchar *config_file, const void *buf, size_t bufsize, struct capture_config *config) { int ret; + + /* [PrepopulateList] is used for apply, not capture. But since we do + * understand it, recognize it (avoiding unrecognized section warning) + * and discard the strings. */ STRING_SET(prepopulate_pats); + struct text_file_section sections[] = { {T("ExclusionList"), &config->exclusion_pats}, @@ -141,68 +160,48 @@ destroy_capture_config(struct capture_config *config) } bool -match_pattern(const tchar *path, - const tchar *path_basename, - const struct string_set *list) +match_pattern_list(const tchar *path, size_t path_len, + const struct string_set *list) { - for (size_t i = 0; i < list->num_strings; i++) { - - const tchar *pat = list->strings[i]; - const tchar *string; - - if (*pat == OS_PREFERRED_PATH_SEPARATOR) { - /* Absolute path from root of capture */ - string = path; - } else { - if (tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR)) - /* Relative path from root of capture */ - string = path + 1; - else - /* A file name pattern */ - string = path_basename; - } - - /* Warning: on Windows native builds, fnmatch() calls the - * replacement function in win32.c. 
*/ - if (fnmatch(pat, string, FNM_PATHNAME | FNM_NOESCAPE - #ifdef FNM_CASEFOLD - | FNM_CASEFOLD - #endif - ) == 0) - { - DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"", - string, pat); + for (size_t i = 0; i < list->num_strings; i++) + if (match_path(path, path_len, list->strings[i], + OS_PREFERRED_PATH_SEPARATOR, true)) return true; - } - } return false; } -/* Return true if the image capture configuration file indicates we should +/* + * Return true if the image capture configuration file indicates we should * exclude the filename @path from capture. * - * If @exclude_prefix is %true, the part of the path up and including the name - * of the directory being captured is not included in the path for matching - * purposes. This allows, for example, a pattern like /hiberfil.sys to match a - * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7 - * directory. + * The passed in @path must be given relative to the root of the capture, but + * with a leading path separator. For example, if the file "in/file" is being + * tested and the library user ran wimlib_add_image(wim, "in", ...), then the + * directory "in" is the root of the capture and the path should be specified as + * "/file". + * + * Also, all path separators in @path must be OS_PREFERRED_PATH_SEPARATOR, and + * there cannot be trailing slashes. + * + * As a special case, the empty string will be interpreted as a single path + * separator. 
*/ bool -exclude_path(const tchar *path, size_t path_len, - const struct capture_config *config, bool exclude_prefix) +exclude_path(const tchar *path, size_t path_nchars, + const struct capture_config *config) { + tchar dummy[2]; + if (!config) return false; - const tchar *basename = path_basename_with_len(path, path_len); - if (exclude_prefix) { - wimlib_assert(path_len >= config->prefix_num_tchars); - if (!tmemcmp(config->prefix, path, config->prefix_num_tchars) && - path[config->prefix_num_tchars] == OS_PREFERRED_PATH_SEPARATOR) - { - path += config->prefix_num_tchars; - } + + if (!*path) { + dummy[0] = OS_PREFERRED_PATH_SEPARATOR; + dummy[1] = T('\0'); + path = dummy; } - return match_pattern(path, basename, &config->exclusion_pats) && - !match_pattern(path, basename, &config->exclusion_exception_pats); + + return match_pattern_list(path, path_nchars, &config->exclusion_pats) && + !match_pattern_list(path, path_nchars, &config->exclusion_exception_pats); } diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index ee301477..335eb6d5 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -536,7 +536,7 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, struct wim_dentry *root = NULL; struct wim_inode *inode = NULL; - if (exclude_path(path, path_len, params->config, false)) { + if (exclude_path(path, path_len, params->config)) { /* Exclude a file or directory tree based on the capture * configuration file. */ ret = 0; diff --git a/src/paths.c b/src/paths.c index 675b9f27..62347c9f 100644 --- a/src/paths.c +++ b/src/paths.c @@ -83,6 +83,36 @@ canonicalize_fs_path(const tchar *fs_path) return TSTRDUP(fs_path); } +/* Collapse and translate path separators, and strip trailing slashes. Doesn't + * add or delete a leading slash. + * + * @in may alias @out. 
+ */ +void +do_canonicalize_path(const tchar *in, tchar *out) +{ + tchar *orig_out = out; + + while (*in) { + if (is_any_path_separator(*in)) { + /* Collapse multiple path separators into one */ + *out++ = WIM_PATH_SEPARATOR; + do { + in++; + } while (is_any_path_separator(*in)); + } else { + /* Copy non-path-separator character */ + *out++ = *in++; + } + } + + /* Remove trailing slash if existent */ + if (out - orig_out > 1 && *(out - 1) == WIM_PATH_SEPARATOR) + --out; + + *out = T('\0'); +} + /* * canonicalize_wim_path() - Given a user-provided path to a file within a WIM * image, translate it into a "canonical" path. @@ -126,24 +156,7 @@ canonicalize_wim_path(const tchar *wim_path) if (!is_any_path_separator(*in)) *out++ = WIM_PATH_SEPARATOR; - while (*in) { - if (is_any_path_separator(*in)) { - /* Collapse multiple path separators into one */ - *out++ = WIM_PATH_SEPARATOR; - do { - in++; - } while (is_any_path_separator(*in)); - } else { - /* Copy non-path-separator character */ - *out++ = *in++; - } - } - - /* Remove trailing slash if existent */ - if (*(out - 1) == WIM_PATH_SEPARATOR && (out - 1) != result) - --out; - - *out = T('\0'); + do_canonicalize_path(in, out); return result; } diff --git a/src/unix_capture.c b/src/unix_capture.c index 27f493ac..c0aa1950 100644 --- a/src/unix_capture.c +++ b/src/unix_capture.c @@ -209,7 +209,10 @@ unix_build_dentry_tree_recursive(struct wim_dentry **root_ret, struct wim_inode *inode = NULL; struct stat stbuf; - if (exclude_path(path, path_len, params->config, true)) { + if (exclude_path(path + params->capture_root_nchars, + path_len - params->capture_root_nchars, + params->config)) + { ret = 0; goto out_progress; } @@ -357,6 +360,8 @@ unix_build_dentry_tree(struct wim_dentry **root_ret, return WIMLIB_ERR_NOMEM; memcpy(path_buf, root_disk_path, path_len + 1); + params->capture_root_nchars = path_len; + ret = unix_build_dentry_tree_recursive(root_ret, path_buf, path_len, params); FREE(path_buf); diff --git 
a/src/update_image.c b/src/update_image.c index 37d6593c..a2589f0c 100644 --- a/src/update_image.c +++ b/src/update_image.c @@ -849,9 +849,6 @@ execute_add_command(struct update_command_journal *j, if (progress_func) progress_func(WIMLIB_PROGRESS_MSG_SCAN_BEGIN, &params.progress); - config.prefix = fs_source_path; - config.prefix_num_tchars = tstrlen(fs_source_path); - if (WIMLIB_IS_WIM_ROOT_PATH(wim_target_path)) params.add_flags |= WIMLIB_ADD_FLAG_ROOT; ret = (*capture_tree)(&branch, fs_source_path, &params); diff --git a/src/wildcard.c b/src/wildcard.c index 7898690a..742b5647 100644 --- a/src/wildcard.c +++ b/src/wildcard.c @@ -32,6 +32,7 @@ #include "wimlib/encoding.h" #include "wimlib/error.h" #include "wimlib/metadata.h" +#include "wimlib/paths.h" #include "wimlib/wildcard.h" struct match_dentry_ctx { @@ -44,16 +45,6 @@ struct match_dentry_ctx { bool case_insensitive; }; -#define PLATFORM_SUPPORTS_FNMATCH - -#ifdef __WIN32__ -/* PathMatchSpec() could provide a fnmatch() alternative, but it isn't - * documented properly, nor does it work properly. For example, it returns that - * any name matches *.* even if that name doesn't actually contain a period. */ -# undef PLATFORM_SUPPORTS_FNMATCH -#endif - -#ifndef PLATFORM_SUPPORTS_FNMATCH static bool do_match_wildcard(const tchar *string, size_t string_len, const tchar *wildcard, size_t wildcard_len, @@ -88,30 +79,86 @@ do_match_wildcard(const tchar *string, size_t string_len, } } } -#endif /* ! 
PLATFORM_SUPPORTS_FNMATCH */ static bool -match_wildcard(const tchar *string, tchar *wildcard, +match_wildcard(const tchar *string, const tchar *wildcard, size_t wildcard_len, bool ignore_case) { -#ifdef PLATFORM_SUPPORTS_FNMATCH - char orig; - int ret; - int flags = FNM_NOESCAPE; - if (ignore_case) - flags |= FNM_CASEFOLD; - - orig = wildcard[wildcard_len]; - wildcard[wildcard_len] = T('\0'); - - ret = fnmatch(wildcard, string, flags); - - wildcard[wildcard_len] = orig; - return (ret == 0); -#else return do_match_wildcard(string, tstrlen(string), wildcard, wildcard_len, ignore_case); -#endif +} + +/* + * Determines whether a path matches a wildcard pattern. + * + * @path + * The path to match. Assumptions: All path separators must be @path_sep, + * there cannot be consecutive path separators, there cannot be a trailing + * path separator, and there must be exactly one leading path separator. + * + * @path_nchars + * Number of characters in @path. + * + * @wildcard + * The wildcard pattern to match. It can contain the wildcard characters + * '*' and '?'. The former matches zero or more characters except + * @path_sep, and the latter matches any character except @path_sep. All + * path separators in the pattern must be @path_sep, and there cannot be + * consecutive path separators, and there cannot be a trailing path + * separator. If there is no leading path separator, the match is attempted + * with the filename only; otherwise, the match is attempted with the whole + * path. + * + * @path_sep + * Path separator character in @path and @wildcard. + * + * @prefix_ok + * If %true, allow a prefix of @path, terminated by a path separator, to + * match, in addition to @path itself. In other words, also return true if the + * wildcard actually matches one of the ancestor directories of @path. + * + * Returns %true if there was a match; %false if there was not. 
+ */ +bool +match_path(const tchar *path, size_t path_nchars, + const tchar *wildcard, tchar path_sep, bool prefix_ok) +{ + if (*wildcard != path_sep) { + /* Pattern doesn't begin with path separator. Try to match the + * file name only. */ + return match_wildcard(path_basename_with_len(path, path_nchars), + wildcard, tstrlen(wildcard), + default_ignore_case); + } else { + /* Pattern begins with path separator. Try to match the whole + * path. */ + do { + if (!*wildcard) { + /* Path has more components than pattern */ + return prefix_ok; + } + + size_t path_component_len = 0; + size_t wildcard_component_len = 0; + + do { + path_component_len++; + } while (path[path_component_len] != path_sep && + path[path_component_len] != T('\0')); + do { + wildcard_component_len++; + } while (wildcard[wildcard_component_len] != path_sep && + wildcard[wildcard_component_len] != T('\0')); + if (!do_match_wildcard(path, path_component_len, + wildcard, wildcard_component_len, + default_ignore_case)) + return false; + path += path_component_len; + wildcard += wildcard_component_len; + } while (*path); + + return (*wildcard == '\0'); + } } static int diff --git a/src/win32_apply.c b/src/win32_apply.c index 48ff5d87..c78683b7 100644 --- a/src/win32_apply.c +++ b/src/win32_apply.c @@ -30,14 +30,14 @@ #include "wimlib/win32_common.h" #include "wimlib/apply.h" -#include "wimlib/capture.h" +#include "wimlib/capture.h" /* for mangle_pat() */ #include "wimlib/dentry.h" #include "wimlib/error.h" #include "wimlib/lookup_table.h" -#include "wimlib/paths.h" #include "wimlib/resource.h" #include "wimlib/textfile.h" #include "wimlib/xml.h" +#include "wimlib/wildcard.h" #include "wimlib/wim.h" #include "wimlib/wimboot.h" @@ -114,6 +114,7 @@ load_prepopulate_pats(struct apply_ctx *ctx) LOAD_TEXT_FILE_REMOVE_QUOTES | LOAD_TEXT_FILE_NO_WARNINGS, mangle_pat); + BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR); FREE(buf); if (ret) { FREE(s); @@ -129,15 +130,24 @@ 
in_prepopulate_list(struct wim_dentry *dentry, struct apply_ctx *ctx) { struct string_set *pats; const tchar *path; + size_t path_nchars; pats = get_private_data(ctx)->prepopulate_pats; - if (!pats) + if (!pats || !pats->num_strings) return false; + path = dentry_full_path(dentry); if (!path) return false; - return match_pattern(path, path_basename(path), pats); + path_nchars = tstrlen(path); + + for (size_t i = 0; i < pats->num_strings; i++) + if (match_path(path, path_nchars, pats->strings[i], + OS_PREFERRED_PATH_SEPARATOR, true)) + return true; + + return false; } static int diff --git a/src/win32_capture.c b/src/win32_capture.c index 71c9cb4c..745ee77a 100644 --- a/src/win32_capture.c +++ b/src/win32_capture.c @@ -1141,12 +1141,10 @@ win32_build_dentry_tree_recursive(struct wim_dentry **root_ret, DWORD desiredAccess; - if (exclude_path(path, path_num_chars, params->config, true)) { - if (params->add_flags & WIMLIB_ADD_FLAG_ROOT) { - ERROR("Cannot exclude the root directory from capture"); - ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG; - goto out; - } + if (exclude_path(path + params->capture_root_nchars, + path_num_chars - params->capture_root_nchars, + params->config)) + { ret = 0; goto out_progress; } @@ -1413,32 +1411,12 @@ win32_build_dentry_tree(struct wim_dentry **root_ret, wmemcpy(path, root_disk_path, path_nchars + 1); } - /* Strip trailing slashes. */ - while (path_nchars >= 2 && - is_any_path_separator(path[path_nchars - 1]) && - path[path_nchars - 2] != L':') - { - path[--path_nchars] = L'\0'; - } - - /* Update pattern prefix. 
*/ - if (params->config != NULL) - { - params->config->prefix = TSTRDUP(path); - params->config->prefix_num_tchars = path_nchars; - if (params->config->prefix == NULL) - { - ret = WIMLIB_ERR_NOMEM; - goto out_free_path; - } - } + params->capture_root_nchars = path_nchars; memset(&state, 0, sizeof(state)); ret = win32_build_dentry_tree_recursive(root_ret, path, path_nchars, params, &state, vol_flags); - if (params->config != NULL) - FREE(params->config->prefix); out_free_path: FREE(path); if (ret == 0) diff --git a/src/win32_replacements.c b/src/win32_replacements.c index 0f19e891..0e1023f6 100644 --- a/src/win32_replacements.c +++ b/src/win32_replacements.c @@ -189,16 +189,6 @@ err_set_errno: return -1; } -/* Replacement for POSIX fnmatch() (partial functionality only) */ -int -fnmatch(const wchar_t *pattern, const wchar_t *string, int flags) -{ - if (PathMatchSpecW(string, pattern)) - return 0; - else - return FNM_NOMATCH; -} - /* truncate() replacement */ int win32_truncate_replacement(const wchar_t *path, off_t size) -- 2.43.0