wimlib.net Git - wimlib/blobdiff - src/wildcard.c
A couple cleanups from last commits
[wimlib] / src / wildcard.c
index 9c878e21fde6d1b47d4b0a6c6a08d795b9319b09..2619f47b59fe445f84d2e31012d2e6f90828464e 100644 (file)
 #  include "config.h"
 #endif
 
+#include <ctype.h>
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
 #include "wimlib/error.h"
 #include "wimlib/metadata.h"
+#include "wimlib/paths.h"
 #include "wimlib/wildcard.h"
 
 struct match_dentry_ctx {
-       int (*consume_path)(const tchar *, void *, bool);
-       void *consume_path_ctx;
-       size_t consume_path_count;
-       tchar *expanded_path;
-       size_t expanded_path_len;
-       size_t expanded_path_alloc_len;
+       int (*consume_dentry)(struct wim_dentry *, void *);
+       void *consume_dentry_ctx;
+       size_t consume_dentry_count;
        tchar *wildcard_path;
        size_t cur_component_offset;
        size_t cur_component_len;
@@ -47,26 +46,120 @@ struct match_dentry_ctx {
 };
 
 static bool
-match_wildcard(const tchar *string, tchar *wildcard,
-              size_t wildcard_len, bool case_insensitive)
+do_match_wildcard(const tchar *string, size_t string_len,
+                 const tchar *wildcard, size_t wildcard_len,
+                 bool ignore_case)
 {
-       char orig;
-       int flags;
-       int ret;
+       for (;;) {
+               if (string_len == 0) {
+                       while (wildcard_len != 0 && *wildcard == T('*')) {
+                               wildcard++;
+                               wildcard_len--;
+                       }
+                       return (wildcard_len == 0);
+               } else if (wildcard_len == 0) {
+                       return false;
+               } else if (*string == *wildcard || *wildcard == T('?') ||
+                          (ignore_case && totlower(*string) == totlower(*wildcard)))
+               {
+                       string++;
+                       string_len--;
+                       wildcard_len--;
+                       wildcard++;
+                       continue;
+               } else if (*wildcard == T('*')) {
+                       return do_match_wildcard(string, string_len,
+                                                wildcard + 1, wildcard_len - 1,
+                                                ignore_case) ||
+                              do_match_wildcard(string + 1, string_len - 1,
+                                                wildcard, wildcard_len,
+                                                ignore_case);
+               } else {
+                       return false;
+               }
+       }
+}
 
-       orig = wildcard[wildcard_len];
-       wildcard[wildcard_len] = T('\0');
+static bool
+match_wildcard(const tchar *string, const tchar *wildcard,
+              size_t wildcard_len, bool ignore_case)
+{
+       return do_match_wildcard(string, tstrlen(string),
+                                wildcard, wildcard_len, ignore_case);
+}
 
-       /* Warning: in Windows builds fnmatch() calls a replacement function.
-        * Also, FNM_CASEFOLD is a GNU extension and it is defined to 0 if not
-        * available.  */
-       flags = FNM_NOESCAPE;
-       if (case_insensitive)
-               flags |= FNM_CASEFOLD;
-       ret = fnmatch(wildcard, string, flags);
+/*
+ * Determines whether a path matches a wildcard pattern.
+ *
+ * @path
+ *     The path to match.  Assumptions:  All path separators must be @path_sep,
+ *     there cannot be consecutive path separators, there cannot be a trailing
+ *     path separator, and there must be exactly one leading path separator.
+ *
+ * @path_nchars
+ *     Number of characters in @path.
+ *
+ * @wildcard
+ *     The wildcard pattern to match.  It can contain the wildcard characters
+ *     '*' and '?'.  The former matches zero or more characters except
+ *     @path_sep, and the latter matches any single character except
+ *     @path_sep.  All path separators in the pattern must be @path_sep, there
+ *     cannot be consecutive path separators, and there cannot be a trailing
+ *     path separator.  If there is a leading path separator, the match is
+ *     attempted with the whole path; otherwise, the match is attempted with
+ *     the filename only.
+ *
+ * @path_sep
+ *     Path separator character used in @path and @wildcard.
+ *
+ * @prefix_ok
+ *     If %true, allow a prefix of @path, terminated by a path separator, to
+ *     match the pattern, in addition to @path itself.  In other words, also
+ *     return %true if the pattern matches one of the ancestor directories of
+ *     @path.
+ *
+ * Returns %true if there was a match; %false if there was not.
+ */
+bool
+match_path(const tchar *path, size_t path_nchars,
+          const tchar *wildcard, tchar path_sep, bool prefix_ok)
+{
+       if (*wildcard != path_sep) {
+               /* Pattern doesn't begin with path separator.  Try to match the
+                * file name only.  */
+               return match_wildcard(path_basename_with_len(path, path_nchars),
+                                     wildcard, tstrlen(wildcard),
+                                     default_ignore_case);
+       } else {
+               /* Pattern begins with path separator.  Try to match the whole
+                * path.  */
+               do {
+                       if (!*wildcard) {
+                               /* Path has more components than pattern  */
+                               return prefix_ok;
+                       }
 
-       wildcard[wildcard_len] = orig;
-       return (ret == 0);
+                       size_t path_component_len = 0;
+                       size_t wildcard_component_len = 0;
+
+                       do {
+                               path_component_len++;
+                       } while (path[path_component_len] != path_sep &&
+                                path[path_component_len] != T('\0'));
+                       do {
+                               wildcard_component_len++;
+                       } while (wildcard[wildcard_component_len] != path_sep &&
+                                wildcard[wildcard_component_len] != T('\0'));
+                       if (!do_match_wildcard(path, path_component_len,
+                                              wildcard, wildcard_component_len,
+                                              default_ignore_case))
+                               return false;
+                       path += path_component_len;
+                       wildcard += wildcard_component_len;
+               } while (*path);
+
+               return (*wildcard == '\0');
+       }
 }
 
 static int
@@ -84,7 +177,7 @@ wildcard_status(const tchar *wildcard)
 {
        if (*wildcard == T('\0'))
                return WILDCARD_STATUS_DONE_FULLY;
-       while (is_any_path_separator(*wildcard))
+       while (*wildcard == WIM_PATH_SEPARATOR)
                wildcard++;
        if (*wildcard == T('\0'))
                return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
@@ -93,9 +186,8 @@ wildcard_status(const tchar *wildcard)
 }
 
 static int
-match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
+match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
 {
-       struct match_dentry_ctx *ctx = _ctx;
        tchar *name;
        size_t name_len;
        int ret;
@@ -120,29 +212,6 @@ match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
                           ctx->cur_component_len,
                           ctx->case_insensitive))
        {
-               size_t len_needed = ctx->expanded_path_len + 1 + name_len + 1;
-               size_t expanded_path_len_save;
-
-               if (len_needed > ctx->expanded_path_alloc_len) {
-                       tchar *expanded_path;
-
-                       expanded_path = REALLOC(ctx->expanded_path,
-                                               len_needed * sizeof(ctx->expanded_path[0]));
-                       if (expanded_path == NULL) {
-                               ret = WIMLIB_ERR_NOMEM;
-                               goto out_free_name;
-                       }
-                       ctx->expanded_path = expanded_path;
-                       ctx->expanded_path_alloc_len = len_needed;
-               }
-               expanded_path_len_save = ctx->expanded_path_len;
-
-               ctx->expanded_path[ctx->expanded_path_len++] = WIM_PATH_SEPARATOR;
-               tmemcpy(&ctx->expanded_path[ctx->expanded_path_len],
-                       name, name_len);
-               ctx->expanded_path_len += name_len;
-               ctx->expanded_path[ctx->expanded_path_len] = T('\0');
-
                switch (wildcard_status(&ctx->wildcard_path[
                                ctx->cur_component_offset +
                                ctx->cur_component_len]))
@@ -154,22 +223,18 @@ match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
                        }
                        /* Fall through  */
                case WILDCARD_STATUS_DONE_FULLY:
-                       ret = (*ctx->consume_path)(ctx->expanded_path,
-                                                  ctx->consume_path_ctx,
-                                                  false);
-                       ctx->consume_path_count++;
+                       ret = (*ctx->consume_dentry)(cur_dentry,
+                                                    ctx->consume_dentry_ctx);
+                       ctx->consume_dentry_count++;
                        break;
                case WILDCARD_STATUS_NOT_DONE:
                        ret = expand_wildcard_recursive(cur_dentry, ctx);
                        break;
                }
-               ctx->expanded_path_len = expanded_path_len_save;
-               ctx->expanded_path[expanded_path_len_save] = T('\0');
        } else {
                ret = 0;
        }
 
-out_free_name:
 #if !TCHAR_IS_UTF16LE
        FREE(name);
 #endif
@@ -187,16 +252,17 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
        size_t offset_save;
        size_t len_save;
        int ret;
+       struct wim_dentry *child;
 
        w = ctx->wildcard_path;
 
        begin = ctx->cur_component_offset + ctx->cur_component_len;
-       while (is_any_path_separator(w[begin]))
+       while (w[begin] == WIM_PATH_SEPARATOR)
                begin++;
 
        end = begin;
 
-       while (w[end] != T('\0') && !is_any_path_separator(w[end]))
+       while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
                end++;
 
        len = end - begin;
@@ -210,7 +276,12 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
        ctx->cur_component_offset = begin;
        ctx->cur_component_len = len;
 
-       ret = for_dentry_child(cur_dentry, match_dentry, ctx);
+       ret = 0;
+       for_dentry_child(child, cur_dentry) {
+               ret = match_dentry(child, ctx);
+               if (ret)
+                       break;
+       }
 
        ctx->cur_component_len = len_save;
        ctx->cur_component_offset = offset_save;
@@ -218,11 +289,44 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
        return ret;
 }
 
-static int
+/* Expand a wildcard relative to the current WIM image.
+ *
+ * @wim
+ *     WIMStruct whose currently selected image is searched to expand the
+ *     wildcard.
+ * @wildcard_path
+ *     Wildcard path to expand, which may contain the '?' and '*' characters.
+ *     Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
+ *     ignored, whereas one or more trailing path separators indicate that the
+ *     wildcard path can only match directories (and not reparse points).
+ * @consume_dentry
+ *     Callback function which will receive each directory entry matched by the
+ *     wildcard.
+ * @consume_dentry_ctx
+ *     Argument to pass to @consume_dentry.
+ * @flags
+ *     Zero or more of the following flags:
+ *
+ *     WILDCARD_FLAG_WARN_IF_NO_MATCH:
+ *             Issue a warning if the wildcard does not match any dentries.
+ *
+ *     WILDCARD_FLAG_ERROR_IF_NO_MATCH:
+ *             Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
+ *             wildcard does not match any dentries.
+ *
+ *     WILDCARD_FLAG_CASE_INSENSITIVE:
+ *             Perform the matching case insensitively.  Note that this may
+ *             cause @wildcard_path to match multiple dentries, even if it
+ *             does not contain wildcard characters.
+ *
+ * @return 0 on success; a positive error code on error; or the first nonzero
+ * value returned by @consume_dentry.
+ */
+int
 expand_wildcard(WIMStruct *wim,
                const tchar *wildcard_path,
-               int (*consume_path)(const tchar *, void *, bool),
-               void *consume_path_ctx,
+               int (*consume_dentry)(struct wim_dentry *, void *),
+               void *consume_dentry_ctx,
                u32 flags)
 {
        struct wim_dentry *root;
@@ -233,36 +337,26 @@ expand_wildcard(WIMStruct *wim,
                goto no_match;
 
        struct match_dentry_ctx ctx = {
-               .consume_path = consume_path,
-               .consume_path_ctx = consume_path_ctx,
-               .consume_path_count = 0,
-               .expanded_path = MALLOC(256 * sizeof(ctx.expanded_path[0])),
-               .expanded_path_len = 0,
-               .expanded_path_alloc_len = 256,
+               .consume_dentry = consume_dentry,
+               .consume_dentry_ctx = consume_dentry_ctx,
+               .consume_dentry_count = 0,
                .wildcard_path = TSTRDUP(wildcard_path),
                .cur_component_offset = 0,
                .cur_component_len = 0,
                .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
        };
 
-       if (ctx.expanded_path == NULL || ctx.wildcard_path == NULL) {
-               FREE(ctx.expanded_path);
-               FREE(ctx.wildcard_path);
+       if (ctx.wildcard_path == NULL)
                return WIMLIB_ERR_NOMEM;
-       }
 
        ret = expand_wildcard_recursive(root, &ctx);
-       FREE(ctx.expanded_path);
        FREE(ctx.wildcard_path);
-       if (ret == 0 && ctx.consume_path_count == 0)
+       if (ret == 0 && ctx.consume_dentry_count == 0)
                goto no_match;
        return ret;
 
 no_match:
        ret = 0;
-       if (flags & WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES)
-               ret = (*consume_path)(wildcard_path, consume_path_ctx, true);
-
        if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
                WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
 
@@ -272,70 +366,3 @@ no_match:
        }
        return ret;
 }
-
-struct expanded_paths_ctx {
-       tchar **expanded_paths;
-       size_t num_expanded_paths;
-       size_t alloc_length;
-};
-
-static int
-append_path_cb(const tchar *path, void *_ctx, bool may_need_trans)
-{
-       struct expanded_paths_ctx *ctx = _ctx;
-       tchar *path_dup;
-
-       if (ctx->num_expanded_paths == ctx->alloc_length) {
-               tchar **new_paths;
-               size_t new_alloc_length = max(ctx->alloc_length + 8,
-                                             ctx->alloc_length * 3 / 2);
-
-               new_paths = REALLOC(ctx->expanded_paths,
-                                   new_alloc_length * sizeof(new_paths[0]));
-               if (new_paths == NULL)
-                       return WIMLIB_ERR_NOMEM;
-               ctx->expanded_paths = new_paths;
-               ctx->alloc_length = new_alloc_length;
-       }
-       path_dup = TSTRDUP(path);
-       if (path_dup == NULL)
-               return WIMLIB_ERR_NOMEM;
-       if (may_need_trans) {
-               for (tchar *p = path_dup; *p; p++)
-                       if (is_any_path_separator(*p))
-                               *p = WIM_PATH_SEPARATOR;
-       }
-       ctx->expanded_paths[ctx->num_expanded_paths++] = path_dup;
-       return 0;
-}
-
-int
-expand_wildcard_wim_paths(WIMStruct *wim,
-                         const char * const *wildcards,
-                         size_t num_wildcards,
-                         tchar ***expanded_paths_ret,
-                         size_t *num_expanded_paths_ret,
-                         u32 flags)
-{
-       int ret;
-       struct expanded_paths_ctx ctx = {
-               .expanded_paths = NULL,
-               .num_expanded_paths = 0,
-               .alloc_length = 0,
-       };
-       for (size_t i = 0; i < num_wildcards; i++) {
-               ret = expand_wildcard(wim, wildcards[i], append_path_cb, &ctx,
-                                     flags);
-               if (ret)
-                       goto out_free;
-       }
-       *expanded_paths_ret = ctx.expanded_paths;
-       *num_expanded_paths_ret = ctx.num_expanded_paths;
-       return 0;
-
-out_free:
-       for (size_t i = 0; i < ctx.num_expanded_paths; i++)
-               FREE(ctx.expanded_paths[i]);
-       FREE(ctx.expanded_paths);
-       return ret;
-}