wimlib.net Git - wimlib/blobdiff - src/wildcard.c
A few cleanups and fixes from recent changes
[wimlib] / src / wildcard.c
index 0965b26c182a0abe3cd7f0cb177c2a474f475f8a..febe8ee039def9e92f435664756ff53ce2801837 100644 (file)
@@ -7,30 +7,31 @@
 /*
  * Copyright (C) 2013 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifdef HAVE_CONFIG_H
 #  include "config.h"
 #endif
 
+#include <ctype.h>
+
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
 #include "wimlib/error.h"
 #include "wimlib/metadata.h"
+#include "wimlib/paths.h"
 #include "wimlib/wildcard.h"
 
 struct match_dentry_ctx {
@@ -43,10 +44,10 @@ struct match_dentry_ctx {
        bool case_insensitive;
 };
 
-#ifdef __WIN32__
 static bool
-match_wildcard_case_sensitive(const tchar *string, size_t string_len,
-                             const tchar *wildcard, size_t wildcard_len)
+do_match_wildcard(const tchar *string, size_t string_len,
+                 const tchar *wildcard, size_t wildcard_len,
+                 bool ignore_case)
 {
        for (;;) {
                if (string_len == 0) {
@@ -57,59 +58,107 @@ match_wildcard_case_sensitive(const tchar *string, size_t string_len,
                        return (wildcard_len == 0);
                } else if (wildcard_len == 0) {
                        return false;
-               } else if (*string == *wildcard || *wildcard == T('?')) {
+               } else if (*string == *wildcard || *wildcard == T('?') ||
+                          (ignore_case && totlower(*string) == totlower(*wildcard)))
+               {
                        string++;
                        string_len--;
                        wildcard_len--;
                        wildcard++;
                        continue;
                } else if (*wildcard == T('*')) {
-                       return match_wildcard_case_sensitive(
-                                             string, string_len,
-                                             wildcard + 1, wildcard_len - 1) ||
-                              match_wildcard_case_sensitive(
-                                             string + 1, string_len - 1,
-                                             wildcard, wildcard_len);
+                       return do_match_wildcard(string, string_len,
+                                                wildcard + 1, wildcard_len - 1,
+                                                ignore_case) ||
+                              do_match_wildcard(string + 1, string_len - 1,
+                                                wildcard, wildcard_len,
+                                                ignore_case);
                } else {
                        return false;
                }
        }
 }
-#endif /* __WIN32__ */
 
 static bool
-match_wildcard(const tchar *string, tchar *wildcard,
-              size_t wildcard_len, bool case_insensitive)
+match_wildcard(const tchar *string, const tchar *wildcard,
+              size_t wildcard_len, bool ignore_case)
 {
-       /* Note: in Windows builds fnmatch() calls a replacement function.  It
-        * does not support case-sensitive globbing.  */
-#ifdef __WIN32__
-       if (case_insensitive)
-#endif
-       {
-               char orig;
-               int ret;
-               int flags = FNM_NOESCAPE;
-               if (case_insensitive)
-                       flags |= FNM_CASEFOLD;
-
-               orig = wildcard[wildcard_len];
-               wildcard[wildcard_len] = T('\0');
+       return do_match_wildcard(string, tstrlen(string),
+                                wildcard, wildcard_len, ignore_case);
+}
 
-               ret = fnmatch(wildcard, string, flags);
+/*
+ * Determines whether a path matches a wildcard pattern.
+ *
+ * @path
+ *     The path to match.  Assumptions:  All path separators must be @path_sep,
+ *     there cannot be consecutive path separators, there cannot be a trailing
+ *     path separator, and there must be exactly one leading path separator.
+ *
+ * @path_nchars
+ *     Number of characters in @path.
+ *
+ * @wildcard
+ *     The wildcard pattern to match.  It can contain the wildcard characters
+ *     '*' and '?'.  The former matches zero or more characters except
+ *     @path_sep, and the latter matches any character except @path_sep.  All
+ *     path separators in the pattern must be @path_sep, and there cannot be
+ *     consecutive path separators, and there cannot be a trailing path
+ *     separator.  If there is a leading path separator, the match is attempted
+ *     with the whole path; otherwise, the match is attempted with the filename
+ *     only.
+ *
+ * @path_sep
+ *     Path separator character used in @path and @wildcard.
+ *
+ * @prefix_ok
+ *     If %true, also accept a match against a prefix of @path that ends at a
+ *     path separator, i.e. return %true when the pattern matches an ancestor
+ *     directory of @path.  This only affects patterns that begin with
+ *     @path_sep; filename-only patterns ignore it.
+ *
+ * Returns %true if there was a match; %false if there was not.
+ */
+bool
+match_path(const tchar *path, size_t path_nchars,
+          const tchar *wildcard, tchar path_sep, bool prefix_ok)
+{
+       if (*wildcard != path_sep) {
+               /* Pattern doesn't begin with path separator.  Try to match the
+                * file name only.  */
+               return match_wildcard(path_basename_with_len(path, path_nchars),
+                                     wildcard, tstrlen(wildcard),
+                                     default_ignore_case);
+       } else {
+               /* Pattern begins with path separator.  Try to match the whole
+                * path.  */
+               do {
+                       if (!*wildcard) {
+                               /* Path has more components than pattern  */
+                               return prefix_ok;
+                       }
 
-               wildcard[wildcard_len] = orig;
-               return (ret == 0);
+                       size_t path_component_len = 0;
+                       size_t wildcard_component_len = 0;
+
+                       do {
+                               path_component_len++;
+                       } while (path[path_component_len] != path_sep &&
+                                path[path_component_len] != T('\0'));
+                       do {
+                               wildcard_component_len++;
+                       } while (wildcard[wildcard_component_len] != path_sep &&
+                                wildcard[wildcard_component_len] != T('\0'));
+                       if (!do_match_wildcard(path, path_component_len,
+                                              wildcard, wildcard_component_len,
+                                              default_ignore_case))
+                               return false;
+                       path += path_component_len;
+                       wildcard += wildcard_component_len;
+               } while (*path);
+
+               return (*wildcard == '\0');
        }
-#ifdef __WIN32__
-       else
-       {
-               return match_wildcard_case_sensitive(string,
-                                                    tstrlen(string),
-                                                    wildcard,
-                                                    wildcard_len);
-       }
-#endif
 }
 
 static int
@@ -127,7 +176,7 @@ wildcard_status(const tchar *wildcard)
 {
        if (*wildcard == T('\0'))
                return WILDCARD_STATUS_DONE_FULLY;
-       while (is_any_path_separator(*wildcard))
+       while (*wildcard == WIM_PATH_SEPARATOR)
                wildcard++;
        if (*wildcard == T('\0'))
                return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
@@ -136,32 +185,27 @@ wildcard_status(const tchar *wildcard)
 }
 
 static int
-match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
+match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
 {
-       struct match_dentry_ctx *ctx = _ctx;
-       tchar *name;
-       size_t name_len;
+       const tchar *name;
+       size_t name_nchars;
        int ret;
 
        if (cur_dentry->file_name_nbytes == 0)
                return 0;
 
-#if TCHAR_IS_UTF16LE
-       name = cur_dentry->file_name;
-       name_len = cur_dentry->file_name_nbytes;
-#else
-       ret = utf16le_to_tstr(cur_dentry->file_name,
-                             cur_dentry->file_name_nbytes,
-                             &name, &name_len);
+       ret = utf16le_get_tstr(cur_dentry->file_name,
+                              cur_dentry->file_name_nbytes,
+                              &name, &name_nchars);
        if (ret)
                return ret;
-#endif
-       name_len /= sizeof(tchar);
+       name_nchars /= sizeof(tchar);
 
-       if (match_wildcard(name,
-                          &ctx->wildcard_path[ctx->cur_component_offset],
-                          ctx->cur_component_len,
-                          ctx->case_insensitive))
+       if (do_match_wildcard(name,
+                             name_nchars,
+                             &ctx->wildcard_path[ctx->cur_component_offset],
+                             ctx->cur_component_len,
+                             ctx->case_insensitive))
        {
                switch (wildcard_status(&ctx->wildcard_path[
                                ctx->cur_component_offset +
@@ -186,9 +230,8 @@ match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
                ret = 0;
        }
 
-#if !TCHAR_IS_UTF16LE
-       FREE(name);
-#endif
+       utf16le_put_tstr(name);
+
        return ret;
 }
 
@@ -203,16 +246,17 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
        size_t offset_save;
        size_t len_save;
        int ret;
+       struct wim_dentry *child;
 
        w = ctx->wildcard_path;
 
        begin = ctx->cur_component_offset + ctx->cur_component_len;
-       while (is_any_path_separator(w[begin]))
+       while (w[begin] == WIM_PATH_SEPARATOR)
                begin++;
 
        end = begin;
 
-       while (w[end] != T('\0') && !is_any_path_separator(w[end]))
+       while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
                end++;
 
        len = end - begin;
@@ -226,7 +270,12 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
        ctx->cur_component_offset = begin;
        ctx->cur_component_len = len;
 
-       ret = for_dentry_child(cur_dentry, match_dentry, ctx);
+       ret = 0;
+       for_dentry_child(child, cur_dentry) {
+               ret = match_dentry(child, ctx);
+               if (ret)
+                       break;
+       }
 
        ctx->cur_component_len = len_save;
        ctx->cur_component_offset = offset_save;
@@ -241,9 +290,9 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
  *     wildcard.
  * @wildcard_path
  *     Wildcard path to expand, which may contain the '?' and '*' characters.
- *      Path separators may be either forward slashes, and leading path
- *      separators are ignored.  Trailing path separators indicate that the
- *      wildcard can only match directories.
+ *     Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
+ *     ignored, whereas one or more trailing path separators indicate that the
+ *     wildcard path can only match directories (and not reparse points).
  * @consume_dentry
  *     Callback function which will receive each directory entry matched by the
  *     wildcard.
@@ -266,9 +315,6 @@ expand_wildcard_recursive(struct wim_dentry *cur_dentry,
  *
  * @return 0 on success; a positive error code on error; or the first nonzero
  * value returned by @consume_dentry.
- *
- * Note: this function uses the @tmp_list field of dentries it attempts to
- * match.
  */
 int
 expand_wildcard(WIMStruct *wim,
@@ -280,7 +326,7 @@ expand_wildcard(WIMStruct *wim,
        struct wim_dentry *root;
        int ret;
 
-       root = wim_root_dentry(wim);
+       root = wim_get_current_root_dentry(wim);
        if (root == NULL)
                goto no_match;