wimlib.net Git - wimlib/blob - src/wildcard.c
win32_capture.c: Fix exclusion when capture path ends in slash
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include <ctype.h>
31 #include "wimlib/dentry.h"
32 #include "wimlib/encoding.h"
33 #include "wimlib/error.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/paths.h"
36 #include "wimlib/wildcard.h"
37
38 struct match_dentry_ctx {
39         int (*consume_dentry)(struct wim_dentry *, void *);
40         void *consume_dentry_ctx;
41         size_t consume_dentry_count;
42         tchar *wildcard_path;
43         size_t cur_component_offset;
44         size_t cur_component_len;
45         bool case_insensitive;
46 };
47
48 static bool
49 do_match_wildcard(const tchar *string, size_t string_len,
50                   const tchar *wildcard, size_t wildcard_len,
51                   bool ignore_case)
52 {
53         for (;;) {
54                 if (string_len == 0) {
55                         while (wildcard_len != 0 && *wildcard == T('*')) {
56                                 wildcard++;
57                                 wildcard_len--;
58                         }
59                         return (wildcard_len == 0);
60                 } else if (wildcard_len == 0) {
61                         return false;
62                 } else if (*string == *wildcard || *wildcard == T('?') ||
63                            (ignore_case && totlower(*string) == totlower(*wildcard)))
64                 {
65                         string++;
66                         string_len--;
67                         wildcard_len--;
68                         wildcard++;
69                         continue;
70                 } else if (*wildcard == T('*')) {
71                         return do_match_wildcard(string, string_len,
72                                                  wildcard + 1, wildcard_len - 1,
73                                                  ignore_case) ||
74                                do_match_wildcard(string + 1, string_len - 1,
75                                                  wildcard, wildcard_len,
76                                                  ignore_case);
77                 } else {
78                         return false;
79                 }
80         }
81 }
82
83 static bool
84 match_wildcard(const tchar *string, const tchar *wildcard,
85                size_t wildcard_len, bool ignore_case)
86 {
87         return do_match_wildcard(string, tstrlen(string),
88                                  wildcard, wildcard_len, ignore_case);
89 }
90
91 /*
92  * Determines whether a path matches a wildcard pattern.
93  *
94  * @path
95  *      The path to match.  Assumptions:  All path separators must be @path_sep,
96  *      there cannot be consecutive path separators, there cannot be a trailing
97  *      path separator, and there must be exactly one leading path separator.
98  *
99  * @path_nchars
100  *      Number of characters in @path.
101  *
102  * @wildcard
103  *      The wildcard pattern to match.  It can contain the wildcard characters
104  *      '*' and '?'.  The former matches zero or more characters except
105  *      @path_sep, and the latter matches any character except @path_sep.  All
106  *      path separators in the pattern must be @path_sep, and there cannot be
107  *      consecutive path separators, and there cannot be a trailing path
108  *      separator.  If there is a leading path separator, the match is attempted
109  *      with the filename only; otherwise, the match is attempted with the whole
110  *      path.
111  *
112  * @path_sep
113  *      Path separator character used in @path and @wildcard.
114  *
115  * @prefix_ok
116  *      If %true, allow a prefix of @path, terminated by a path separator, to
117  *      match the pattern, in addition to @path itself.  In other words, return
118  *      %true if the pattern actually matches one of the ancestor directories of
119  *      @path.
120  *
121  * Returns %true if there was a match; %false if there was not.
122  */
123 bool
124 match_path(const tchar *path, size_t path_nchars,
125            const tchar *wildcard, tchar path_sep, bool prefix_ok)
126 {
127         if (*wildcard != path_sep) {
128                 /* Pattern doesn't begin with path separator.  Try to match the
129                  * file name only.  */
130                 return match_wildcard(path_basename_with_len(path, path_nchars),
131                                       wildcard, tstrlen(wildcard),
132                                       default_ignore_case);
133         } else {
134                 /* Pattern begins with path separator.  Try to match the whole
135                  * path.  */
136                 do {
137                         if (!*wildcard) {
138                                 /* Path has more components than pattern  */
139                                 return prefix_ok;
140                         }
141
142                         size_t path_component_len = 0;
143                         size_t wildcard_component_len = 0;
144
145                         do {
146                                 path_component_len++;
147                         } while (path[path_component_len] != path_sep &&
148                                  path[path_component_len] != T('\0'));
149                         do {
150                                 wildcard_component_len++;
151                         } while (wildcard[wildcard_component_len] != path_sep &&
152                                  wildcard[wildcard_component_len] != T('\0'));
153                         if (!do_match_wildcard(path, path_component_len,
154                                                wildcard, wildcard_component_len,
155                                                default_ignore_case))
156                                 return false;
157                         path += path_component_len;
158                         wildcard += wildcard_component_len;
159                 } while (*path);
160
161                 return (*wildcard == '\0');
162         }
163 }
164
/* Forward declaration: match_dentry() and expand_wildcard_recursive() call
 * each other.  */
static int expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                                     struct match_dentry_ctx *ctx);
168
/* Result codes of wildcard_status(), describing what is left of a wildcard
 * path after the component that was just matched.  */
enum {
        /* Nothing is left.  */
        WILDCARD_STATUS_DONE_FULLY = 0,

        /* Only path separators are left.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES = 1,

        /* At least one non-separator character is left.  */
        WILDCARD_STATUS_NOT_DONE = 2,
};
174
175 static int
176 wildcard_status(const tchar *wildcard)
177 {
178         if (*wildcard == T('\0'))
179                 return WILDCARD_STATUS_DONE_FULLY;
180         while (*wildcard == WIM_PATH_SEPARATOR)
181                 wildcard++;
182         if (*wildcard == T('\0'))
183                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
184
185         return WILDCARD_STATUS_NOT_DONE;
186 }
187
188 static int
189 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
190 {
191         const tchar *name;
192         size_t name_nchars;
193         int ret;
194
195         if (cur_dentry->file_name_nbytes == 0)
196                 return 0;
197
198         ret = utf16le_get_tstr(cur_dentry->file_name,
199                                cur_dentry->file_name_nbytes,
200                                &name, &name_nchars);
201         if (ret)
202                 return ret;
203         name_nchars /= sizeof(tchar);
204
205         if (do_match_wildcard(name,
206                               name_nchars,
207                               &ctx->wildcard_path[ctx->cur_component_offset],
208                               ctx->cur_component_len,
209                               ctx->case_insensitive))
210         {
211                 switch (wildcard_status(&ctx->wildcard_path[
212                                 ctx->cur_component_offset +
213                                 ctx->cur_component_len]))
214                 {
215                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
216                         if (!dentry_is_directory(cur_dentry)) {
217                                 ret = 0;
218                                 break;
219                         }
220                         /* Fall through  */
221                 case WILDCARD_STATUS_DONE_FULLY:
222                         ret = (*ctx->consume_dentry)(cur_dentry,
223                                                      ctx->consume_dentry_ctx);
224                         ctx->consume_dentry_count++;
225                         break;
226                 case WILDCARD_STATUS_NOT_DONE:
227                         ret = expand_wildcard_recursive(cur_dentry, ctx);
228                         break;
229                 }
230         } else {
231                 ret = 0;
232         }
233
234         utf16le_put_tstr(name);
235
236         return ret;
237 }
238
239 static int
240 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
241                           struct match_dentry_ctx *ctx)
242 {
243         tchar *w;
244         size_t begin;
245         size_t end;
246         size_t len;
247         size_t offset_save;
248         size_t len_save;
249         int ret;
250         struct wim_dentry *child;
251
252         w = ctx->wildcard_path;
253
254         begin = ctx->cur_component_offset + ctx->cur_component_len;
255         while (w[begin] == WIM_PATH_SEPARATOR)
256                 begin++;
257
258         end = begin;
259
260         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
261                 end++;
262
263         len = end - begin;
264
265         if (len == 0)
266                 return 0;
267
268         offset_save = ctx->cur_component_offset;
269         len_save = ctx->cur_component_len;
270
271         ctx->cur_component_offset = begin;
272         ctx->cur_component_len = len;
273
274         ret = 0;
275         for_dentry_child(child, cur_dentry) {
276                 ret = match_dentry(child, ctx);
277                 if (ret)
278                         break;
279         }
280
281         ctx->cur_component_len = len_save;
282         ctx->cur_component_offset = offset_save;
283
284         return ret;
285 }
286
287 /* Expand a wildcard relative to the current WIM image.
288  *
289  * @wim
290  *      WIMStruct whose currently selected image is searched to expand the
291  *      wildcard.
292  * @wildcard_path
293  *      Wildcard path to expand, which may contain the '?' and '*' characters.
294  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
295  *      ignored, whereas one or more trailing path separators indicate that the
296  *      wildcard path can only match directories (and not reparse points).
297  * @consume_dentry
298  *      Callback function which will receive each directory entry matched by the
299  *      wildcard.
300  * @consume_dentry_ctx
301  *      Argument to pass to @consume_dentry.
302  * @flags
303  *      Zero or more of the following flags:
304  *
305  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
306  *              Issue a warning if the wildcard does not match any dentries.
307  *
308  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
309  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
310  *              wildcard does not match any dentries.
311  *
312  *      WILDCARD_FLAG_CASE_INSENSITIVE:
313  *              Perform the matching case insensitively.  Note that this may
314  *              cause @wildcard to match multiple dentries, even if it does not
315  *              contain wildcard characters.
316  *
317  * @return 0 on success; a positive error code on error; or the first nonzero
318  * value returned by @consume_dentry.
319  */
320 int
321 expand_wildcard(WIMStruct *wim,
322                 const tchar *wildcard_path,
323                 int (*consume_dentry)(struct wim_dentry *, void *),
324                 void *consume_dentry_ctx,
325                 u32 flags)
326 {
327         struct wim_dentry *root;
328         int ret;
329
330         root = wim_get_current_root_dentry(wim);
331         if (root == NULL)
332                 goto no_match;
333
334         struct match_dentry_ctx ctx = {
335                 .consume_dentry = consume_dentry,
336                 .consume_dentry_ctx = consume_dentry_ctx,
337                 .consume_dentry_count = 0,
338                 .wildcard_path = TSTRDUP(wildcard_path),
339                 .cur_component_offset = 0,
340                 .cur_component_len = 0,
341                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
342         };
343
344         if (ctx.wildcard_path == NULL)
345                 return WIMLIB_ERR_NOMEM;
346
347         ret = expand_wildcard_recursive(root, &ctx);
348         FREE(ctx.wildcard_path);
349         if (ret == 0 && ctx.consume_dentry_count == 0)
350                 goto no_match;
351         return ret;
352
353 no_match:
354         ret = 0;
355         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
356                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
357
358         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
359                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
360                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
361         }
362         return ret;
363 }