]> wimlib.net Git - wimlib/blob - src/wildcard.c
Rename is_win32_name => d_is_win32_name
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option) any
13  * later version.
14  *
15  * This file is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this file; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include <ctype.h>
29
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/metadata.h"
34 #include "wimlib/paths.h"
35 #include "wimlib/wildcard.h"
36
37 struct match_dentry_ctx {
38         int (*consume_dentry)(struct wim_dentry *, void *);
39         void *consume_dentry_ctx;
40         size_t consume_dentry_count;
41         tchar *wildcard_path;
42         size_t cur_component_offset;
43         size_t cur_component_len;
44         bool case_insensitive;
45 };
46
47 static bool
48 do_match_wildcard(const tchar *string, size_t string_len,
49                   const tchar *wildcard, size_t wildcard_len,
50                   bool ignore_case)
51 {
52         for (;;) {
53                 if (string_len == 0) {
54                         while (wildcard_len != 0 && *wildcard == T('*')) {
55                                 wildcard++;
56                                 wildcard_len--;
57                         }
58                         return (wildcard_len == 0);
59                 } else if (wildcard_len == 0) {
60                         return false;
61                 } else if (*string == *wildcard || *wildcard == T('?') ||
62                            (ignore_case && totlower(*string) == totlower(*wildcard)))
63                 {
64                         string++;
65                         string_len--;
66                         wildcard_len--;
67                         wildcard++;
68                         continue;
69                 } else if (*wildcard == T('*')) {
70                         return do_match_wildcard(string, string_len,
71                                                  wildcard + 1, wildcard_len - 1,
72                                                  ignore_case) ||
73                                do_match_wildcard(string + 1, string_len - 1,
74                                                  wildcard, wildcard_len,
75                                                  ignore_case);
76                 } else {
77                         return false;
78                 }
79         }
80 }
81
82 static bool
83 match_wildcard(const tchar *string, const tchar *wildcard,
84                size_t wildcard_len, bool ignore_case)
85 {
86         return do_match_wildcard(string, tstrlen(string),
87                                  wildcard, wildcard_len, ignore_case);
88 }
89
90 /*
91  * Determines whether a path matches a wildcard pattern.
92  *
93  * @path
94  *      The path to match.  Assumptions:  All path separators must be @path_sep,
95  *      there cannot be consecutive path separators, there cannot be a trailing
96  *      path separator, and there must be exactly one leading path separator.
97  *
98  * @path_nchars
99  *      Number of characters in @path.
100  *
101  * @wildcard
102  *      The wildcard pattern to match.  It can contain the wildcard characters
103  *      '*' and '?'.  The former matches zero or more characters except
104  *      @path_sep, and the latter matches any character except @path_sep.  All
105  *      path separators in the pattern must be @path_sep, and there cannot be
106  *      consecutive path separators, and there cannot be a trailing path
107  *      separator.  If there is a leading path separator, the match is attempted
108  *      with the filename only; otherwise, the match is attempted with the whole
109  *      path.
110  *
111  * @path_sep
112  *      Path separator character used in @path and @wildcard.
113  *
114  * @prefix_ok
115  *      If %true, allow a prefix of @path, terminated by a path separator, to
116  *      match the pattern, in addition to @path itself.  In other words, return
117  *      %true if the pattern actually matches one of the ancestor directories of
118  *      @path.
119  *
120  * Returns %true if there was a match; %false if there was not.
121  */
122 bool
123 match_path(const tchar *path, size_t path_nchars,
124            const tchar *wildcard, tchar path_sep, bool prefix_ok)
125 {
126         if (*wildcard != path_sep) {
127                 /* Pattern doesn't begin with path separator.  Try to match the
128                  * file name only.  */
129                 return match_wildcard(path_basename_with_len(path, path_nchars),
130                                       wildcard, tstrlen(wildcard),
131                                       default_ignore_case);
132         } else {
133                 /* Pattern begins with path separator.  Try to match the whole
134                  * path.  */
135                 do {
136                         if (!*wildcard) {
137                                 /* Path has more components than pattern  */
138                                 return prefix_ok;
139                         }
140
141                         size_t path_component_len = 0;
142                         size_t wildcard_component_len = 0;
143
144                         do {
145                                 path_component_len++;
146                         } while (path[path_component_len] != path_sep &&
147                                  path[path_component_len] != T('\0'));
148                         do {
149                                 wildcard_component_len++;
150                         } while (wildcard[wildcard_component_len] != path_sep &&
151                                  wildcard[wildcard_component_len] != T('\0'));
152                         if (!do_match_wildcard(path, path_component_len,
153                                                wildcard, wildcard_component_len,
154                                                default_ignore_case))
155                                 return false;
156                         path += path_component_len;
157                         wildcard += wildcard_component_len;
158                 } while (*path);
159
160                 return (*wildcard == '\0');
161         }
162 }
163
/* Forward declaration: match_dentry() and expand_wildcard_recursive() call
 * each other.  */
static int expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                                     struct match_dentry_ctx *ctx);
167
/* Classification of what remains of a wildcard path after the component
 * currently being matched; see wildcard_status().  */
enum {
        /* Nothing remains.  */
        WILDCARD_STATUS_DONE_FULLY = 0,

        /* Only path separators remain.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES = 1,

        /* Something other than path separators remains.  */
        WILDCARD_STATUS_NOT_DONE = 2,
};
173
174 static int
175 wildcard_status(const tchar *wildcard)
176 {
177         if (*wildcard == T('\0'))
178                 return WILDCARD_STATUS_DONE_FULLY;
179         while (*wildcard == WIM_PATH_SEPARATOR)
180                 wildcard++;
181         if (*wildcard == T('\0'))
182                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
183
184         return WILDCARD_STATUS_NOT_DONE;
185 }
186
187 static int
188 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
189 {
190         const tchar *name;
191         size_t name_nchars;
192         int ret;
193
194         if (cur_dentry->d_name_nbytes == 0)
195                 return 0;
196
197         ret = utf16le_get_tstr(cur_dentry->d_name,
198                                cur_dentry->d_name_nbytes,
199                                &name, &name_nchars);
200         if (ret)
201                 return ret;
202         name_nchars /= sizeof(tchar);
203
204         if (do_match_wildcard(name,
205                               name_nchars,
206                               &ctx->wildcard_path[ctx->cur_component_offset],
207                               ctx->cur_component_len,
208                               ctx->case_insensitive))
209         {
210                 switch (wildcard_status(&ctx->wildcard_path[
211                                 ctx->cur_component_offset +
212                                 ctx->cur_component_len]))
213                 {
214                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
215                         if (!dentry_is_directory(cur_dentry)) {
216                                 ret = 0;
217                                 break;
218                         }
219                         /* Fall through  */
220                 case WILDCARD_STATUS_DONE_FULLY:
221                         ret = (*ctx->consume_dentry)(cur_dentry,
222                                                      ctx->consume_dentry_ctx);
223                         ctx->consume_dentry_count++;
224                         break;
225                 case WILDCARD_STATUS_NOT_DONE:
226                         ret = expand_wildcard_recursive(cur_dentry, ctx);
227                         break;
228                 }
229         } else {
230                 ret = 0;
231         }
232
233         utf16le_put_tstr(name);
234
235         return ret;
236 }
237
238 static int
239 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
240                           struct match_dentry_ctx *ctx)
241 {
242         tchar *w;
243         size_t begin;
244         size_t end;
245         size_t len;
246         size_t offset_save;
247         size_t len_save;
248         int ret;
249         struct wim_dentry *child;
250
251         w = ctx->wildcard_path;
252
253         begin = ctx->cur_component_offset + ctx->cur_component_len;
254         while (w[begin] == WIM_PATH_SEPARATOR)
255                 begin++;
256
257         end = begin;
258
259         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
260                 end++;
261
262         len = end - begin;
263
264         if (len == 0)
265                 return 0;
266
267         offset_save = ctx->cur_component_offset;
268         len_save = ctx->cur_component_len;
269
270         ctx->cur_component_offset = begin;
271         ctx->cur_component_len = len;
272
273         ret = 0;
274         for_dentry_child(child, cur_dentry) {
275                 ret = match_dentry(child, ctx);
276                 if (ret)
277                         break;
278         }
279
280         ctx->cur_component_len = len_save;
281         ctx->cur_component_offset = offset_save;
282
283         return ret;
284 }
285
286 /* Expand a wildcard relative to the current WIM image.
287  *
288  * @wim
289  *      WIMStruct whose currently selected image is searched to expand the
290  *      wildcard.
291  * @wildcard_path
292  *      Wildcard path to expand, which may contain the '?' and '*' characters.
293  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
294  *      ignored, whereas one or more trailing path separators indicate that the
295  *      wildcard path can only match directories (and not reparse points).
296  * @consume_dentry
297  *      Callback function which will receive each directory entry matched by the
298  *      wildcard.
299  * @consume_dentry_ctx
300  *      Argument to pass to @consume_dentry.
301  * @flags
302  *      Zero or more of the following flags:
303  *
304  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
305  *              Issue a warning if the wildcard does not match any dentries.
306  *
307  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
308  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
309  *              wildcard does not match any dentries.
310  *
311  *      WILDCARD_FLAG_CASE_INSENSITIVE:
312  *              Perform the matching case insensitively.  Note that this may
313  *              cause @wildcard to match multiple dentries, even if it does not
314  *              contain wildcard characters.
315  *
316  * @return 0 on success; a positive error code on error; or the first nonzero
317  * value returned by @consume_dentry.
318  */
319 int
320 expand_wildcard(WIMStruct *wim,
321                 const tchar *wildcard_path,
322                 int (*consume_dentry)(struct wim_dentry *, void *),
323                 void *consume_dentry_ctx,
324                 u32 flags)
325 {
326         struct wim_dentry *root;
327         int ret;
328
329         root = wim_get_current_root_dentry(wim);
330         if (root == NULL)
331                 goto no_match;
332
333         struct match_dentry_ctx ctx = {
334                 .consume_dentry = consume_dentry,
335                 .consume_dentry_ctx = consume_dentry_ctx,
336                 .consume_dentry_count = 0,
337                 .wildcard_path = TSTRDUP(wildcard_path),
338                 .cur_component_offset = 0,
339                 .cur_component_len = 0,
340                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
341         };
342
343         if (ctx.wildcard_path == NULL)
344                 return WIMLIB_ERR_NOMEM;
345
346         ret = expand_wildcard_recursive(root, &ctx);
347         FREE(ctx.wildcard_path);
348         if (ret == 0 && ctx.consume_dentry_count == 0)
349                 goto no_match;
350         return ret;
351
352 no_match:
353         ret = 0;
354         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
355                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
356
357         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
358                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
359                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
360         }
361         return ret;
362 }