742b564782d3219156ec5f669d2f6432d0bf1e8b
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include <ctype.h>
31 #include "wimlib/dentry.h"
32 #include "wimlib/encoding.h"
33 #include "wimlib/error.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/paths.h"
36 #include "wimlib/wildcard.h"
37
38 struct match_dentry_ctx {
39         int (*consume_dentry)(struct wim_dentry *, void *);
40         void *consume_dentry_ctx;
41         size_t consume_dentry_count;
42         tchar *wildcard_path;
43         size_t cur_component_offset;
44         size_t cur_component_len;
45         bool case_insensitive;
46 };
47
48 static bool
49 do_match_wildcard(const tchar *string, size_t string_len,
50                   const tchar *wildcard, size_t wildcard_len,
51                   bool ignore_case)
52 {
53         for (;;) {
54                 if (string_len == 0) {
55                         while (wildcard_len != 0 && *wildcard == T('*')) {
56                                 wildcard++;
57                                 wildcard_len--;
58                         }
59                         return (wildcard_len == 0);
60                 } else if (wildcard_len == 0) {
61                         return false;
62                 } else if (*string == *wildcard || *wildcard == T('?') ||
63                            (ignore_case && totlower(*string) == totlower(*wildcard)))
64                 {
65                         string++;
66                         string_len--;
67                         wildcard_len--;
68                         wildcard++;
69                         continue;
70                 } else if (*wildcard == T('*')) {
71                         return do_match_wildcard(string, string_len,
72                                                  wildcard + 1, wildcard_len - 1,
73                                                  ignore_case) ||
74                                do_match_wildcard(string + 1, string_len - 1,
75                                                  wildcard, wildcard_len,
76                                                  ignore_case);
77                 } else {
78                         return false;
79                 }
80         }
81 }
82
83 static bool
84 match_wildcard(const tchar *string, const tchar *wildcard,
85                size_t wildcard_len, bool ignore_case)
86 {
87         return do_match_wildcard(string, tstrlen(string),
88                                  wildcard, wildcard_len, ignore_case);
89 }
90
91 /*
92  * Determines whether a path matches a wildcard pattern.
93  *
94  * @path
95  *      The path to match.  Assumptions:  All path separators must be @path_sep,
96  *      there cannot be consecutive path separators, there cannot be a trailing
97  *      path separator, and there must be exactly one leading path separator.
98  *
99  * @path_nchars
100  *      Number of characters in @path.
101  *
102  * @wildcard
103  *      The wildcard pattern to match.  It can contain the wildcard characters
104  *      '*' and '?'.  The former matches zero or more characters except
105  *      @path_sep, and the latter matches any character except @path_sep.  All
106  *      path separators in the pattern must be @path_sep, and there cannot be
107  *      consecutive path separators, and there cannot be a trailing path
108  *      separator.  If there is a leading path separator, the match is attempted
109  *      with the filename only; otherwise, the matchis attempted with the whole
110  *      path.
111  *
112  * @path_sep
113  *      Path separator character in @path and @wildcard.
114  *
115  * @prefix_ok
116  *      If %true, allow a prefix of @path, terminated by a path separator, to
117  *      match, in addition to @path itself.  a.k.a. also return true if the
118  *      wildcard actually matches one of the ancestor directories of @path.
119  *
120  * Returns %true if there was a match; %false if there was not.
121  */
122 bool
123 match_path(const tchar *path, size_t path_nchars,
124            const tchar *wildcard, tchar path_sep, bool prefix_ok)
125 {
126         if (*wildcard != path_sep) {
127                 /* Pattern doesn't begin with path separator.  Try to match the
128                  * file name only.  */
129                 return match_wildcard(path_basename_with_len(path, path_nchars),
130                                       wildcard, tstrlen(wildcard),
131                                       default_ignore_case);
132         } else {
133                 /* Pattern begins with path separator.  Try to match the whole
134                  * path.  */
135                 do {
136                         if (!*wildcard) {
137                                 /* Path has more components than pattern  */
138                                 return prefix_ok;
139                         }
140
141                         size_t path_component_len = 0;
142                         size_t wildcard_component_len = 0;
143
144                         do {
145                                 path_component_len++;
146                         } while (path[path_component_len] != path_sep &&
147                                  path[path_component_len] != T('\0'));
148                         do {
149                                 wildcard_component_len++;
150                         } while (wildcard[wildcard_component_len] != path_sep &&
151                                  wildcard[wildcard_component_len] != T('\0'));
152                         if (!do_match_wildcard(path, path_component_len,
153                                                wildcard, wildcard_component_len,
154                                                default_ignore_case))
155                                 return false;
156                         path += path_component_len;
157                         wildcard += wildcard_component_len;
158                 } while (*path);
159
160                 return (*wildcard == '\0');
161         }
162 }
163
/* Declared ahead of its definition because match_dentry() and
 * expand_wildcard_recursive() call each other.  */
static int expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                                     struct match_dentry_ctx *ctx);
167
/* Classification of the part of a wildcard path that follows the component
 * just matched; see wildcard_status().  */
enum {
        /* Nothing at all remains.  */
        WILDCARD_STATUS_DONE_FULLY = 0,

        /* Only path separators remain.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES = 1,

        /* At least one more character other than a path separator
         * remains.  */
        WILDCARD_STATUS_NOT_DONE = 2,
};
173
174 static int
175 wildcard_status(const tchar *wildcard)
176 {
177         if (*wildcard == T('\0'))
178                 return WILDCARD_STATUS_DONE_FULLY;
179         while (*wildcard == WIM_PATH_SEPARATOR)
180                 wildcard++;
181         if (*wildcard == T('\0'))
182                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
183
184         return WILDCARD_STATUS_NOT_DONE;
185 }
186
187 static int
188 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
189 {
190         tchar *name;
191         size_t name_len;
192         int ret;
193
194         if (cur_dentry->file_name_nbytes == 0)
195                 return 0;
196
197 #if TCHAR_IS_UTF16LE
198         name = cur_dentry->file_name;
199         name_len = cur_dentry->file_name_nbytes;
200 #else
201         ret = utf16le_to_tstr(cur_dentry->file_name,
202                               cur_dentry->file_name_nbytes,
203                               &name, &name_len);
204         if (ret)
205                 return ret;
206 #endif
207         name_len /= sizeof(tchar);
208
209         if (match_wildcard(name,
210                            &ctx->wildcard_path[ctx->cur_component_offset],
211                            ctx->cur_component_len,
212                            ctx->case_insensitive))
213         {
214                 switch (wildcard_status(&ctx->wildcard_path[
215                                 ctx->cur_component_offset +
216                                 ctx->cur_component_len]))
217                 {
218                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
219                         if (!dentry_is_directory(cur_dentry)) {
220                                 ret = 0;
221                                 break;
222                         }
223                         /* Fall through  */
224                 case WILDCARD_STATUS_DONE_FULLY:
225                         ret = (*ctx->consume_dentry)(cur_dentry,
226                                                      ctx->consume_dentry_ctx);
227                         ctx->consume_dentry_count++;
228                         break;
229                 case WILDCARD_STATUS_NOT_DONE:
230                         ret = expand_wildcard_recursive(cur_dentry, ctx);
231                         break;
232                 }
233         } else {
234                 ret = 0;
235         }
236
237 #if !TCHAR_IS_UTF16LE
238         FREE(name);
239 #endif
240         return ret;
241 }
242
243 static int
244 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
245                           struct match_dentry_ctx *ctx)
246 {
247         tchar *w;
248         size_t begin;
249         size_t end;
250         size_t len;
251         size_t offset_save;
252         size_t len_save;
253         int ret;
254         struct wim_dentry *child;
255
256         w = ctx->wildcard_path;
257
258         begin = ctx->cur_component_offset + ctx->cur_component_len;
259         while (w[begin] == WIM_PATH_SEPARATOR)
260                 begin++;
261
262         end = begin;
263
264         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
265                 end++;
266
267         len = end - begin;
268
269         if (len == 0)
270                 return 0;
271
272         offset_save = ctx->cur_component_offset;
273         len_save = ctx->cur_component_len;
274
275         ctx->cur_component_offset = begin;
276         ctx->cur_component_len = len;
277
278         ret = 0;
279         for_dentry_child(child, cur_dentry) {
280                 ret = match_dentry(child, ctx);
281                 if (ret)
282                         break;
283         }
284
285         ctx->cur_component_len = len_save;
286         ctx->cur_component_offset = offset_save;
287
288         return ret;
289 }
290
291 /* Expand a wildcard relative to the current WIM image.
292  *
293  * @wim
294  *      WIMStruct whose currently selected image is searched to expand the
295  *      wildcard.
296  * @wildcard_path
297  *      Wildcard path to expand, which may contain the '?' and '*' characters.
298  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
299  *      ignored, whereas one or more trailing path separators indicate that the
300  *      wildcard path can only match directories (and not reparse points).
301  * @consume_dentry
302  *      Callback function which will receive each directory entry matched by the
303  *      wildcard.
304  * @consume_dentry_ctx
305  *      Argument to pass to @consume_dentry.
306  * @flags
307  *      Zero or more of the following flags:
308  *
309  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
310  *              Issue a warning if the wildcard does not match any dentries.
311  *
312  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
313  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
314  *              wildcard does not match any dentries.
315  *
316  *      WILDCARD_FLAG_CASE_INSENSITIVE:
317  *              Perform the matching case insensitively.  Note that this may
318  *              cause @wildcard to match multiple dentries, even if it does not
319  *              contain wildcard characters.
320  *
321  * @return 0 on success; a positive error code on error; or the first nonzero
322  * value returned by @consume_dentry.
323  */
324 int
325 expand_wildcard(WIMStruct *wim,
326                 const tchar *wildcard_path,
327                 int (*consume_dentry)(struct wim_dentry *, void *),
328                 void *consume_dentry_ctx,
329                 u32 flags)
330 {
331         struct wim_dentry *root;
332         int ret;
333
334         root = wim_root_dentry(wim);
335         if (root == NULL)
336                 goto no_match;
337
338         struct match_dentry_ctx ctx = {
339                 .consume_dentry = consume_dentry,
340                 .consume_dentry_ctx = consume_dentry_ctx,
341                 .consume_dentry_count = 0,
342                 .wildcard_path = TSTRDUP(wildcard_path),
343                 .cur_component_offset = 0,
344                 .cur_component_len = 0,
345                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
346         };
347
348         if (ctx.wildcard_path == NULL)
349                 return WIMLIB_ERR_NOMEM;
350
351         ret = expand_wildcard_recursive(root, &ctx);
352         FREE(ctx.wildcard_path);
353         if (ret == 0 && ctx.consume_dentry_count == 0)
354                 goto no_match;
355         return ret;
356
357 no_match:
358         ret = 0;
359         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
360                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
361
362         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
363                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
364                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
365         }
366         return ret;
367 }