2619f47b59fe445f84d2e31012d2e6f90828464e
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include <ctype.h>
31 #include "wimlib/dentry.h"
32 #include "wimlib/encoding.h"
33 #include "wimlib/error.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/paths.h"
36 #include "wimlib/wildcard.h"
37
38 struct match_dentry_ctx {
39         int (*consume_dentry)(struct wim_dentry *, void *);
40         void *consume_dentry_ctx;
41         size_t consume_dentry_count;
42         tchar *wildcard_path;
43         size_t cur_component_offset;
44         size_t cur_component_len;
45         bool case_insensitive;
46 };
47
48 static bool
49 do_match_wildcard(const tchar *string, size_t string_len,
50                   const tchar *wildcard, size_t wildcard_len,
51                   bool ignore_case)
52 {
53         for (;;) {
54                 if (string_len == 0) {
55                         while (wildcard_len != 0 && *wildcard == T('*')) {
56                                 wildcard++;
57                                 wildcard_len--;
58                         }
59                         return (wildcard_len == 0);
60                 } else if (wildcard_len == 0) {
61                         return false;
62                 } else if (*string == *wildcard || *wildcard == T('?') ||
63                            (ignore_case && totlower(*string) == totlower(*wildcard)))
64                 {
65                         string++;
66                         string_len--;
67                         wildcard_len--;
68                         wildcard++;
69                         continue;
70                 } else if (*wildcard == T('*')) {
71                         return do_match_wildcard(string, string_len,
72                                                  wildcard + 1, wildcard_len - 1,
73                                                  ignore_case) ||
74                                do_match_wildcard(string + 1, string_len - 1,
75                                                  wildcard, wildcard_len,
76                                                  ignore_case);
77                 } else {
78                         return false;
79                 }
80         }
81 }
82
83 static bool
84 match_wildcard(const tchar *string, const tchar *wildcard,
85                size_t wildcard_len, bool ignore_case)
86 {
87         return do_match_wildcard(string, tstrlen(string),
88                                  wildcard, wildcard_len, ignore_case);
89 }
90
91 /*
92  * Determines whether a path matches a wildcard pattern.
93  *
94  * @path
95  *      The path to match.  Assumptions:  All path separators must be @path_sep,
96  *      there cannot be consecutive path separators, there cannot be a trailing
97  *      path separator, and there must be exactly one leading path separator.
98  *
99  * @path_nchars
100  *      Number of characters in @path.
101  *
102  * @wildcard
103  *      The wildcard pattern to match.  It can contain the wildcard characters
104  *      '*' and '?'.  The former matches zero or more characters except
105  *      @path_sep, and the latter matches any character except @path_sep.  All
106  *      path separators in the pattern must be @path_sep, and there cannot be
107  *      consecutive path separators, and there cannot be a trailing path
108  *      separator.  If there is a leading path separator, the match is attempted
109  *      with the filename only; otherwise, the match is attempted with the whole
110  *      path.
111  *
112  * @path_sep
113  *      Path separator character used in @path and @wildcard.
114  *
115  * @prefix_ok
116  *      If %true, allow a prefix of @path, terminated by a path separator, to
117  *      match the pattern, in addition to @path itself.  In other words, return
118  *      %true if the pattern actually matches one of the ancestor directories of
119  *      @path.
120  *
121  * Returns %true if there was a match; %false if there was not.
122  */
123 bool
124 match_path(const tchar *path, size_t path_nchars,
125            const tchar *wildcard, tchar path_sep, bool prefix_ok)
126 {
127         if (*wildcard != path_sep) {
128                 /* Pattern doesn't begin with path separator.  Try to match the
129                  * file name only.  */
130                 return match_wildcard(path_basename_with_len(path, path_nchars),
131                                       wildcard, tstrlen(wildcard),
132                                       default_ignore_case);
133         } else {
134                 /* Pattern begins with path separator.  Try to match the whole
135                  * path.  */
136                 do {
137                         if (!*wildcard) {
138                                 /* Path has more components than pattern  */
139                                 return prefix_ok;
140                         }
141
142                         size_t path_component_len = 0;
143                         size_t wildcard_component_len = 0;
144
145                         do {
146                                 path_component_len++;
147                         } while (path[path_component_len] != path_sep &&
148                                  path[path_component_len] != T('\0'));
149                         do {
150                                 wildcard_component_len++;
151                         } while (wildcard[wildcard_component_len] != path_sep &&
152                                  wildcard[wildcard_component_len] != T('\0'));
153                         if (!do_match_wildcard(path, path_component_len,
154                                                wildcard, wildcard_component_len,
155                                                default_ignore_case))
156                                 return false;
157                         path += path_component_len;
158                         wildcard += wildcard_component_len;
159                 } while (*path);
160
161                 return (*wildcard == '\0');
162         }
163 }
164
165 static int
166 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
167                           struct match_dentry_ctx *ctx);
168
169 enum {
170         WILDCARD_STATUS_DONE_FULLY,
171         WILDCARD_STATUS_DONE_TRAILING_SLASHES,
172         WILDCARD_STATUS_NOT_DONE,
173 };
174
175 static int
176 wildcard_status(const tchar *wildcard)
177 {
178         if (*wildcard == T('\0'))
179                 return WILDCARD_STATUS_DONE_FULLY;
180         while (*wildcard == WIM_PATH_SEPARATOR)
181                 wildcard++;
182         if (*wildcard == T('\0'))
183                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
184
185         return WILDCARD_STATUS_NOT_DONE;
186 }
187
188 static int
189 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
190 {
191         tchar *name;
192         size_t name_len;
193         int ret;
194
195         if (cur_dentry->file_name_nbytes == 0)
196                 return 0;
197
198 #if TCHAR_IS_UTF16LE
199         name = cur_dentry->file_name;
200         name_len = cur_dentry->file_name_nbytes;
201 #else
202         ret = utf16le_to_tstr(cur_dentry->file_name,
203                               cur_dentry->file_name_nbytes,
204                               &name, &name_len);
205         if (ret)
206                 return ret;
207 #endif
208         name_len /= sizeof(tchar);
209
210         if (match_wildcard(name,
211                            &ctx->wildcard_path[ctx->cur_component_offset],
212                            ctx->cur_component_len,
213                            ctx->case_insensitive))
214         {
215                 switch (wildcard_status(&ctx->wildcard_path[
216                                 ctx->cur_component_offset +
217                                 ctx->cur_component_len]))
218                 {
219                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
220                         if (!dentry_is_directory(cur_dentry)) {
221                                 ret = 0;
222                                 break;
223                         }
224                         /* Fall through  */
225                 case WILDCARD_STATUS_DONE_FULLY:
226                         ret = (*ctx->consume_dentry)(cur_dentry,
227                                                      ctx->consume_dentry_ctx);
228                         ctx->consume_dentry_count++;
229                         break;
230                 case WILDCARD_STATUS_NOT_DONE:
231                         ret = expand_wildcard_recursive(cur_dentry, ctx);
232                         break;
233                 }
234         } else {
235                 ret = 0;
236         }
237
238 #if !TCHAR_IS_UTF16LE
239         FREE(name);
240 #endif
241         return ret;
242 }
243
244 static int
245 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
246                           struct match_dentry_ctx *ctx)
247 {
248         tchar *w;
249         size_t begin;
250         size_t end;
251         size_t len;
252         size_t offset_save;
253         size_t len_save;
254         int ret;
255         struct wim_dentry *child;
256
257         w = ctx->wildcard_path;
258
259         begin = ctx->cur_component_offset + ctx->cur_component_len;
260         while (w[begin] == WIM_PATH_SEPARATOR)
261                 begin++;
262
263         end = begin;
264
265         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
266                 end++;
267
268         len = end - begin;
269
270         if (len == 0)
271                 return 0;
272
273         offset_save = ctx->cur_component_offset;
274         len_save = ctx->cur_component_len;
275
276         ctx->cur_component_offset = begin;
277         ctx->cur_component_len = len;
278
279         ret = 0;
280         for_dentry_child(child, cur_dentry) {
281                 ret = match_dentry(child, ctx);
282                 if (ret)
283                         break;
284         }
285
286         ctx->cur_component_len = len_save;
287         ctx->cur_component_offset = offset_save;
288
289         return ret;
290 }
291
292 /* Expand a wildcard relative to the current WIM image.
293  *
294  * @wim
295  *      WIMStruct whose currently selected image is searched to expand the
296  *      wildcard.
297  * @wildcard_path
298  *      Wildcard path to expand, which may contain the '?' and '*' characters.
299  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
300  *      ignored, whereas one or more trailing path separators indicate that the
301  *      wildcard path can only match directories (and not reparse points).
302  * @consume_dentry
303  *      Callback function which will receive each directory entry matched by the
304  *      wildcard.
305  * @consume_dentry_ctx
306  *      Argument to pass to @consume_dentry.
307  * @flags
308  *      Zero or more of the following flags:
309  *
310  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
311  *              Issue a warning if the wildcard does not match any dentries.
312  *
313  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
314  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
315  *              wildcard does not match any dentries.
316  *
317  *      WILDCARD_FLAG_CASE_INSENSITIVE:
318  *              Perform the matching case insensitively.  Note that this may
319  *              cause @wildcard to match multiple dentries, even if it does not
320  *              contain wildcard characters.
321  *
322  * @return 0 on success; a positive error code on error; or the first nonzero
323  * value returned by @consume_dentry.
324  */
325 int
326 expand_wildcard(WIMStruct *wim,
327                 const tchar *wildcard_path,
328                 int (*consume_dentry)(struct wim_dentry *, void *),
329                 void *consume_dentry_ctx,
330                 u32 flags)
331 {
332         struct wim_dentry *root;
333         int ret;
334
335         root = wim_root_dentry(wim);
336         if (root == NULL)
337                 goto no_match;
338
339         struct match_dentry_ctx ctx = {
340                 .consume_dentry = consume_dentry,
341                 .consume_dentry_ctx = consume_dentry_ctx,
342                 .consume_dentry_count = 0,
343                 .wildcard_path = TSTRDUP(wildcard_path),
344                 .cur_component_offset = 0,
345                 .cur_component_len = 0,
346                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
347         };
348
349         if (ctx.wildcard_path == NULL)
350                 return WIMLIB_ERR_NOMEM;
351
352         ret = expand_wildcard_recursive(root, &ctx);
353         FREE(ctx.wildcard_path);
354         if (ret == 0 && ctx.consume_dentry_count == 0)
355                 goto no_match;
356         return ret;
357
358 no_match:
359         ret = 0;
360         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
361                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
362
363         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
364                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
365                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
366         }
367         return ret;
368 }