Improve tstr <=> UTF-16LE conversions
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include <ctype.h>
31 #include "wimlib/dentry.h"
32 #include "wimlib/encoding.h"
33 #include "wimlib/error.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/paths.h"
36 #include "wimlib/wildcard.h"
37
38 struct match_dentry_ctx {
39         int (*consume_dentry)(struct wim_dentry *, void *);
40         void *consume_dentry_ctx;
41         size_t consume_dentry_count;
42         tchar *wildcard_path;
43         size_t cur_component_offset;
44         size_t cur_component_len;
45         bool case_insensitive;
46 };
47
48 static bool
49 do_match_wildcard(const tchar *string, size_t string_len,
50                   const tchar *wildcard, size_t wildcard_len,
51                   bool ignore_case)
52 {
53         for (;;) {
54                 if (string_len == 0) {
55                         while (wildcard_len != 0 && *wildcard == T('*')) {
56                                 wildcard++;
57                                 wildcard_len--;
58                         }
59                         return (wildcard_len == 0);
60                 } else if (wildcard_len == 0) {
61                         return false;
62                 } else if (*string == *wildcard || *wildcard == T('?') ||
63                            (ignore_case && totlower(*string) == totlower(*wildcard)))
64                 {
65                         string++;
66                         string_len--;
67                         wildcard_len--;
68                         wildcard++;
69                         continue;
70                 } else if (*wildcard == T('*')) {
71                         return do_match_wildcard(string, string_len,
72                                                  wildcard + 1, wildcard_len - 1,
73                                                  ignore_case) ||
74                                do_match_wildcard(string + 1, string_len - 1,
75                                                  wildcard, wildcard_len,
76                                                  ignore_case);
77                 } else {
78                         return false;
79                 }
80         }
81 }
82
83 static bool
84 match_wildcard(const tchar *string, const tchar *wildcard,
85                size_t wildcard_len, bool ignore_case)
86 {
87         return do_match_wildcard(string, tstrlen(string),
88                                  wildcard, wildcard_len, ignore_case);
89 }
90
91 /*
92  * Determines whether a path matches a wildcard pattern.
93  *
94  * @path
95  *      The path to match.  Assumptions:  All path separators must be @path_sep,
96  *      there cannot be consecutive path separators, there cannot be a trailing
97  *      path separator, and there must be exactly one leading path separator.
98  *
99  * @path_nchars
100  *      Number of characters in @path.
101  *
102  * @wildcard
103  *      The wildcard pattern to match.  It can contain the wildcard characters
104  *      '*' and '?'.  The former matches zero or more characters except
105  *      @path_sep, and the latter matches any character except @path_sep.  All
106  *      path separators in the pattern must be @path_sep, and there cannot be
107  *      consecutive path separators, and there cannot be a trailing path
108  *      separator.  If there is a leading path separator, the match is attempted
109  *      with the filename only; otherwise, the match is attempted with the whole
110  *      path.
111  *
112  * @path_sep
113  *      Path separator character used in @path and @wildcard.
114  *
115  * @prefix_ok
116  *      If %true, allow a prefix of @path, terminated by a path separator, to
117  *      match the pattern, in addition to @path itself.  In other words, return
118  *      %true if the pattern actually matches one of the ancestor directories of
119  *      @path.
120  *
121  * Returns %true if there was a match; %false if there was not.
122  */
123 bool
124 match_path(const tchar *path, size_t path_nchars,
125            const tchar *wildcard, tchar path_sep, bool prefix_ok)
126 {
127         if (*wildcard != path_sep) {
128                 /* Pattern doesn't begin with path separator.  Try to match the
129                  * file name only.  */
130                 return match_wildcard(path_basename_with_len(path, path_nchars),
131                                       wildcard, tstrlen(wildcard),
132                                       default_ignore_case);
133         } else {
134                 /* Pattern begins with path separator.  Try to match the whole
135                  * path.  */
136                 do {
137                         if (!*wildcard) {
138                                 /* Path has more components than pattern  */
139                                 return prefix_ok;
140                         }
141
142                         size_t path_component_len = 0;
143                         size_t wildcard_component_len = 0;
144
145                         do {
146                                 path_component_len++;
147                         } while (path[path_component_len] != path_sep &&
148                                  path[path_component_len] != T('\0'));
149                         do {
150                                 wildcard_component_len++;
151                         } while (wildcard[wildcard_component_len] != path_sep &&
152                                  wildcard[wildcard_component_len] != T('\0'));
153                         if (!do_match_wildcard(path, path_component_len,
154                                                wildcard, wildcard_component_len,
155                                                default_ignore_case))
156                                 return false;
157                         path += path_component_len;
158                         wildcard += wildcard_component_len;
159                 } while (*path);
160
161                 return (*wildcard == '\0');
162         }
163 }
164
/* Forward declaration: match_dentry() calls expand_wildcard_recursive() to
 * descend into a matched dentry, and is itself called from it.  */
static int
expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                          struct match_dentry_ctx *ctx);
168
/* Possible results of wildcard_status(): how much of a wildcard path is left
 * at a given position.  */
enum {
        /* Nothing is left.  */
        WILDCARD_STATUS_DONE_FULLY = 0,

        /* Only path separators are left.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES = 1,

        /* Something other than path separators is left.  */
        WILDCARD_STATUS_NOT_DONE = 2,
};
174
175 static int
176 wildcard_status(const tchar *wildcard)
177 {
178         if (*wildcard == T('\0'))
179                 return WILDCARD_STATUS_DONE_FULLY;
180         while (*wildcard == WIM_PATH_SEPARATOR)
181                 wildcard++;
182         if (*wildcard == T('\0'))
183                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
184
185         return WILDCARD_STATUS_NOT_DONE;
186 }
187
188 static int
189 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
190 {
191         const tchar *name;
192         size_t name_len;
193         int ret;
194
195         if (cur_dentry->file_name_nbytes == 0)
196                 return 0;
197
198         ret = utf16le_get_tstr(cur_dentry->file_name,
199                                cur_dentry->file_name_nbytes,
200                                &name, &name_len);
201         if (ret)
202                 return ret;
203         name_len /= sizeof(tchar);
204
205         if (match_wildcard(name,
206                            &ctx->wildcard_path[ctx->cur_component_offset],
207                            ctx->cur_component_len,
208                            ctx->case_insensitive))
209         {
210                 switch (wildcard_status(&ctx->wildcard_path[
211                                 ctx->cur_component_offset +
212                                 ctx->cur_component_len]))
213                 {
214                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
215                         if (!dentry_is_directory(cur_dentry)) {
216                                 ret = 0;
217                                 break;
218                         }
219                         /* Fall through  */
220                 case WILDCARD_STATUS_DONE_FULLY:
221                         ret = (*ctx->consume_dentry)(cur_dentry,
222                                                      ctx->consume_dentry_ctx);
223                         ctx->consume_dentry_count++;
224                         break;
225                 case WILDCARD_STATUS_NOT_DONE:
226                         ret = expand_wildcard_recursive(cur_dentry, ctx);
227                         break;
228                 }
229         } else {
230                 ret = 0;
231         }
232
233         utf16le_put_tstr(name);
234
235         return ret;
236 }
237
238 static int
239 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
240                           struct match_dentry_ctx *ctx)
241 {
242         tchar *w;
243         size_t begin;
244         size_t end;
245         size_t len;
246         size_t offset_save;
247         size_t len_save;
248         int ret;
249         struct wim_dentry *child;
250
251         w = ctx->wildcard_path;
252
253         begin = ctx->cur_component_offset + ctx->cur_component_len;
254         while (w[begin] == WIM_PATH_SEPARATOR)
255                 begin++;
256
257         end = begin;
258
259         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
260                 end++;
261
262         len = end - begin;
263
264         if (len == 0)
265                 return 0;
266
267         offset_save = ctx->cur_component_offset;
268         len_save = ctx->cur_component_len;
269
270         ctx->cur_component_offset = begin;
271         ctx->cur_component_len = len;
272
273         ret = 0;
274         for_dentry_child(child, cur_dentry) {
275                 ret = match_dentry(child, ctx);
276                 if (ret)
277                         break;
278         }
279
280         ctx->cur_component_len = len_save;
281         ctx->cur_component_offset = offset_save;
282
283         return ret;
284 }
285
286 /* Expand a wildcard relative to the current WIM image.
287  *
288  * @wim
289  *      WIMStruct whose currently selected image is searched to expand the
290  *      wildcard.
291  * @wildcard_path
292  *      Wildcard path to expand, which may contain the '?' and '*' characters.
293  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
294  *      ignored, whereas one or more trailing path separators indicate that the
295  *      wildcard path can only match directories (and not reparse points).
296  * @consume_dentry
297  *      Callback function which will receive each directory entry matched by the
298  *      wildcard.
299  * @consume_dentry_ctx
300  *      Argument to pass to @consume_dentry.
301  * @flags
302  *      Zero or more of the following flags:
303  *
304  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
305  *              Issue a warning if the wildcard does not match any dentries.
306  *
307  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
308  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
309  *              wildcard does not match any dentries.
310  *
311  *      WILDCARD_FLAG_CASE_INSENSITIVE:
312  *              Perform the matching case insensitively.  Note that this may
313  *              cause @wildcard to match multiple dentries, even if it does not
314  *              contain wildcard characters.
315  *
316  * @return 0 on success; a positive error code on error; or the first nonzero
317  * value returned by @consume_dentry.
318  */
319 int
320 expand_wildcard(WIMStruct *wim,
321                 const tchar *wildcard_path,
322                 int (*consume_dentry)(struct wim_dentry *, void *),
323                 void *consume_dentry_ctx,
324                 u32 flags)
325 {
326         struct wim_dentry *root;
327         int ret;
328
329         root = wim_root_dentry(wim);
330         if (root == NULL)
331                 goto no_match;
332
333         struct match_dentry_ctx ctx = {
334                 .consume_dentry = consume_dentry,
335                 .consume_dentry_ctx = consume_dentry_ctx,
336                 .consume_dentry_count = 0,
337                 .wildcard_path = TSTRDUP(wildcard_path),
338                 .cur_component_offset = 0,
339                 .cur_component_len = 0,
340                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
341         };
342
343         if (ctx.wildcard_path == NULL)
344                 return WIMLIB_ERR_NOMEM;
345
346         ret = expand_wildcard_recursive(root, &ctx);
347         FREE(ctx.wildcard_path);
348         if (ret == 0 && ctx.consume_dentry_count == 0)
349                 goto no_match;
350         return ret;
351
352 no_match:
353         ret = 0;
354         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
355                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
356
357         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
358                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
359                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
360         }
361         return ret;
362 }