]> wimlib.net Git - wimlib/blob - src/wildcard.c
Use --enable-ssse3-sha1 for x86_64 Windows builds
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option) any
13  * later version.
14  *
15  * This file is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this file; if not, see http://www.gnu.org/licenses/.
22  */
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include <ctype.h>
29 #include "wimlib/dentry.h"
30 #include "wimlib/encoding.h"
31 #include "wimlib/error.h"
32 #include "wimlib/metadata.h"
33 #include "wimlib/paths.h"
34 #include "wimlib/wildcard.h"
35
/* State shared by match_dentry() and expand_wildcard_recursive() while a
 * wildcard path is being expanded component by component.  */
struct match_dentry_ctx {
        /* Callback invoked for every dentry matched by the full wildcard path,
         * and the opaque argument passed to it.  */
        int (*consume_dentry)(struct wim_dentry *, void *);
        void *consume_dentry_ctx;

        /* Number of times @consume_dentry has been called so far.  */
        size_t consume_dentry_count;

        /* The wildcard path being expanded.  */
        tchar *wildcard_path;

        /* Offset and length, in characters, of the component of @wildcard_path
         * that is currently being matched against dentry names.  */
        size_t cur_component_offset;
        size_t cur_component_len;

        /* Passed to do_match_wildcard() as its ignore_case argument.  */
        bool case_insensitive;
};
45
46 static bool
47 do_match_wildcard(const tchar *string, size_t string_len,
48                   const tchar *wildcard, size_t wildcard_len,
49                   bool ignore_case)
50 {
51         for (;;) {
52                 if (string_len == 0) {
53                         while (wildcard_len != 0 && *wildcard == T('*')) {
54                                 wildcard++;
55                                 wildcard_len--;
56                         }
57                         return (wildcard_len == 0);
58                 } else if (wildcard_len == 0) {
59                         return false;
60                 } else if (*string == *wildcard || *wildcard == T('?') ||
61                            (ignore_case && totlower(*string) == totlower(*wildcard)))
62                 {
63                         string++;
64                         string_len--;
65                         wildcard_len--;
66                         wildcard++;
67                         continue;
68                 } else if (*wildcard == T('*')) {
69                         return do_match_wildcard(string, string_len,
70                                                  wildcard + 1, wildcard_len - 1,
71                                                  ignore_case) ||
72                                do_match_wildcard(string + 1, string_len - 1,
73                                                  wildcard, wildcard_len,
74                                                  ignore_case);
75                 } else {
76                         return false;
77                 }
78         }
79 }
80
81 static bool
82 match_wildcard(const tchar *string, const tchar *wildcard,
83                size_t wildcard_len, bool ignore_case)
84 {
85         return do_match_wildcard(string, tstrlen(string),
86                                  wildcard, wildcard_len, ignore_case);
87 }
88
89 /*
90  * Determines whether a path matches a wildcard pattern.
91  *
92  * @path
93  *      The path to match.  Assumptions:  All path separators must be @path_sep,
94  *      there cannot be consecutive path separators, there cannot be a trailing
95  *      path separator, and there must be exactly one leading path separator.
96  *
97  * @path_nchars
98  *      Number of characters in @path.
99  *
100  * @wildcard
101  *      The wildcard pattern to match.  It can contain the wildcard characters
102  *      '*' and '?'.  The former matches zero or more characters except
103  *      @path_sep, and the latter matches any character except @path_sep.  All
104  *      path separators in the pattern must be @path_sep, and there cannot be
105  *      consecutive path separators, and there cannot be a trailing path
106  *      separator.  If there is a leading path separator, the match is attempted
107  *      with the filename only; otherwise, the match is attempted with the whole
108  *      path.
109  *
110  * @path_sep
111  *      Path separator character used in @path and @wildcard.
112  *
113  * @prefix_ok
114  *      If %true, allow a prefix of @path, terminated by a path separator, to
115  *      match the pattern, in addition to @path itself.  In other words, return
116  *      %true if the pattern actually matches one of the ancestor directories of
117  *      @path.
118  *
119  * Returns %true if there was a match; %false if there was not.
120  */
121 bool
122 match_path(const tchar *path, size_t path_nchars,
123            const tchar *wildcard, tchar path_sep, bool prefix_ok)
124 {
125         if (*wildcard != path_sep) {
126                 /* Pattern doesn't begin with path separator.  Try to match the
127                  * file name only.  */
128                 return match_wildcard(path_basename_with_len(path, path_nchars),
129                                       wildcard, tstrlen(wildcard),
130                                       default_ignore_case);
131         } else {
132                 /* Pattern begins with path separator.  Try to match the whole
133                  * path.  */
134                 do {
135                         if (!*wildcard) {
136                                 /* Path has more components than pattern  */
137                                 return prefix_ok;
138                         }
139
140                         size_t path_component_len = 0;
141                         size_t wildcard_component_len = 0;
142
143                         do {
144                                 path_component_len++;
145                         } while (path[path_component_len] != path_sep &&
146                                  path[path_component_len] != T('\0'));
147                         do {
148                                 wildcard_component_len++;
149                         } while (wildcard[wildcard_component_len] != path_sep &&
150                                  wildcard[wildcard_component_len] != T('\0'));
151                         if (!do_match_wildcard(path, path_component_len,
152                                                wildcard, wildcard_component_len,
153                                                default_ignore_case))
154                                 return false;
155                         path += path_component_len;
156                         wildcard += wildcard_component_len;
157                 } while (*path);
158
159                 return (*wildcard == '\0');
160         }
161 }
162
/* Forward declaration: match_dentry() and expand_wildcard_recursive() call
 * each other.  */
static int
expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                          struct match_dentry_ctx *ctx);
166
/* Possible results of wildcard_status(), describing what is left of a
 * wildcard path after the component that has just been matched.  */
enum {
        /* Nothing is left: the end of the wildcard path was reached.  */
        WILDCARD_STATUS_DONE_FULLY,
        /* Only path separators are left.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES,
        /* Something other than path separators is left.  */
        WILDCARD_STATUS_NOT_DONE,
};
172
173 static int
174 wildcard_status(const tchar *wildcard)
175 {
176         if (*wildcard == T('\0'))
177                 return WILDCARD_STATUS_DONE_FULLY;
178         while (*wildcard == WIM_PATH_SEPARATOR)
179                 wildcard++;
180         if (*wildcard == T('\0'))
181                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
182
183         return WILDCARD_STATUS_NOT_DONE;
184 }
185
186 static int
187 match_dentry(struct wim_dentry *cur_dentry, struct match_dentry_ctx *ctx)
188 {
189         const tchar *name;
190         size_t name_nchars;
191         int ret;
192
193         if (cur_dentry->file_name_nbytes == 0)
194                 return 0;
195
196         ret = utf16le_get_tstr(cur_dentry->file_name,
197                                cur_dentry->file_name_nbytes,
198                                &name, &name_nchars);
199         if (ret)
200                 return ret;
201         name_nchars /= sizeof(tchar);
202
203         if (do_match_wildcard(name,
204                               name_nchars,
205                               &ctx->wildcard_path[ctx->cur_component_offset],
206                               ctx->cur_component_len,
207                               ctx->case_insensitive))
208         {
209                 switch (wildcard_status(&ctx->wildcard_path[
210                                 ctx->cur_component_offset +
211                                 ctx->cur_component_len]))
212                 {
213                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
214                         if (!dentry_is_directory(cur_dentry)) {
215                                 ret = 0;
216                                 break;
217                         }
218                         /* Fall through  */
219                 case WILDCARD_STATUS_DONE_FULLY:
220                         ret = (*ctx->consume_dentry)(cur_dentry,
221                                                      ctx->consume_dentry_ctx);
222                         ctx->consume_dentry_count++;
223                         break;
224                 case WILDCARD_STATUS_NOT_DONE:
225                         ret = expand_wildcard_recursive(cur_dentry, ctx);
226                         break;
227                 }
228         } else {
229                 ret = 0;
230         }
231
232         utf16le_put_tstr(name);
233
234         return ret;
235 }
236
237 static int
238 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
239                           struct match_dentry_ctx *ctx)
240 {
241         tchar *w;
242         size_t begin;
243         size_t end;
244         size_t len;
245         size_t offset_save;
246         size_t len_save;
247         int ret;
248         struct wim_dentry *child;
249
250         w = ctx->wildcard_path;
251
252         begin = ctx->cur_component_offset + ctx->cur_component_len;
253         while (w[begin] == WIM_PATH_SEPARATOR)
254                 begin++;
255
256         end = begin;
257
258         while (w[end] != T('\0') && w[end] != WIM_PATH_SEPARATOR)
259                 end++;
260
261         len = end - begin;
262
263         if (len == 0)
264                 return 0;
265
266         offset_save = ctx->cur_component_offset;
267         len_save = ctx->cur_component_len;
268
269         ctx->cur_component_offset = begin;
270         ctx->cur_component_len = len;
271
272         ret = 0;
273         for_dentry_child(child, cur_dentry) {
274                 ret = match_dentry(child, ctx);
275                 if (ret)
276                         break;
277         }
278
279         ctx->cur_component_len = len_save;
280         ctx->cur_component_offset = offset_save;
281
282         return ret;
283 }
284
285 /* Expand a wildcard relative to the current WIM image.
286  *
287  * @wim
288  *      WIMStruct whose currently selected image is searched to expand the
289  *      wildcard.
290  * @wildcard_path
291  *      Wildcard path to expand, which may contain the '?' and '*' characters.
292  *      Path separators must be WIM_PATH_SEPARATOR.  Leading path separators are
293  *      ignored, whereas one or more trailing path separators indicate that the
294  *      wildcard path can only match directories (and not reparse points).
295  * @consume_dentry
296  *      Callback function which will receive each directory entry matched by the
297  *      wildcard.
298  * @consume_dentry_ctx
299  *      Argument to pass to @consume_dentry.
300  * @flags
301  *      Zero or more of the following flags:
302  *
303  *      WILDCARD_FLAG_WARN_IF_NO_MATCH:
304  *              Issue a warning if the wildcard does not match any dentries.
305  *
306  *      WILDCARD_FLAG_ERROR_IF_NO_MATCH:
307  *              Issue an error and return WIMLIB_ERR_PATH_DOES_NOT_EXIST if the
308  *              wildcard does not match any dentries.
309  *
310  *      WILDCARD_FLAG_CASE_INSENSITIVE:
311  *              Perform the matching case insensitively.  Note that this may
312  *              cause @wildcard to match multiple dentries, even if it does not
313  *              contain wildcard characters.
314  *
315  * @return 0 on success; a positive error code on error; or the first nonzero
316  * value returned by @consume_dentry.
317  */
318 int
319 expand_wildcard(WIMStruct *wim,
320                 const tchar *wildcard_path,
321                 int (*consume_dentry)(struct wim_dentry *, void *),
322                 void *consume_dentry_ctx,
323                 u32 flags)
324 {
325         struct wim_dentry *root;
326         int ret;
327
328         root = wim_get_current_root_dentry(wim);
329         if (root == NULL)
330                 goto no_match;
331
332         struct match_dentry_ctx ctx = {
333                 .consume_dentry = consume_dentry,
334                 .consume_dentry_ctx = consume_dentry_ctx,
335                 .consume_dentry_count = 0,
336                 .wildcard_path = TSTRDUP(wildcard_path),
337                 .cur_component_offset = 0,
338                 .cur_component_len = 0,
339                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
340         };
341
342         if (ctx.wildcard_path == NULL)
343                 return WIMLIB_ERR_NOMEM;
344
345         ret = expand_wildcard_recursive(root, &ctx);
346         FREE(ctx.wildcard_path);
347         if (ret == 0 && ctx.consume_dentry_count == 0)
348                 goto no_match;
349         return ret;
350
351 no_match:
352         ret = 0;
353         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
354                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
355
356         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
357                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
358                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
359         }
360         return ret;
361 }