Add support for extract list files
[wimlib] / src / wildcard.c
1 /*
2  * wildcard.c
3  *
4  * Wildcard matching functions.
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/metadata.h"
34 #include "wimlib/wildcard.h"
35
36 struct match_dentry_ctx {
37         int (*consume_path)(const tchar *, void *, bool);
38         void *consume_path_ctx;
39         size_t consume_path_count;
40         tchar *expanded_path;
41         size_t expanded_path_len;
42         size_t expanded_path_alloc_len;
43         tchar *wildcard_path;
44         size_t cur_component_offset;
45         size_t cur_component_len;
46         bool case_insensitive;
47 };
48
49 static bool
50 match_wildcard(const tchar *string, tchar *wildcard,
51                size_t wildcard_len, bool case_insensitive)
52 {
53         char orig;
54         int flags;
55         int ret;
56
57         orig = wildcard[wildcard_len];
58         wildcard[wildcard_len] = T('\0');
59
60         /* Warning: in Windows builds fnmatch() calls a replacement function.
61          * Also, FNM_CASEFOLD is a GNU extension and it is defined to 0 if not
62          * available.  */
63         flags = FNM_NOESCAPE;
64         if (case_insensitive)
65                 flags |= FNM_CASEFOLD;
66         ret = fnmatch(wildcard, string, flags);
67
68         wildcard[wildcard_len] = orig;
69         return (ret == 0);
70 }
71
/* Forward declaration: match_dentry() calls expand_wildcard_recursive(), which
 * in turn passes match_dentry() to for_dentry_child().  */
static int expand_wildcard_recursive(struct wim_dentry *cur_dentry,
                                     struct match_dentry_ctx *ctx);
75
/* Results of wildcard_status(), describing what is left of a pattern.  */
enum {
        /* Nothing is left: the remaining pattern is the empty string.  */
        WILDCARD_STATUS_DONE_FULLY = 0,

        /* Only path separators are left.  */
        WILDCARD_STATUS_DONE_TRAILING_SLASHES = 1,

        /* Something other than path separators is left.  */
        WILDCARD_STATUS_NOT_DONE = 2,
};
81
82 static int
83 wildcard_status(const tchar *wildcard)
84 {
85         if (*wildcard == T('\0'))
86                 return WILDCARD_STATUS_DONE_FULLY;
87         while (is_any_path_separator(*wildcard))
88                 wildcard++;
89         if (*wildcard == T('\0'))
90                 return WILDCARD_STATUS_DONE_TRAILING_SLASHES;
91
92         return WILDCARD_STATUS_NOT_DONE;
93 }
94
95 static int
96 match_dentry(struct wim_dentry *cur_dentry, void *_ctx)
97 {
98         struct match_dentry_ctx *ctx = _ctx;
99         tchar *name;
100         size_t name_len;
101         int ret;
102
103         if (cur_dentry->file_name_nbytes == 0)
104                 return 0;
105
106 #if TCHAR_IS_UTF16LE
107         name = cur_dentry->file_name;
108         name_len = cur_dentry->file_name_nbytes;
109 #else
110         ret = utf16le_to_tstr(cur_dentry->file_name,
111                               cur_dentry->file_name_nbytes,
112                               &name, &name_len);
113         if (ret)
114                 return ret;
115 #endif
116         name_len /= sizeof(tchar);
117
118         if (match_wildcard(name,
119                            &ctx->wildcard_path[ctx->cur_component_offset],
120                            ctx->cur_component_len,
121                            ctx->case_insensitive))
122         {
123                 size_t len_needed = ctx->expanded_path_len + 1 + name_len + 1;
124                 size_t expanded_path_len_save;
125
126                 if (len_needed > ctx->expanded_path_alloc_len) {
127                         tchar *expanded_path;
128
129                         expanded_path = REALLOC(ctx->expanded_path,
130                                                 len_needed * sizeof(ctx->expanded_path[0]));
131                         if (expanded_path == NULL) {
132                                 ret = WIMLIB_ERR_NOMEM;
133                                 goto out_free_name;
134                         }
135                         ctx->expanded_path = expanded_path;
136                         ctx->expanded_path_alloc_len = len_needed;
137                 }
138                 expanded_path_len_save = ctx->expanded_path_len;
139
140                 ctx->expanded_path[ctx->expanded_path_len++] = WIM_PATH_SEPARATOR;
141                 tmemcpy(&ctx->expanded_path[ctx->expanded_path_len],
142                         name, name_len);
143                 ctx->expanded_path_len += name_len;
144                 ctx->expanded_path[ctx->expanded_path_len] = T('\0');
145
146                 switch (wildcard_status(&ctx->wildcard_path[
147                                 ctx->cur_component_offset +
148                                 ctx->cur_component_len]))
149                 {
150                 case WILDCARD_STATUS_DONE_TRAILING_SLASHES:
151                         if (!dentry_is_directory(cur_dentry)) {
152                                 ret = 0;
153                                 break;
154                         }
155                         /* Fall through  */
156                 case WILDCARD_STATUS_DONE_FULLY:
157                         ret = (*ctx->consume_path)(ctx->expanded_path,
158                                                    ctx->consume_path_ctx,
159                                                    false);
160                         ctx->consume_path_count++;
161                         break;
162                 case WILDCARD_STATUS_NOT_DONE:
163                         ret = expand_wildcard_recursive(cur_dentry, ctx);
164                         break;
165                 }
166                 ctx->expanded_path_len = expanded_path_len_save;
167                 ctx->expanded_path[expanded_path_len_save] = T('\0');
168         } else {
169                 ret = 0;
170         }
171
172 out_free_name:
173 #if !TCHAR_IS_UTF16LE
174         FREE(name);
175 #endif
176         return ret;
177 }
178
179 static int
180 expand_wildcard_recursive(struct wim_dentry *cur_dentry,
181                           struct match_dentry_ctx *ctx)
182 {
183         tchar *w;
184         size_t begin;
185         size_t end;
186         size_t len;
187         size_t offset_save;
188         size_t len_save;
189         int ret;
190
191         w = ctx->wildcard_path;
192
193         begin = ctx->cur_component_offset + ctx->cur_component_len;
194         while (is_any_path_separator(w[begin]))
195                 begin++;
196
197         end = begin;
198
199         while (w[end] != T('\0') && !is_any_path_separator(w[end]))
200                 end++;
201
202         len = end - begin;
203
204         if (len == 0)
205                 return 0;
206
207         offset_save = ctx->cur_component_offset;
208         len_save = ctx->cur_component_len;
209
210         ctx->cur_component_offset = begin;
211         ctx->cur_component_len = len;
212
213         ret = for_dentry_child(cur_dentry, match_dentry, ctx);
214
215         ctx->cur_component_len = len_save;
216         ctx->cur_component_offset = offset_save;
217
218         return ret;
219 }
220
221 static int
222 expand_wildcard(WIMStruct *wim,
223                 const tchar *wildcard_path,
224                 int (*consume_path)(const tchar *, void *, bool),
225                 void *consume_path_ctx,
226                 u32 flags)
227 {
228         struct wim_dentry *root;
229         int ret;
230
231         root = wim_root_dentry(wim);
232         if (root == NULL)
233                 goto no_match;
234
235         struct match_dentry_ctx ctx = {
236                 .consume_path = consume_path,
237                 .consume_path_ctx = consume_path_ctx,
238                 .consume_path_count = 0,
239                 .expanded_path = MALLOC(256 * sizeof(ctx.expanded_path[0])),
240                 .expanded_path_len = 0,
241                 .expanded_path_alloc_len = 256,
242                 .wildcard_path = TSTRDUP(wildcard_path),
243                 .cur_component_offset = 0,
244                 .cur_component_len = 0,
245                 .case_insensitive = ((flags & WILDCARD_FLAG_CASE_INSENSITIVE) != 0),
246         };
247
248         if (ctx.expanded_path == NULL || ctx.wildcard_path == NULL) {
249                 FREE(ctx.expanded_path);
250                 FREE(ctx.wildcard_path);
251                 return WIMLIB_ERR_NOMEM;
252         }
253
254         ret = expand_wildcard_recursive(root, &ctx);
255         FREE(ctx.expanded_path);
256         FREE(ctx.wildcard_path);
257         if (ret == 0 && ctx.consume_path_count == 0)
258                 goto no_match;
259         return ret;
260
261 no_match:
262         ret = 0;
263         if (flags & WILDCARD_FLAG_USE_LITERAL_IF_NO_MATCHES)
264                 ret = (*consume_path)(wildcard_path, consume_path_ctx, true);
265
266         if (flags & WILDCARD_FLAG_WARN_IF_NO_MATCH)
267                 WARNING("No matches for wildcard path \"%"TS"\"", wildcard_path);
268
269         if (flags & WILDCARD_FLAG_ERROR_IF_NO_MATCH) {
270                 ERROR("No matches for wildcard path \"%"TS"\"", wildcard_path);
271                 ret = WIMLIB_ERR_PATH_DOES_NOT_EXIST;
272         }
273         return ret;
274 }
275
276 struct expanded_paths_ctx {
277         tchar **expanded_paths;
278         size_t num_expanded_paths;
279         size_t alloc_length;
280 };
281
282 static int
283 append_path_cb(const tchar *path, void *_ctx, bool may_need_trans)
284 {
285         struct expanded_paths_ctx *ctx = _ctx;
286         tchar *path_dup;
287
288         if (ctx->num_expanded_paths == ctx->alloc_length) {
289                 tchar **new_paths;
290                 size_t new_alloc_length = max(ctx->alloc_length + 8,
291                                               ctx->alloc_length * 3 / 2);
292
293                 new_paths = REALLOC(ctx->expanded_paths,
294                                     new_alloc_length * sizeof(new_paths[0]));
295                 if (new_paths == NULL)
296                         return WIMLIB_ERR_NOMEM;
297                 ctx->expanded_paths = new_paths;
298                 ctx->alloc_length = new_alloc_length;
299         }
300         path_dup = TSTRDUP(path);
301         if (path_dup == NULL)
302                 return WIMLIB_ERR_NOMEM;
303         if (may_need_trans) {
304                 for (tchar *p = path_dup; *p; p++)
305                         if (is_any_path_separator(*p))
306                                 *p = WIM_PATH_SEPARATOR;
307         }
308         ctx->expanded_paths[ctx->num_expanded_paths++] = path_dup;
309         return 0;
310 }
311
312 int
313 expand_wildcard_wim_paths(WIMStruct *wim,
314                           const char * const *wildcards,
315                           size_t num_wildcards,
316                           tchar ***expanded_paths_ret,
317                           size_t *num_expanded_paths_ret,
318                           u32 flags)
319 {
320         int ret;
321         struct expanded_paths_ctx ctx = {
322                 .expanded_paths = NULL,
323                 .num_expanded_paths = 0,
324                 .alloc_length = 0,
325         };
326         for (size_t i = 0; i < num_wildcards; i++) {
327                 ret = expand_wildcard(wim, wildcards[i], append_path_cb, &ctx,
328                                       flags);
329                 if (ret)
330                         goto out_free;
331         }
332         *expanded_paths_ret = ctx.expanded_paths;
333         *num_expanded_paths_ret = ctx.num_expanded_paths;
334         return 0;
335
336 out_free:
337         for (size_t i = 0; i < ctx.num_expanded_paths; i++)
338                 FREE(ctx.expanded_paths[i]);
339         FREE(ctx.expanded_paths);
340         return ret;
341 }