/*
 * capture_common.c - Mostly code to handle excluding paths from capture.
 */

/*
 * Copyright (C) 2013, 2014 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */
28 #include "wimlib/blob_table.h"
29 #include "wimlib/capture.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/error.h"
32 #include "wimlib/paths.h"
33 #include "wimlib/pattern.h"
34 #include "wimlib/progress.h"
35 #include "wimlib/textfile.h"
38 * Tally a file (or directory) that has been scanned for a capture operation,
39 * and possibly call the progress function provided by the library user.
42 * Flags, optional progress function, and progress data for the capture
45 * Status of the scanned file.
47 * If @status is WIMLIB_SCAN_DENTRY_OK, this is a pointer to the WIM inode
48 * that has been created for the scanned file. The first time the file is
49 * seen, inode->i_nlink will be 1. On subsequent visits of the same inode
50 * via additional hard links, inode->i_nlink will be greater than 1.
53 do_capture_progress(struct capture_params *params, int status,
54 const struct wim_inode *inode)
57 case WIMLIB_SCAN_DENTRY_OK:
58 if (!(params->add_flags & WIMLIB_ADD_FLAG_VERBOSE))
61 case WIMLIB_SCAN_DENTRY_UNSUPPORTED:
62 case WIMLIB_SCAN_DENTRY_EXCLUDED:
63 case WIMLIB_SCAN_DENTRY_FIXED_SYMLINK:
64 case WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK:
65 if (!(params->add_flags & WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE))
69 params->progress.scan.status = status;
70 if (status == WIMLIB_SCAN_DENTRY_OK && inode->i_nlink == 1) {
72 /* Successful scan, and visiting inode for the first time */
74 /* Tally size of all streams. */
75 for (unsigned i = 0; i < inode->i_num_streams; i++) {
76 const struct blob_descriptor *blob =
77 stream_blob_resolved(&inode->i_streams[i]);
79 params->progress.scan.num_bytes_scanned += blob->size;
82 /* Tally the file itself. */
83 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
84 params->progress.scan.num_dirs_scanned++;
86 params->progress.scan.num_nondirs_scanned++;
89 /* Call the user-provided progress function. */
90 return call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_SCAN_DENTRY,
91 ¶ms->progress, params->progctx);
95 * Given a null-terminated pathname pattern @pat that has been read from line
96 * @line_no of the file @path, validate and canonicalize the pattern.
98 * On success, returns 0.
99 * On failure, returns WIMLIB_ERR_INVALID_CAPTURE_CONFIG.
100 * In either case, @pat may have been modified in-place (and possibly
104 mangle_pat(tchar *pat, const tchar *path, unsigned long line_no)
106 if (!is_any_path_separator(pat[0]) &&
107 pat[0] != T('\0') && pat[1] == T(':'))
109 /* Pattern begins with drive letter. */
111 if (!is_any_path_separator(pat[2])) {
112 /* Something like c:file, which is actually a path
113 * relative to the current working directory on the c:
114 * drive. We require paths with drive letters to be
116 ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
117 " Patterns including drive letters must be absolute!\n"
118 " Maybe try \"%"TC":%"TC"%"TS"\"?\n",
120 pat[0], OS_PREFERRED_PATH_SEPARATOR, &pat[2]);
121 return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
124 WARNING("%"TS":%lu: Pattern \"%"TS"\" starts with a drive "
125 "letter, which is being removed.",
128 /* Strip the drive letter. */
129 tmemmove(pat, pat + 2, tstrlen(pat + 2) + 1);
132 /* Collapse consecutive path separators, and translate both / and \ into
133 * / (UNIX) or \ (Windows).
135 * Note: we expect that this function produces patterns that can be used
136 * for both filesystem paths and WIM paths, so the desired path
137 * separators must be the same. */
138 BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR);
139 do_canonicalize_path(pat, pat);
141 /* Relative patterns can only match file names, so they must be
142 * single-component only. */
143 if (pat[0] != OS_PREFERRED_PATH_SEPARATOR &&
144 tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR))
146 ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
147 " Relative patterns can only include one path component!\n"
148 " Maybe try \"%"TC"%"TS"\"?",
149 path, line_no, pat, OS_PREFERRED_PATH_SEPARATOR, pat);
150 return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
157 * Read, parse, and validate a capture configuration file from either an on-disk
158 * file or an in-memory buffer.
160 * To read from a file, specify @config_file, and use NULL for @buf.
161 * To read from a buffer, specify @buf and @bufsize.
163 * @config must be initialized to all 0's.
165 * On success, 0 will be returned, and the resulting capture configuration will
166 * be stored in @config.
168 * On failure, a positive error code will be returned, and the contents of
169 * @config will be invalidated.
172 read_capture_config(const tchar *config_file, const void *buf,
173 size_t bufsize, struct capture_config *config)
177 /* [PrepopulateList] is used for apply, not capture. But since we do
178 * understand it, recognize it, thereby avoiding the unrecognized
179 * section warning, but discard the resulting strings.
181 * We currently ignore [CompressionExclusionList] and
182 * [CompressionFolderList]. This is a known issue that doesn't seem to
183 * have any real consequences, so don't issue warnings about not
184 * recognizing those sections. */
185 STRING_SET(prepopulate_pats);
186 STRING_SET(compression_exclusion_pats);
187 STRING_SET(compression_folder_pats);
189 struct text_file_section sections[] = {
191 &config->exclusion_pats},
192 {T("ExclusionException"),
193 &config->exclusion_exception_pats},
194 {T("PrepopulateList"),
196 {T("CompressionExclusionList"),
197 &compression_exclusion_pats},
198 {T("CompressionFolderList"),
199 &compression_folder_pats},
203 ret = do_load_text_file(config_file, buf, bufsize, &mem,
204 sections, ARRAY_LEN(sections),
205 LOAD_TEXT_FILE_REMOVE_QUOTES, mangle_pat);
207 ERROR("Failed to load capture configuration file \"%"TS"\"",
210 case WIMLIB_ERR_INVALID_UTF8_STRING:
211 case WIMLIB_ERR_INVALID_UTF16_STRING:
212 ERROR("Note: the capture configuration file must be "
213 "valid UTF-8 or UTF-16LE");
214 ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
216 case WIMLIB_ERR_OPEN:
217 case WIMLIB_ERR_STAT:
218 case WIMLIB_ERR_NOMEM:
219 case WIMLIB_ERR_READ:
220 ret = WIMLIB_ERR_UNABLE_TO_READ_CAPTURE_CONFIG;
226 FREE(prepopulate_pats.strings);
227 FREE(compression_exclusion_pats.strings);
228 FREE(compression_folder_pats.strings);
235 destroy_capture_config(struct capture_config *config)
237 FREE(config->exclusion_pats.strings);
238 FREE(config->exclusion_exception_pats.strings);
243 * Determine whether @path, or any ancestor directory of @path, matches any of
244 * the patterns in @list. Path separators in @path must be WIM_PATH_SEPARATOR.
247 match_pattern_list(const tchar *path, const struct string_set *list)
249 for (size_t i = 0; i < list->num_strings; i++)
250 if (match_path(path, list->strings[i], true))
/*
 * Determine if a file should be excluded from capture.
 *
 * This function tests exclusions from both possible sources of exclusions:
 *
 *	(1) The capture configuration file
 *	(2) The user-provided progress function
 *
 * The capture implementation must have set params->capture_root_nchars to an
 * appropriate value. Example for UNIX: if the capture root directory is
 * "foobar/subdir", then all paths will be provided starting with
 * "foobar/subdir", so params->capture_root_nchars must be set to
 * strlen("foobar/subdir") so that the appropriate path can be matched against
 * the patterns in the exclusion list.
 *
 * Returns:
 *	< 0 if excluded
 *	= 0 if not excluded and no error
 *	> 0 (wimlib error code) if error
 */
276 try_exclude(const tchar *full_path, const struct capture_params *params)
280 if (params->config) {
281 const tchar *path = full_path + params->capture_root_nchars;
282 if (match_pattern_list(path, ¶ms->config->exclusion_pats) &&
283 !match_pattern_list(path, ¶ms->config->exclusion_exception_pats))
287 if (unlikely(params->add_flags & WIMLIB_ADD_FLAG_TEST_FILE_EXCLUSION)) {
288 union wimlib_progress_info info;
290 info.test_file_exclusion.path = full_path;
291 info.test_file_exclusion.will_exclude = false;
294 /* Hack for Windows... */
296 wchar_t *p_question_mark = NULL;
298 if (!wcsncmp(full_path, L"\\??\\", 4)) {
299 /* Trivial transformation: NT namespace => Win32 namespace */
300 p_question_mark = (wchar_t *)&full_path[1];
301 *p_question_mark = L'\\';
305 ret = call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_TEST_FILE_EXCLUSION,
306 &info, params->progctx);
310 *p_question_mark = L'?';
315 if (info.test_file_exclusion.will_exclude)