2 * capture_common.c - Mostly code to handle excluding paths from capture.
6 * Copyright (C) 2013, 2014 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any later version.
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/capture.h"
29 #include "wimlib/dentry.h"
30 #include "wimlib/error.h"
31 #include "wimlib/lookup_table.h"
32 #include "wimlib/paths.h"
33 #include "wimlib/progress.h"
34 #include "wimlib/textfile.h"
35 #include "wimlib/wildcard.h"
40 * Tally a file (or directory) that has been scanned for a capture operation,
41 * and possibly call the progress function provided by the library user.
44 * @params: Flags, optional progress function, and progress data for the capture operation.
47 * @status: Status of the scanned file.
49 * @inode: If @status is WIMLIB_SCAN_DENTRY_OK, this is a pointer to the WIM inode
50 * that has been created for the scanned file. The first time the file is
51 * seen, inode->i_nlink will be 1. On subsequent visits of the same inode
52 * via additional hard links, inode->i_nlink will be greater than 1.
/*
 * do_capture_progress() - update the scan tallies held in @params and report
 * the scanned file through call_progress().
 *
 * @params: capture parameters.  add_flags is tested against
 *          WIMLIB_ADD_FLAG_VERBOSE / WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE,
 *          progress.scan receives the status and the running tallies, and
 *          progfunc / progctx are forwarded to call_progress().
 * @status: one of the WIMLIB_SCAN_DENTRY_* codes listed in the case labels.
 * @inode:  dereferenced only when @status is WIMLIB_SCAN_DENTRY_OK.
 *
 * The final statement returns the result of call_progress().
 *
 * NOTE(review): this view of the file is missing lines (the return-type line,
 * braces, the switch header, and the statements guarded by the two flag
 * checks), so what happens when a flag is not set cannot be seen here --
 * confirm against the complete source before relying on it.
 */
55 do_capture_progress(struct add_image_params *params, int status,
56 const struct wim_inode *inode)
59 case WIMLIB_SCAN_DENTRY_OK:
60 if (!(params->add_flags & WIMLIB_ADD_FLAG_VERBOSE))
63 case WIMLIB_SCAN_DENTRY_UNSUPPORTED:
64 case WIMLIB_SCAN_DENTRY_EXCLUDED:
65 case WIMLIB_SCAN_DENTRY_FIXED_SYMLINK:
66 case WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK:
67 if (!(params->add_flags & WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE))
71 params->progress.scan.status = status;
/* Only the first visit of an inode (i_nlink == 1) is tallied, so additional
 * hard links to the same inode do not inflate the byte and file counts. */
72 if (status == WIMLIB_SCAN_DENTRY_OK && inode->i_nlink == 1) {
74 /* Successful scan, and visiting inode for the first time */
76 /* Tally size of all data streams. */
77 const struct wim_lookup_table_entry *lte;
/* Inclusive bound: stream indices 0 through i_num_ads are all visited. */
78 for (unsigned i = 0; i <= inode->i_num_ads; i++) {
79 lte = inode_stream_lte_resolved(inode, i);
81 params->progress.scan.num_bytes_scanned += lte->size;
84 /* Tally the file itself. */
85 if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
86 params->progress.scan.num_dirs_scanned++;
88 params->progress.scan.num_nondirs_scanned++;
91 /* Call the user-provided progress function. */
92 return call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_SCAN_DENTRY,
93 &params->progress, params->progctx);
97 * Given a null-terminated pathname pattern @pat that has been read from line
98 * @line_no of the file @path, validate and canonicalize the pattern.
100 * On success, returns 0.
101 * On failure, returns WIMLIB_ERR_INVALID_CAPTURE_CONFIG.
102 * In either case, @pat may have been modified in-place (and possibly shortened).
/*
 * mangle_pat() - validate one pattern and rewrite it in place.
 *
 * @pat:     NUL-terminated pattern; modified in place.
 * @path:    name of the file the pattern came from; appears in the
 *           diagnostics below.
 * @line_no: line number of the pattern; likewise appears in the diagnostics.
 *
 * Steps visible in this view, in order:
 *   1. A pattern whose first character is neither a path separator nor NUL
 *      and whose second character is ':' is treated as starting with a drive
 *      letter.  If the character after the colon is not a path separator the
 *      pattern is rejected with WIMLIB_ERR_INVALID_CAPTURE_CONFIG; otherwise
 *      a warning is logged and the two-character prefix is removed with
 *      tmemmove() (the "+ 1" carries the terminating NUL along).
 *   2. do_canonicalize_path(pat, pat) rewrites the pattern in place.
 *   3. A pattern that does not begin with OS_PREFERRED_PATH_SEPARATOR but
 *      still contains one is rejected with
 *      WIMLIB_ERR_INVALID_CAPTURE_CONFIG.
 *
 * NOTE(review): this view of the file is missing lines (return-type line,
 * braces, some argument lines of the ERROR/WARNING calls, the end of one
 * comment, and the final return), so the success path is not visible here --
 * confirm against the complete source.
 */
106 mangle_pat(tchar *pat, const tchar *path, unsigned long line_no)
108 if (!is_any_path_separator(pat[0]) &&
109 pat[0] != T('\0') && pat[1] == T(':'))
111 /* Pattern begins with drive letter. */
113 if (!is_any_path_separator(pat[2])) {
114 /* Something like c:file, which is actually a path
115 * relative to the current working directory on the c:
116 * drive. We require paths with drive letters to be
118 ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
119 " Patterns including drive letters must be absolute!\n"
120 " Maybe try \"%"TC":%"TC"%"TS"\"?\n",
122 pat[0], OS_PREFERRED_PATH_SEPARATOR, &pat[2]);
123 return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
126 WARNING("%"TS":%lu: Pattern \"%"TS"\" starts with a drive "
127 "letter, which is being removed.",
130 /* Strip the drive letter. */
131 tmemmove(pat, pat + 2, tstrlen(pat + 2) + 1);
134 /* Collapse consecutive path separators, and translate both / and \ into
135 * / (UNIX) or \ (Windows).
137 * Note: we expect that this function produces patterns that can be used
138 * for both filesystem paths and WIM paths, so the desired path
139 * separators must be the same. */
140 BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR);
141 do_canonicalize_path(pat, pat);
143 /* Relative patterns can only match file names, so they must be
144 * single-component only. */
145 if (pat[0] != OS_PREFERRED_PATH_SEPARATOR &&
146 tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR))
148 ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
149 " Relative patterns can only include one path component!\n"
150 " Maybe try \"%"TC"%"TS"\"?",
151 path, line_no, pat, OS_PREFERRED_PATH_SEPARATOR, pat);
152 return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
159 * Read, parse, and validate a capture configuration file from either an on-disk
160 * file or an in-memory buffer.
162 * To read from a file, specify @config_file, and use NULL for @buf.
163 * To read from a buffer, specify @buf and @bufsize.
165 * @config must be initialized to all 0's.
167 * On success, 0 will be returned, and the resulting capture configuration will
168 * be stored in @config.
170 * On failure, a positive error code will be returned, and the contents of
171 * @config will be invalidated.
/*
 * read_capture_config() - load capture-configuration patterns through
 * do_load_text_file().
 *
 * @config_file, @buf, @bufsize: forwarded unchanged to do_load_text_file().
 * @config: receives the parsed patterns in exclusion_pats and
 *          exclusion_exception_pats.
 *
 * The sections[] table pairs section names with the string set that collects
 * their lines.  "ExclusionException" feeds config->exclusion_exception_pats;
 * the entry that feeds config->exclusion_pats has its name on a line that is
 * not visible in this view.  The three local string sets exist only so that
 * their sections are recognized; their .strings arrays are released with
 * FREE() after the load call.
 *
 * mangle_pat is handed to do_load_text_file() -- presumably as the per-line
 * validation callback; confirm against that function's declaration.
 *
 * NOTE(review): this view of the file is missing lines (return-type line,
 * braces, local declarations such as ret and mem, part of the section table,
 * and everything that handles ret or stores mem), so error handling and the
 * return value are not visible here.
 */
174 read_capture_config(const tchar *config_file, const void *buf,
175 size_t bufsize, struct capture_config *config)
179 /* [PrepopulateList] is used for apply, not capture. But since we do
180 * understand it, recognize it, thereby avoiding the unrecognized
181 * section warning, but discard the resulting strings.
183 * We currently ignore [CompressionExclusionList] and
184 * [CompressionFolderList]. This is a known issue that doesn't seem to
185 * have any real consequences, so don't issue warnings about not
186 * recognizing those sections. */
187 STRING_SET(prepopulate_pats);
188 STRING_SET(compression_exclusion_pats);
189 STRING_SET(compression_folder_pats);
191 struct text_file_section sections[] = {
193 &config->exclusion_pats},
194 {T("ExclusionException"),
195 &config->exclusion_exception_pats},
196 {T("PrepopulateList"),
198 {T("CompressionExclusionList"),
199 &compression_exclusion_pats},
200 {T("CompressionFolderList"),
201 &compression_folder_pats},
205 ret = do_load_text_file(config_file, buf, bufsize, &mem,
206 sections, ARRAY_LEN(sections),
207 LOAD_TEXT_FILE_REMOVE_QUOTES, mangle_pat);
211 FREE(prepopulate_pats.strings);
212 FREE(compression_exclusion_pats.strings);
213 FREE(compression_folder_pats.strings);
/*
 * destroy_capture_config() - release the pattern arrays held by @config.
 *
 * The visible statements FREE() exclusion_pats.strings and
 * exclusion_exception_pats.strings.
 *
 * NOTE(review): this view of the file is missing lines around this function
 * (return-type line, braces, and possibly further cleanup), so this may not
 * be the complete list of what is released.
 */
220 destroy_capture_config(struct capture_config *config)
222 FREE(config->exclusion_pats.strings);
223 FREE(config->exclusion_exception_pats.strings);
228 * Determine whether a path matches any wildcard pattern in a list.
230 * Special rules apply about what form @path must be in; see match_path().
/*
 * match_pattern_list() - test @path against each pattern in @list.
 *
 * Walks list->strings[0 .. num_strings - 1] and calls match_path() on each
 * one, passing OS_PREFERRED_PATH_SEPARATOR and true as the trailing
 * arguments.
 *
 * NOTE(review): the return-type line and both return statements are not
 * visible in this view; presumably the function reports true on the first
 * match and false otherwise -- confirm against the complete source.
 */
233 match_pattern_list(const tchar *path, size_t path_nchars,
234 const struct string_set *list)
236 for (size_t i = 0; i < list->num_strings; i++)
237 if (match_path(path, path_nchars, list->strings[i],
238 OS_PREFERRED_PATH_SEPARATOR, true))
244 * Determine whether the filesystem @path should be excluded from capture, based
245 * on the current capture configuration file.
247 * The @path must be given relative to the root of the capture, but with a
248 * leading path separator. For example, if the file "in/file" is being tested
249 * and the library user ran wimlib_add_image(wim, "in", ...), then the directory
250 * "in" is the root of the capture and the path should be specified as "/file".
252 * Also, all path separators in @path must be OS_PREFERRED_PATH_SEPARATOR, there
253 * cannot be trailing slashes, and there cannot be consecutive path separators.
255 * As a special case, the empty string will be interpreted as a single path
256 * separator (which means the root of capture itself).
/*
 * should_exclude_path() - decide whether @path is excluded by @config.
 *
 * The visible return expression excludes a path when it matches some pattern
 * in config->exclusion_pats and matches no pattern in
 * config->exclusion_exception_pats, i.e. an exception pattern overrides an
 * exclusion pattern.
 *
 * NOTE(review): the declaration of dummy and the condition under which it is
 * used are not visible in this view; presumably it substitutes a single path
 * separator for an empty @path -- confirm against the complete source.
 */
259 should_exclude_path(const tchar *path, size_t path_nchars,
260 const struct capture_config *config)
268 dummy[0] = OS_PREFERRED_PATH_SEPARATOR;
274 return match_pattern_list(path, path_nchars, &config->exclusion_pats) &&
275 !match_pattern_list(path, path_nchars, &config->exclusion_exception_pats);
280 * Determine if a file should be excluded from capture.
282 * This function tests exclusions from both of the two possible sources of exclusions:
285 * (1) The capture configuration file
286 * (2) The user-provided progress function
288 * The capture implementation must have set params->capture_root_nchars to an
289 * appropriate value. Example for UNIX: if the capture root directory is
290 * "foobar/subdir", then all paths will be provided starting with
291 * "foobar/subdir", so params->capture_root_nchars must be set to
292 * strlen("foobar/subdir") so that try_exclude() can use the appropriate suffix
293 * when it calls should_exclude_path().
298 * = 0 if not excluded and no error
299 * > 0 (wimlib error code) if error
302 try_exclude(const tchar *full_path, size_t full_path_nchars,
303 const struct add_image_params *params)
307 if (should_exclude_path(full_path + params->capture_root_nchars,
308 full_path_nchars - params->capture_root_nchars,
312 if (unlikely(params->add_flags & WIMLIB_ADD_FLAG_TEST_FILE_EXCLUSION)) {
313 union wimlib_progress_info info;
315 info.test_file_exclusion.path = full_path;
316 info.test_file_exclusion.will_exclude = false;
319 /* Hack for Windows... */
321 wchar_t *p_question_mark = NULL;
323 if (!wcsncmp(full_path, L"\\??\\", 4)) {
324 /* Trivial transformation: NT namespace => Win32 namespace */
325 p_question_mark = (wchar_t *)&full_path[1];
326 *p_question_mark = L'\\';
330 ret = call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_TEST_FILE_EXCLUSION,
331 &info, params->progctx);
335 *p_question_mark = L'?';
340 if (info.test_file_exclusion.will_exclude)