/*
- * Copyright (C) 2013 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
+ * capture_common.c - Mostly code to handle excluding paths from capture.
+ */
+
+/*
+ * Copyright (C) 2013, 2014 Eric Biggers
*
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
*
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
* details.
*
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
*/
-#include "wimlib_internal.h"
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
#include <string.h>
-#ifdef __WIN32__
-# include "win32.h"
-#else
-# include <fnmatch.h>
-#endif
+#include "wimlib/capture.h"
+#include "wimlib/dentry.h"
+#include "wimlib/error.h"
+#include "wimlib/lookup_table.h"
+#include "wimlib/paths.h"
+#include "wimlib/progress.h"
+#include "wimlib/textfile.h"
+#include "wimlib/wildcard.h"
-static int
-canonicalize_pattern(const tchar *pat, tchar **canonical_pat_ret)
+/*
+ * Tally a file (or directory) that has been scanned for a capture operation,
+ * and possibly call the progress function provided by the library user.
+ *
+ * @params
+ * Flags, optional progress function, and progress data for the capture
+ * operation.
+ * @status
+ * Status of the scanned file.
+ * @inode
+ * If @status is WIMLIB_SCAN_DENTRY_OK, this is a pointer to the WIM inode
+ * that has been created for the scanned file. The first time the file is
+ * seen, inode->i_nlink will be 1. On subsequent visits of the same inode
+ * via additional hard links, inode->i_nlink will be greater than 1.
+ *
+ * Returns 0 without calling the progress function when @status is filtered
+ * out by params->add_flags; otherwise returns the result of call_progress().
+ */
+int
+do_capture_progress(struct capture_params *params, int status,
+ const struct wim_inode *inode)
{
- tchar *canonical_pat;
+ /* Filter by verbosity: an OK status is reported only with
+ * WIMLIB_ADD_FLAG_VERBOSE, and the four statuses listed below only with
+ * WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE. The switch has no default, so any
+ * other status is always reported. */
+ switch (status) {
+ case WIMLIB_SCAN_DENTRY_OK:
+ if (!(params->add_flags & WIMLIB_ADD_FLAG_VERBOSE))
+ return 0;
+ break;
+ case WIMLIB_SCAN_DENTRY_UNSUPPORTED:
+ case WIMLIB_SCAN_DENTRY_EXCLUDED:
+ case WIMLIB_SCAN_DENTRY_FIXED_SYMLINK:
+ case WIMLIB_SCAN_DENTRY_NOT_FIXED_SYMLINK:
+ if (!(params->add_flags & WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE))
+ return 0;
+ break;
+ }
+ params->progress.scan.status = status;
+ if (status == WIMLIB_SCAN_DENTRY_OK && inode->i_nlink == 1) {
- if (pat[0] != T('/') && pat[0] != T('\\') &&
+ /* Successful scan, and visiting inode for the first time */
+
+ /* Tally size of all data streams. */
+ const struct wim_lookup_table_entry *lte;
+ /* NOTE(review): the inclusive bound visits i_num_ads + 1 streams;
+ * presumably index 0 is the unnamed stream -- confirm. */
+ for (unsigned i = 0; i <= inode->i_num_ads; i++) {
+ lte = inode_stream_lte_resolved(inode, i);
+ if (lte)
+ params->progress.scan.num_bytes_scanned += lte->size;
+ }
+
+ /* Tally the file itself. */
+ if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
+ params->progress.scan.num_dirs_scanned++;
+ else
+ params->progress.scan.num_nondirs_scanned++;
+ }
+
+ /* Call the user-provided progress function. */
+ return call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_SCAN_DENTRY,
+ &params->progress, params->progctx);
+}
+
+/*
+ * Given a null-terminated pathname pattern @pat that has been read from line
+ * @line_no of the file @path, validate and canonicalize the pattern.
+ *
+ * On success, returns 0.
+ * On failure, returns WIMLIB_ERR_INVALID_CAPTURE_CONFIG.
+ * In either case, @pat may have been modified in-place (and possibly
+ * shortened).
+ *
+ * @path and @line_no are used only in the diagnostic messages.
+ */
+int
+mangle_pat(tchar *pat, const tchar *path, unsigned long line_no)
+{
+ if (!is_any_path_separator(pat[0]) &&
pat[0] != T('\0') && pat[1] == T(':'))
{
- /* Pattern begins with drive letter */
- if (pat[2] != T('/') && pat[2] != T('\\')) {
+ /* Pattern begins with drive letter. */
+
+ if (!is_any_path_separator(pat[2])) {
/* Something like c:file, which is actually a path
* relative to the current working directory on the c:
* drive. We require paths with drive letters to be
- * absolute. */
- ERROR("Invalid path \"%"TS"\"; paths including drive letters "
- "must be absolute!", pat);
- ERROR("Maybe try \"%"TC":/%"TS"\"?",
- pat[0], pat + 2);
+ * absolute. */
+ ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
+ " Patterns including drive letters must be absolute!\n"
+ " Maybe try \"%"TC":%"TC"%"TS"\"?\n",
+ path, line_no, pat,
+ pat[0], OS_PREFERRED_PATH_SEPARATOR, &pat[2]);
return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
}
- WARNING("Pattern \"%"TS"\" starts with a drive letter, which is "
- "being removed.", pat);
- /* Strip the drive letter */
- pat += 2;
+ WARNING("%"TS":%lu: Pattern \"%"TS"\" starts with a drive "
+ "letter, which is being removed.",
+ path, line_no, pat);
+
+ /* Strip the drive letter. */
+ /* The count is tstrlen(pat + 2) + 1 so that the terminating null
+ * character is moved as well; the source and destination ranges
+ * overlap. */
+ tmemmove(pat, pat + 2, tstrlen(pat + 2) + 1);
}
- canonical_pat = canonicalize_fs_path(pat);
- if (!canonical_pat)
- return WIMLIB_ERR_NOMEM;
- *canonical_pat_ret = canonical_pat;
- return 0;
-}
-static int
-copy_and_canonicalize_pattern_list(const struct wimlib_pattern_list *list,
- struct wimlib_pattern_list *copy)
-{
- int ret = 0;
-
- copy->pats = CALLOC(list->num_pats, sizeof(list->pats[0]));
- if (!copy->pats)
- return WIMLIB_ERR_NOMEM;
- copy->num_pats = list->num_pats;
- for (size_t i = 0; i < list->num_pats; i++) {
- ret = canonicalize_pattern(list->pats[i], &copy->pats[i]);
- if (ret)
- break;
+ /* Collapse consecutive path separators, and translate both / and \ into
+ * / (UNIX) or \ (Windows).
+ *
+ * Note: we expect that this function produces patterns that can be used
+ * for both filesystem paths and WIM paths, so the desired path
+ * separators must be the same. */
+ BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR);
+ do_canonicalize_path(pat, pat);
+
+ /* Relative patterns can only match file names, so they must be
+ * single-component only. */
+ if (pat[0] != OS_PREFERRED_PATH_SEPARATOR &&
+ tstrchr(pat, OS_PREFERRED_PATH_SEPARATOR))
+ {
+ ERROR("%"TS":%lu: Invalid pattern \"%"TS"\":\n"
+ " Relative patterns can only include one path component!\n"
+ " Maybe try \"%"TC"%"TS"\"?",
+ path, line_no, pat, OS_PREFERRED_PATH_SEPARATOR, pat);
+ return WIMLIB_ERR_INVALID_CAPTURE_CONFIG;
}
- return ret;
+
+ return 0;
}
+/*
+ * Read, parse, and validate a capture configuration file from either an on-disk
+ * file or an in-memory buffer.
+ *
+ * To read from a file, specify @config_file, and use NULL for @buf.
+ * To read from a buffer, specify @buf and @bufsize.
+ *
+ * @config must be initialized to all 0's.
+ *
+ * On success, 0 will be returned, and the resulting capture configuration will
+ * be stored in @config.
+ *
+ * On failure, a positive error code will be returned, and the contents of
+ * @config will be invalidated.
+ */
int
-copy_and_canonicalize_capture_config(const struct wimlib_capture_config *config,
- struct wimlib_capture_config **config_copy_ret)
+read_capture_config(const tchar *config_file, const void *buf,
+ size_t bufsize, struct capture_config *config)
{
- struct wimlib_capture_config *config_copy;
int ret;
- config_copy = CALLOC(1, sizeof(struct wimlib_capture_config));
- if (!config_copy) {
- ret = WIMLIB_ERR_NOMEM;
- goto out_free_capture_config;
- }
- ret = copy_and_canonicalize_pattern_list(&config->exclusion_pats,
- &config_copy->exclusion_pats);
- if (ret)
- goto out_free_capture_config;
- ret = copy_and_canonicalize_pattern_list(&config->exclusion_exception_pats,
- &config_copy->exclusion_exception_pats);
+ /* [PrepopulateList] is used for apply, not capture. But since we do
+ * understand it, recognize it, thereby avoiding the unrecognized
+ * section warning, but discard the resulting strings.
+ *
+ * We currently ignore [CompressionExclusionList] and
+ * [CompressionFolderList]. This is a known issue that doesn't seem to
+ * have any real consequences, so don't issue warnings about not
+ * recognizing those sections. */
+ STRING_SET(prepopulate_pats);
+ STRING_SET(compression_exclusion_pats);
+ STRING_SET(compression_folder_pats);
+
+ /* Map each recognized section name to the string set that receives its
+ * lines; only the first two sets are kept in @config. */
+ struct text_file_section sections[] = {
+ {T("ExclusionList"),
+ &config->exclusion_pats},
+ {T("ExclusionException"),
+ &config->exclusion_exception_pats},
+ {T("PrepopulateList"),
+ &prepopulate_pats},
+ {T("CompressionExclusionList"),
+ &compression_exclusion_pats},
+ {T("CompressionFolderList"),
+ &compression_folder_pats},
+ };
+ void *mem;
+
+ /* mangle_pat is passed to do_load_text_file() as a callback.
+ * NOTE(review): presumably it is invoked once per pattern line --
+ * confirm against do_load_text_file(). */
+ ret = do_load_text_file(config_file, buf, bufsize, &mem,
+ sections, ARRAY_LEN(sections),
+ LOAD_TEXT_FILE_REMOVE_QUOTES, mangle_pat);
if (ret)
- goto out_free_capture_config;
- *config_copy_ret = config_copy;
- goto out;
-out_free_capture_config:
- free_capture_config(config_copy);
-out:
- return ret;
+ return ret;
+
+ /* Only the pointer arrays of the discarded sections are released here.
+ * NOTE(review): the strings themselves presumably live inside @mem --
+ * confirm against do_load_text_file(). */
+ FREE(prepopulate_pats.strings);
+ FREE(compression_exclusion_pats.strings);
+ FREE(compression_folder_pats.strings);
+
+ /* Keep the loaded buffer in @config; destroy_capture_config() frees it. */
+ config->buf = mem;
+ return 0;
}
-static void
-destroy_pattern_list(struct wimlib_pattern_list *list)
+/*
+ * Free the memory referenced by a capture configuration: both pattern arrays
+ * and the buffer saved in config->buf. The structure @config itself is not
+ * freed.
+ */
+void
+destroy_capture_config(struct capture_config *config)
{
- for (size_t i = 0; i < list->num_pats; i++)
- FREE(list->pats[i]);
- FREE(list->pats);
+ FREE(config->exclusion_pats.strings);
+ FREE(config->exclusion_exception_pats.strings);
+ FREE(config->buf);
}
-void
-free_capture_config(struct wimlib_capture_config *config)
+/*
+ * Determine whether a path matches any wildcard pattern in a list.
+ *
+ * Special rules apply about what form @path must be in; see match_path().
+ *
+ * Returns true as soon as one pattern in @list matches @path, or false if no
+ * pattern matches (which includes the case of an empty list).
+ */
+bool
+match_pattern_list(const tchar *path, size_t path_nchars,
+ const struct string_set *list)
{
- if (config) {
- destroy_pattern_list(&config->exclusion_pats);
- destroy_pattern_list(&config->exclusion_exception_pats);
- FREE(config);
- }
+ for (size_t i = 0; i < list->num_strings; i++)
+ if (match_path(path, path_nchars, list->strings[i],
+ OS_PREFERRED_PATH_SEPARATOR, true))
+ return true;
+ return false;
}
+/*
+ * Determine whether the filesystem @path should be excluded from capture, based
+ * on the current capture configuration file.
+ *
+ * The @path must be given relative to the root of the capture, but with a
+ * leading path separator. For example, if the file "in/file" is being tested
+ * and the library user ran wimlib_add_image(wim, "in", ...), then the directory
+ * "in" is the root of the capture and the path should be specified as "/file".
+ *
+ * Also, all path separators in @path must be OS_PREFERRED_PATH_SEPARATOR, there
+ * cannot be trailing slashes, and there cannot be consecutive path separators.
+ *
+ * As a special case, the empty string will be interpreted as a single path
+ * separator (which means the root of capture itself).
+ *
+ * Returns true only if @path matches a pattern in config->exclusion_pats and
+ * matches no pattern in config->exclusion_exception_pats. A NULL @config
+ * never excludes anything.
+ */
static bool
-match_pattern(const tchar *path,
- const tchar *path_basename,
- const struct wimlib_pattern_list *list)
+should_exclude_path(const tchar *path, size_t path_nchars,
+ const struct capture_config *config)
{
- for (size_t i = 0; i < list->num_pats; i++) {
-
- const tchar *pat = list->pats[i];
- const tchar *string;
-
- if (*pat == T('/')) {
- /* Absolute path from root of capture */
- string = path;
- } else {
- if (tstrchr(pat, T('/')))
- /* Relative path from root of capture */
- string = path + 1;
- else
- /* A file name pattern */
- string = path_basename;
- }
+ tchar dummy[2];
- /* Warning: on Windows native builds, fnmatch() calls the
- * replacement function in win32.c. */
- if (fnmatch(pat, string, FNM_PATHNAME | FNM_NOESCAPE
- #ifdef FNM_CASEFOLD
- | FNM_CASEFOLD
- #endif
- ) == 0)
- {
- DEBUG("\"%"TS"\" matches the pattern \"%"TS"\"",
- string, pat);
- return true;
- } else {
- DEBUG2("\"%"TS"\" does not match the pattern \"%"TS"\"",
- string, pat);
- }
+ if (!config)
+ return false;
+
+ /* Empty path: substitute a lone path separator, built in the local
+ * buffer, so that the root of the capture can be matched. */
+ if (!*path) {
+ dummy[0] = OS_PREFERRED_PATH_SEPARATOR;
+ dummy[1] = T('\0');
+ path = dummy;
+ path_nchars = 1;
}
- return false;
+
+ return match_pattern_list(path, path_nchars, &config->exclusion_pats) &&
+ !match_pattern_list(path, path_nchars, &config->exclusion_exception_pats);
+
}
-/* Return true if the image capture configuration file indicates we should
- * exclude the filename @path from capture.
+/*
+ * Determine if a file should be excluded from capture.
+ *
+ * This function tests exclusions from both of the two possible sources of
+ * exclusions:
+ *
+ * (1) The capture configuration file
+ * (2) The user-provided progress function
+ *
+ * The capture implementation must have set params->capture_root_nchars to an
+ * appropriate value. Example for UNIX: if the capture root directory is
+ * "foobar/subdir", then all paths will be provided starting with
+ * "foobar/subdir", so params->capture_root_nchars must be set to
+ * strlen("foobar/subdir") so that try_exclude() can use the appropriate suffix
+ * when it calls should_exclude_path().
*
- * If @exclude_prefix is %true, the part of the path up and including the name
- * of the directory being captured is not included in the path for matching
- * purposes. This allows, for example, a pattern like /hiberfil.sys to match a
- * file /mnt/windows7/hiberfil.sys if we are capturing the /mnt/windows7
- * directory.
+ *
+ * Returns:
+ * < 0 if excluded
+ * = 0 if not excluded and no error
+ * > 0 (wimlib error code) if error
*/
-bool
-exclude_path(const tchar *path, size_t path_len,
- const struct wimlib_capture_config *config, bool exclude_prefix)
+int
+try_exclude(const tchar *full_path, size_t full_path_nchars,
+ const struct capture_params *params)
{
- const tchar *basename = path_basename_with_len(path, path_len);
- if (exclude_prefix) {
- wimlib_assert(path_len >= config->_prefix_num_tchars);
- if (!tmemcmp(config->_prefix, path, config->_prefix_num_tchars) &&
- path[config->_prefix_num_tchars] == T('/'))
- {
- path += config->_prefix_num_tchars;
+ int ret;
+
+ /* Source (1): the capture configuration, tested against the part of
+ * @full_path that follows the capture root. */
+ if (should_exclude_path(full_path + params->capture_root_nchars,
+ full_path_nchars - params->capture_root_nchars,
+ params->config))
+ return -1;
+
+ /* Source (2): the progress function, consulted only when
+ * WIMLIB_ADD_FLAG_TEST_FILE_EXCLUSION is set. */
+ if (unlikely(params->add_flags & WIMLIB_ADD_FLAG_TEST_FILE_EXCLUSION)) {
+ union wimlib_progress_info info;
+
+ info.test_file_exclusion.path = full_path;
+ info.test_file_exclusion.will_exclude = false;
+
+ #ifdef __WIN32__
+ /* Hack for Windows... */
+ /* NOTE: this temporarily writes through the const @full_path
+ * pointer (second character '?' becomes '\\'); the original
+ * character is restored after the progress function returns. */
+
+ wchar_t *p_question_mark = NULL;
+
+ if (!wcsncmp(full_path, L"\\??\\", 4)) {
+ /* Trivial transformation: NT namespace => Win32 namespace */
+ p_question_mark = (wchar_t *)&full_path[1];
+ *p_question_mark = L'\\';
}
+ #endif
+
+ ret = call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_TEST_FILE_EXCLUSION,
+ &info, params->progctx);
+
+ #ifdef __WIN32__
+ if (p_question_mark)
+ *p_question_mark = L'?';
+ #endif
+
+ /* A nonzero result from call_progress() is returned unchanged. */
+ if (ret)
+ return ret;
+ if (info.test_file_exclusion.will_exclude)
+ return -1;
}
- return match_pattern(path, basename, &config->exclusion_pats) &&
- !match_pattern(path, basename, &config->exclusion_exception_pats);
+ return 0;
}