X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fcapture_common.c;h=3c06012462ec47e0ee9d353f67b9511b7dc1bb45;hb=a82a0fbc625a96aabd2a11e79fb6aedf1fe313b3;hp=0e6826194ce47f0b3efd9c6c6e87d670354a8f6a;hpb=3de1ec66f778edda19865482d685bc6f4e17faf7;p=wimlib diff --git a/src/capture_common.c b/src/capture_common.c index 0e682619..3c060124 100644 --- a/src/capture_common.c +++ b/src/capture_common.c @@ -30,9 +30,9 @@ #include "wimlib/dentry.h" #include "wimlib/error.h" #include "wimlib/paths.h" +#include "wimlib/pattern.h" #include "wimlib/progress.h" #include "wimlib/textfile.h" -#include "wimlib/wildcard.h" /* * Tally a file (or directory) that has been scanned for a capture operation, @@ -53,6 +53,9 @@ int do_capture_progress(struct capture_params *params, int status, const struct wim_inode *inode) { + int ret; + tchar *cookie; + switch (status) { case WIMLIB_SCAN_DENTRY_OK: if (!(params->add_flags & WIMLIB_ADD_FLAG_VERBOSE)) @@ -67,28 +70,38 @@ do_capture_progress(struct capture_params *params, int status, break; } params->progress.scan.status = status; - if (status == WIMLIB_SCAN_DENTRY_OK && inode->i_nlink == 1) { - - /* Successful scan, and visiting inode for the first time */ - - /* Tally size of all streams. */ - for (unsigned i = 0; i < inode->i_num_streams; i++) { - const struct blob_descriptor *blob = - stream_blob_resolved(&inode->i_streams[i]); - if (blob) - params->progress.scan.num_bytes_scanned += blob->size; + if (status == WIMLIB_SCAN_DENTRY_OK) { + + /* The first time the inode is seen, tally all its streams. */ + if (inode->i_nlink == 1) { + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct blob_descriptor *blob = + stream_blob_resolved(&inode->i_streams[i]); + if (blob) + params->progress.scan.num_bytes_scanned += blob->size; + } } - /* Tally the file itself. */ - if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) + /* Tally the file itself, counting every hard link. 
It's + * debatable whether every link should be counted, but counting + * every link makes the statistics consistent with the ones + * placed in the FILECOUNT and DIRCOUNT elements of the WIM + * file's XML document. It also avoids possible user confusion + * if the number of files reported were to be lower than that + * displayed by some other software such as file browsers. */ + if (inode_is_directory(inode)) params->progress.scan.num_dirs_scanned++; else params->progress.scan.num_nondirs_scanned++; } /* Call the user-provided progress function. */ - return call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_SCAN_DENTRY, + + cookie = progress_get_win32_path(params->progress.scan.cur_path); + ret = call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &params->progress, params->progctx); + progress_put_win32_path(cookie); + return ret; } /* @@ -135,7 +148,7 @@ mangle_pat(tchar *pat, const tchar *path, unsigned long line_no) * Note: we expect that this function produces patterns that can be used * for both filesystem paths and WIM paths, so the desired path * separators must be the same. 
*/ - BUILD_BUG_ON(OS_PREFERRED_PATH_SEPARATOR != WIM_PATH_SEPARATOR); + STATIC_ASSERT(OS_PREFERRED_PATH_SEPARATOR == WIM_PATH_SEPARATOR); do_canonicalize_path(pat, pat); /* Relative patterns can only match file names, so they must be @@ -203,8 +216,25 @@ read_capture_config(const tchar *config_file, const void *buf, ret = do_load_text_file(config_file, buf, bufsize, &mem, sections, ARRAY_LEN(sections), LOAD_TEXT_FILE_REMOVE_QUOTES, mangle_pat); - if (ret) + if (ret) { + ERROR("Failed to load capture configuration file \"%"TS"\"", + config_file); + switch (ret) { + case WIMLIB_ERR_INVALID_UTF8_STRING: + case WIMLIB_ERR_INVALID_UTF16_STRING: + ERROR("Note: the capture configuration file must be " + "valid UTF-8 or UTF-16LE"); + ret = WIMLIB_ERR_INVALID_CAPTURE_CONFIG; + break; + case WIMLIB_ERR_OPEN: + case WIMLIB_ERR_STAT: + case WIMLIB_ERR_NOMEM: + case WIMLIB_ERR_READ: + ret = WIMLIB_ERR_UNABLE_TO_READ_CAPTURE_CONFIG; + break; + } return ret; + } FREE(prepopulate_pats.strings); FREE(compression_exclusion_pats.strings); @@ -223,62 +253,22 @@ destroy_capture_config(struct capture_config *config) } /* - * Determine whether a path matches any wildcard pattern in a list. - * - * Special rules apply about what form @path must be in; see match_path(). + * Determine whether @path, or any ancestor directory of @path, matches any of + * the patterns in @list. Path separators in @path must be WIM_PATH_SEPARATOR. */ bool -match_pattern_list(const tchar *path, size_t path_nchars, - const struct string_set *list) +match_pattern_list(const tchar *path, const struct string_set *list) { for (size_t i = 0; i < list->num_strings; i++) - if (match_path(path, path_nchars, list->strings[i], - OS_PREFERRED_PATH_SEPARATOR, true)) + if (match_path(path, list->strings[i], true)) return true; return false; } -/* - * Determine whether the filesystem @path should be excluded from capture, based - * on the current capture configuration file. 
- * - * The @path must be given relative to the root of the capture, but with a - * leading path separator. For example, if the file "in/file" is being tested - * and the library user ran wimlib_add_image(wim, "in", ...), then the directory - * "in" is the root of the capture and the path should be specified as "/file". - * - * Also, all path separators in @path must be OS_PREFERRED_PATH_SEPARATOR, there - * cannot be trailing slashes, and there cannot be consecutive path separators. - * - * As a special case, the empty string will be interpreted as a single path - * separator (which means the root of capture itself). - */ -static bool -should_exclude_path(const tchar *path, size_t path_nchars, - const struct capture_config *config) -{ - tchar dummy[2]; - - if (!config) - return false; - - if (!*path) { - dummy[0] = OS_PREFERRED_PATH_SEPARATOR; - dummy[1] = T('\0'); - path = dummy; - path_nchars = 1; - } - - return match_pattern_list(path, path_nchars, &config->exclusion_pats) && - !match_pattern_list(path, path_nchars, &config->exclusion_exception_pats); - -} - /* * Determine if a file should be excluded from capture. * - * This function tests exclusions from both of the two possible sources of - * exclusions: + * This function tests exclusions from both possible sources of exclusions: * * (1) The capture configuration file * (2) The user-provided progress function @@ -287,9 +277,8 @@ should_exclude_path(const tchar *path, size_t path_nchars, * appropriate value. Example for UNIX: if the capture root directory is * "foobar/subdir", then all paths will be provided starting with * "foobar/subdir", so params->capture_root_nchars must be set to - * strlen("foobar/subdir") so that try_exclude() can use the appropriate suffix - * when it calls should_exclude_path(). - * + * strlen("foobar/subdir") so that the appropriate path can be matched against + * the patterns in the exclusion list. 
* * Returns: * < 0 if excluded @@ -297,41 +286,31 @@ should_exclude_path(const tchar *path, size_t path_nchars, * > 0 (wimlib error code) if error */ int -try_exclude(const tchar *full_path, size_t full_path_nchars, - const struct capture_params *params) +try_exclude(const tchar *full_path, const struct capture_params *params) { int ret; - if (should_exclude_path(full_path + params->capture_root_nchars, - full_path_nchars - params->capture_root_nchars, - params->config)) - return -1; + if (params->config) { + const tchar *path = full_path + params->capture_root_nchars; + if (match_pattern_list(path, &params->config->exclusion_pats) && + !match_pattern_list(path, &params->config->exclusion_exception_pats)) + return -1; + } if (unlikely(params->add_flags & WIMLIB_ADD_FLAG_TEST_FILE_EXCLUSION)) { + union wimlib_progress_info info; + tchar *cookie; info.test_file_exclusion.path = full_path; info.test_file_exclusion.will_exclude = false; - #ifdef __WIN32__ - /* Hack for Windows... */ - - wchar_t *p_question_mark = NULL; - - if (!wcsncmp(full_path, L"\\??\\", 4)) { - /* Trivial transformation: NT namespace => Win32 namespace */ - p_question_mark = (wchar_t *)&full_path[1]; - *p_question_mark = L'\\'; - } - #endif + cookie = progress_get_win32_path(full_path); ret = call_progress(params->progfunc, WIMLIB_PROGRESS_MSG_TEST_FILE_EXCLUSION, &info, params->progctx); - #ifdef __WIN32__ - if (p_question_mark) - *p_question_mark = L'?'; - #endif + progress_put_win32_path(cookie); if (ret) return ret; @@ -341,3 +320,51 @@ try_exclude(const tchar *full_path, size_t full_path_nchars, return 0; } + +/* + * Determine whether a directory entry of the specified name should be ignored. + * This is a lower level function which runs prior to try_exclude(). It handles + * the standard '.' and '..' entries, which show up in directory listings but + * should not be archived. 
It also checks for odd filenames that usually should + * not exist but could cause problems if archiving them were to be attempted. + */ +bool +should_ignore_filename(const tchar *name, const int name_nchars) +{ + if (name_nchars <= 0) { + WARNING("Ignoring empty filename"); + return true; + } + + if (name[0] == T('.') && + (name_nchars == 1 || (name_nchars == 2 && name[1] == T('.')))) + return true; + + for (int i = 0; i < name_nchars; i++) { + if (name[i] == T('\0')) { + WARNING("Ignoring filename containing embedded null character"); + return true; + } + if (name[i] == OS_PREFERRED_PATH_SEPARATOR) { + WARNING("Ignoring filename containing embedded path separator"); + return true; + } + } + + return false; +} + +/* Attach a newly scanned directory tree to its parent directory, with duplicate + * handling. */ +void +attach_scanned_tree(struct wim_dentry *parent, struct wim_dentry *child, + struct blob_table *blob_table) +{ + struct wim_dentry *duplicate; + + if (child && (duplicate = dentry_add_child(parent, child))) { + WARNING("Duplicate file path: \"%"TS"\". Only capturing " + "the first version.", dentry_full_path(duplicate)); + free_dentry_tree(child, blob_table); + } +}