Make absolute ExclusionException patterns include ancestor directories
authorEric Biggers <ebiggers3@gmail.com>
Sat, 21 Jul 2018 16:27:22 +0000 (09:27 -0700)
committerEric Biggers <ebiggers3@gmail.com>
Sat, 21 Jul 2018 17:21:18 +0000 (10:21 -0700)
22 files changed:
Makefile.am
NEWS
doc/man1/wimcapture.1
include/wimlib/pattern.h
include/wimlib/scan.h
src/pattern.c
src/scan.c
src/win32_apply.c
tests/exclusionlists/anchored_exception_in_excluded_dir [new file with mode: 0644]
tests/exclusionlists/anchored_prefix_match [new file with mode: 0644]
tests/exclusionlists/anchored_simple [new file with mode: 0644]
tests/exclusionlists/case_insensitive [new file with mode: 0644]
tests/exclusionlists/case_sensitive [new file with mode: 0644]
tests/exclusionlists/inner_star [new file with mode: 0644]
tests/exclusionlists/multiple_stars [new file with mode: 0644]
tests/exclusionlists/prefix_match [new file with mode: 0644]
tests/exclusionlists/question_mark [new file with mode: 0644]
tests/exclusionlists/recursive_match [new file with mode: 0644]
tests/exclusionlists/simple_exception [new file with mode: 0644]
tests/exclusionlists/suffix_match [new file with mode: 0644]
tests/exclusionlists/wildcard_exception [new file with mode: 0644]
tests/test-imagex-capture_and_apply

index 603c8d9..b72bd1d 100644 (file)
@@ -336,6 +336,7 @@ endif
 
 EXTRA_DIST +=                                  \
        tests/common_tests.sh                   \
+       tests/exclusionlists                    \
        tests/test_utils.sh                     \
        tests/security_descriptor_1.base64      \
        tests/security_descriptor_1.bin         \
diff --git a/NEWS b/NEWS
index 5213d67..7a27202 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,12 @@ Version 1.13.0-BETA:
        default; no option has to be provided, and it should work on older
        versions of Windows too.
 
+       Partially fixed a bug where [ExclusionException] entries didn't take
+       effect when the containing directory is matched by [ExclusionList].  It
+       now works when the [ExclusionException] patterns are absolute.  For
+       example, listing "/dir/file" in [ExclusionException] now works even if
+       "/dir" is matched by [ExclusionList].
+
        Added an '--include-integrity' option to various wimlib-imagex commands.
        '--include-integrity' is like '--check', but it will just include an
        integrity table in the output WIM(s), while skipping verification of any
index 990c04c..9426c81 100644 (file)
@@ -291,8 +291,13 @@ bracketed sections.  Currently, the following sections are recognized:
 [ExclusionList] ---  contains a list of path globs to exclude from capture.  If
 a directory is matched, both the directory and its contents are excluded.
 .IP \[bu]
-[ExclusionException] --- contains a list of path globs to include in the
-capture, even when the file or directory also matches a glob in [ExclusionList].
+[ExclusionException] --- contains a list of path globs to include, even when the
+file or directory also matches a glob in [ExclusionList].  If a directory is
+matched, then all its contents are included as well.  Files or directories
+\fIwithin\fR a directory excluded by [ExclusionList] can also be included this
+way, but currently only with absolute globs (those that begin with "/" or
+"\\").  For example, "/dir/file" can be included even though "/dir" is
+excluded, but listing just "file" will not work in that case.
 .IP \[bu]
 [PrepopulateList] --- this does not affect capture, but if the image is applied
 later with \fB--wimboot\fR, these are globs of files that shall be extracted
index 67c2073..b0e2999 100644 (file)
@@ -5,8 +5,22 @@
 
 struct wim_dentry;
 
+/* Flags for match_path() and match_pattern_list() */
+
+/*
+ * If set, subdirectories (and sub-files) are also matched.
+ * For example, the pattern "/dir" would match the path "/dir/file".
+ */
+#define MATCH_RECURSIVELY      0x01
+
+/*
+ * If set, ancestor directories are also matched.
+ * For example, the pattern "/dir/file" would match the path "/dir".
+ */
+#define MATCH_ANCESTORS                0x02
+
 extern bool
-match_path(const tchar *path, const tchar *pattern, bool prefix_ok);
+match_path(const tchar *path, const tchar *pattern, int match_flags);
 
 extern int
 expand_path_pattern(struct wim_dentry *root, const tchar *pattern,
index c1c3ecb..fbe304e 100644 (file)
@@ -88,7 +88,8 @@ extern void
 destroy_capture_config(struct capture_config *config);
 
 extern bool
-match_pattern_list(const tchar *path, const struct string_list *list);
+match_pattern_list(const tchar *path, const struct string_list *list,
+                  int match_flags);
 
 extern int
 try_exclude(const struct scan_params *params);
index b83db42..a9e3fbc 100644 (file)
@@ -86,9 +86,8 @@ advance_through_component(const tchar *p)
  *     (which matches any single character).  If there is no leading path
  *     separator, then the match is attempted with the filename component of
  *     @path only; otherwise, the match is attempted with the entire @path.
- * @prefix_ok
- *     If %true, also allow a prefix of @path terminated by a path separator
- *     (a.k.a. an ancestor directory) to match the pattern.
+ * @match_flags
+ *     MATCH_* flags; see their definitions in wimlib/pattern.h.
  *
  * @path and @pattern can both contain path separators (character
  * WIM_PATH_SEPARATOR).  Leading and trailing path separators are not
@@ -102,7 +101,7 @@ advance_through_component(const tchar *p)
  * Returns %true iff the path matched the pattern.
  */
 bool
-match_path(const tchar *path, const tchar *pattern, bool prefix_ok)
+match_path(const tchar *path, const tchar *pattern, int match_flags)
 {
        /* Filename only?  */
        if (*pattern != WIM_PATH_SEPARATOR)
@@ -117,11 +116,11 @@ match_path(const tchar *path, const tchar *pattern, bool prefix_ok)
 
                /* Is the pattern exhausted?  */
                if (!*pattern)
-                       return !*path || prefix_ok;
+                       return !*path || (match_flags & MATCH_RECURSIVELY);
 
                /* Is the path exhausted (but not the pattern)?  */
                if (!*path)
-                       return false;
+                       return (match_flags & MATCH_ANCESTORS);
 
                path_component_end = advance_through_component(path);
                pattern_component_end = advance_through_component(pattern);
index cc86d80..ab1f527 100644 (file)
@@ -254,14 +254,15 @@ destroy_capture_config(struct capture_config *config)
 }
 
 /*
- * Determine whether @path, or any ancestor directory of @path, matches any of
- * the patterns in @list.  Path separators in @path must be WIM_PATH_SEPARATOR.
+ * Determine whether @path matches any of the patterns in @list, as modified by
+ * the MATCH_* flags in @match_flags.  @path must use WIM_PATH_SEPARATOR.
  */
 bool
-match_pattern_list(const tchar *path, const struct string_list *list)
+match_pattern_list(const tchar *path, const struct string_list *list,
+                  int match_flags)
 {
        for (size_t i = 0; i < list->num_strings; i++)
-               if (match_path(path, list->strings[i], true))
+               if (match_path(path, list->strings[i], match_flags))
                        return true;
        return false;
 }
@@ -292,8 +293,10 @@ try_exclude(const struct scan_params *params)
 
        if (params->config) {
                const tchar *path = params->cur_path + params->root_path_nchars;
-               if (match_pattern_list(path, &params->config->exclusion_pats) &&
-                   !match_pattern_list(path, &params->config->exclusion_exception_pats))
+               if (match_pattern_list(path, &params->config->exclusion_pats,
+                                      MATCH_RECURSIVELY) &&
+                   !match_pattern_list(path, &params->config->exclusion_exception_pats,
+                                       MATCH_RECURSIVELY | MATCH_ANCESTORS))
                        return -1;
        }
 
index 1434bd2..f677918 100644 (file)
@@ -437,7 +437,8 @@ can_externally_back_path(const wchar_t *path, const struct win32_apply_ctx *ctx)
 {
        /* Does the path match a pattern given in the [PrepopulateList] section
         * of WimBootCompress.ini?  */
-       if (ctx->prepopulate_pats && match_pattern_list(path, ctx->prepopulate_pats))
+       if (ctx->prepopulate_pats && match_pattern_list(path, ctx->prepopulate_pats,
+                                                       MATCH_RECURSIVELY))
                return false;
 
        /* Since we attempt to modify the SYSTEM registry after it's extracted
@@ -449,7 +450,7 @@ can_externally_back_path(const wchar_t *path, const struct win32_apply_ctx *ctx)
         * However, a WIM that wasn't specifically captured in "WIMBoot mode"
         * may contain SYSTEM.* files.  So to make things "just work", hard-code
         * the pattern.  */
-       if (match_path(path, L"\\Windows\\System32\\config\\SYSTEM*", false))
+       if (match_path(path, L"\\Windows\\System32\\config\\SYSTEM*", 0))
                return false;
 
        return true;
@@ -2557,7 +2558,8 @@ set_system_compression_on_inode(struct wim_inode *inode, int format,
                        }
 
                        incompatible = match_pattern_list(dentry->d_full_path,
-                                                         &bootloader_patterns);
+                                                         &bootloader_patterns,
+                                                         MATCH_RECURSIVELY);
                        FREE(dentry->d_full_path);
                        dentry->d_full_path = NULL;
 
diff --git a/tests/exclusionlists/anchored_exception_in_excluded_dir b/tests/exclusionlists/anchored_exception_in_excluded_dir
new file mode 100644 (file)
index 0000000..da23fc8
--- /dev/null
@@ -0,0 +1,11 @@
+[ExclusionList]
+/dir
+
+[ExclusionException]
+/dir/include*
+
+@@@
+
+  dir/
+- dir/exclude.txt
+  dir/include.txt
diff --git a/tests/exclusionlists/anchored_prefix_match b/tests/exclusionlists/anchored_prefix_match
new file mode 100644 (file)
index 0000000..2bd40f3
--- /dev/null
@@ -0,0 +1,10 @@
+[ExclusionList]
+/foo*
+
+@@@
+
+- foo
+- foobar
+  goo/
+  goo/foo
+  goo/foobar
diff --git a/tests/exclusionlists/anchored_simple b/tests/exclusionlists/anchored_simple
new file mode 100644 (file)
index 0000000..4bd782d
--- /dev/null
@@ -0,0 +1,10 @@
+[ExclusionList]
+/foo
+
+@@@
+
+- foo
+  foobar
+  goo/
+  goo/foo
+  goo/foobar
diff --git a/tests/exclusionlists/case_insensitive b/tests/exclusionlists/case_insensitive
new file mode 100644 (file)
index 0000000..1a95a57
--- /dev/null
@@ -0,0 +1,10 @@
+# case insensitive
+[ExclusionList]
+foo
+
+@@@
+
+- FOO
+- foo
+  BOO
+  boo
diff --git a/tests/exclusionlists/case_sensitive b/tests/exclusionlists/case_sensitive
new file mode 100644 (file)
index 0000000..8258216
--- /dev/null
@@ -0,0 +1,7 @@
+[ExclusionList]
+foo
+
+@@@
+
+- foo
+  FOO
diff --git a/tests/exclusionlists/inner_star b/tests/exclusionlists/inner_star
new file mode 100644 (file)
index 0000000..f17cb26
--- /dev/null
@@ -0,0 +1,9 @@
+[ExclusionList]
+foo*bar
+
+@@@
+
+  bar
+  foo
+- foobar
+- fooXYZbar
diff --git a/tests/exclusionlists/multiple_stars b/tests/exclusionlists/multiple_stars
new file mode 100644 (file)
index 0000000..b5d8e3e
--- /dev/null
@@ -0,0 +1,12 @@
+[ExclusionList]
+foo*bar*baz
+
+@@@
+
+  bar
+  barbaz
+  baz
+  foo
+  foobar
+- foobarbaz
+- fooXYZbarXYZbaz
diff --git a/tests/exclusionlists/prefix_match b/tests/exclusionlists/prefix_match
new file mode 100644 (file)
index 0000000..b89cfc2
--- /dev/null
@@ -0,0 +1,12 @@
+[ExclusionList]
+foo*
+
+@@@
+
+  boo
+  boofoo
+- foo
+- foobar
+  goo/
+- goo/foo
+- goo/foobar
diff --git a/tests/exclusionlists/question_mark b/tests/exclusionlists/question_mark
new file mode 100644 (file)
index 0000000..def5c99
--- /dev/null
@@ -0,0 +1,11 @@
+[ExclusionList]
+te?t
+
+@@@
+
+- test
+- text
+- te?t
+  best
+  testing
+  Xtest
diff --git a/tests/exclusionlists/recursive_match b/tests/exclusionlists/recursive_match
new file mode 100644 (file)
index 0000000..6098e05
--- /dev/null
@@ -0,0 +1,13 @@
+[ExclusionList]
+dir
+
+@@@
+
+- dir/
+- dir/a
+- dir/b
+  foo
+  goo
+  subdir/
+- subdir/dir/
+- subdir/dir/abcd
diff --git a/tests/exclusionlists/simple_exception b/tests/exclusionlists/simple_exception
new file mode 100644 (file)
index 0000000..9bf457f
--- /dev/null
@@ -0,0 +1,12 @@
+[ExclusionList]
+*.jpg
+
+[ExclusionException]
+test.jpg
+
+@@@
+
+- a.jpg
+- abcd.jpg
+  abcd.png
+  test.jpg
diff --git a/tests/exclusionlists/suffix_match b/tests/exclusionlists/suffix_match
new file mode 100644 (file)
index 0000000..0081334
--- /dev/null
@@ -0,0 +1,10 @@
+[ExclusionList]
+*.jpg
+
+@@@
+
+  a.png
+- a.jpg
+  dir/
+  dir/a.png
+- dir/a.jpg
diff --git a/tests/exclusionlists/wildcard_exception b/tests/exclusionlists/wildcard_exception
new file mode 100644 (file)
index 0000000..089826f
--- /dev/null
@@ -0,0 +1,13 @@
+[ExclusionList]
+*.jpg
+
+[ExclusionException]
+?oo.jpg
+
+@@@
+
+- a.jpg
+- abcd.jpg
+  boo.jpg
+  foo.jpg
+  foo.png
index aa26a63..fe8cdca 100755 (executable)
@@ -152,6 +152,65 @@ if [ -e out.dir/hiberfil.sys -o -e "out.dir/System Volume Information" ]; then
        error "Files were not excluded from capture as expected"
 fi
 
+#
+# Run the exclusion list test case in file $1 (see tests/exclusionlists/).
+# It has two sections split by an "@@@" line: the capture configuration (a
+# "# case insensitive" line there sets WIMLIB_IMAGEX_IGNORE_CASE=1), then the
+# paths to create: "- " marks expected exclusions, trailing "/" a directory.
+#
+exclusionlist_test() {
+       local t_file="$1"
+       local in_paths_section=false
+       local path
+       local include
+
+       __msg "Testing exclusion list: ${t_file##*/}"
+
+       echo "/" > expected_out  # the expected listing always starts with the root
+       sed -n -e '/^@@@/q;p' "$t_file" > config.txt  # config = lines before "@@@"
+       rm -rf in.dir
+       mkdir in.dir
+       cd in.dir
+       while read -r path; do  # with default IFS, read trims leading blanks
+               path="$(echo "$path" | sed -e 's/[[:space:]]*$//')"  # trim trailing blanks
+               if ! $in_paths_section; then
+                       if [ "$path" = "# case insensitive" ]; then
+                               export WIMLIB_IMAGEX_IGNORE_CASE=1
+                       elif [ "$path" = "@@@" ]; then
+                               in_paths_section=true
+                       fi
+                       continue  # still in the config section: nothing to create
+               fi
+               if [ -z "$path" ]; then
+                       continue
+               fi
+               include=true
+               if [ "${path:0:2}" = "- " ]; then  # still created, but expected to be excluded
+                       include=false
+                       path="${path:2}"
+               fi
+               if [ "${path: -1}" = "/" ]; then  # trailing "/" means directory
+                       path="${path:0:$(( ${#path} - 1))}"
+                       mkdir "$path"
+               else
+                       touch "$path"
+               fi
+               if $include; then
+                       echo "/$path" >> ../expected_out  # expected_out lives above in.dir
+               fi
+       done < "$t_file"
+       cd ..
+       $in_paths_section || error "malformed test file: $t_file (never found separator)"
+       wimcapture in.dir test.wim --compress=none --config=config.txt
+       unset WIMLIB_IMAGEX_IGNORE_CASE  # keep the setting out of later captures
+       wimdir test.wim 1 > actual_out
+       diff expected_out actual_out  # function status: nonzero if the listings differ
+}
+
+for t_file in "$srcdir/tests/exclusionlists"/*; do  # each file there is one test case
+       exclusionlist_test "$t_file"
+done
+
 # Make sure reparse point fixups are working as expected
 __msg "Testing --rpfix"
 rm -r in.dir out.dir