From f1460c1d7df10acd410aea0496147c1abe9ef24c Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 2 Dec 2016 22:48:23 -0800 Subject: [PATCH] Improved handling of Data Deduplication reparse points --- NEWS | 3 +++ doc/man1/wimlib-imagex-capture.1 | 35 ++++++++++++++++++++++++++------ include/wimlib/inode.h | 1 + src/ntfs-3g_capture.c | 26 +++++++++++++++++++++++- src/win32_capture.c | 15 ++++++++++++++ 5 files changed, 73 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index dbc43282..bbecd4d0 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,9 @@ Version 1.11.0-BETA1: Updated 'mkwinpeimg' to work correctly on images that have a "windows" (lower case) directory rather than a "Windows" (upper case) directory. + On Windows, improved the way in which files deduplicated with Windows' + Data Deduplication feature are captured. + Fixed configuring with --enable-ssse3-sha1 from release tarball (the file nasm_lt.sh was missing). diff --git a/doc/man1/wimlib-imagex-capture.1 b/doc/man1/wimlib-imagex-capture.1 index e78f6159..e112fca9 100644 --- a/doc/man1/wimlib-imagex-capture.1 +++ b/doc/man1/wimlib-imagex-capture.1 @@ -98,8 +98,7 @@ possible, including: All data streams of all unencrypted files, including the unnamed data stream as well as all named data streams. .IP \[bu] -Reparse points, including symbolic links, junction points, and other reparse -points. +Reparse points. See \fBREPARSE POINTS AND SYMLINKS\fR for details. .IP \[bu] File and directory creation, access, and modification timestamps, using the native NTFS resolution of 100 nanoseconds. @@ -124,6 +123,9 @@ opaque and hand it off to the appropriate API function). The sparse attribute on sparse files will be saved, but the data stored will be the full data of the file rather than the "sparse" data. (The data is, however, subject to the WIM format's compression.) +.IP \[bu] +Some types of reparse points are transparently dereferenced by Windows but not +by NTFS-3G. See \fBREPARSE POINTS AND SYMLINKS\fR. .SH DIRECTORY CAPTURE (WINDOWS) On Windows, \fBwimlib-imagex capture\fR and \fBwimlib-imagex append\fR natively support Windows-specific and NTFS-specific data. They therefore act @@ -138,10 +140,8 @@ try to archive as much data and metadata as possible, including: .IP \[bu] 4 All data streams of all files. .IP \[bu] -Reparse points, including symbolic links, junction points, and other reparse -points, if supported by the source filesystem. (Note: see \fB--rpfix\fR and -\fB--norpfix\fR for documentation on exactly how absolute symbolic links and -junctions are captured.) +Reparse points, if supported by the source filesystem. See \fBREPARSE POINTS +AND SYMLINKS\fR for details. .IP \[bu] File and directory creation, access, and modification timestamps. These are stored with Windows NT's native timestamp resolution of 100 nanoseconds. @@ -179,6 +179,29 @@ ObCaseInsensitive has been set to 0 in the Windows registry), or a file whose name contains certain characters considered invalid by Windows. If you run into problems archiving such files consider using the \fBNTFS VOLUME CAPTURE (UNIX)\fR mode from Linux. +.SH REPARSE POINTS AND SYMLINKS +A "symbolic link" (or "symlink") is a special file which "points to" some other +file or directory. On Windows, a "reparse point" is a generalization of a +symlink which allows access to a file or directory to be redirected in a more +complex way. Windows uses reparse points to support symlinks, and sometimes +uses them for various other features as well. Normally, applications can choose +whether they want to "dereference" reparse points and symlinks or not. +.PP +The default behavior of \fBwimcapture\fR is that reparse points and symlinks are +\fInot\fR dereferenced, meaning that the reparse points or symlinks themselves +are stored in the archive rather than the files or data they point to. There is +a \fB--dereference\fR option, but it is currently only supported by the UNIX +version of \fBwimcapture\fR on UNIX filesystems (it's not yet implemented for +Windows filesystems). +.PP +Windows also treats certain types of reparse points specially. For example, +Windows applications reading from deduplicated, WIM-backed, or system-compressed +files always see the dereferenced data, even if they ask not to. Therefore, +\fBwimcapture\fR on Windows will store these files dereferenced, not as reparse +points. But \fBwimcapture\fR on UNIX in NTFS-3G mode cannot dereference these +files and will store them as reparse points instead. This difference can be +significant in certain situations, e.g. when capturing deduplicated files which, +to be readable after extraction, require that the chunk store also be present. .SH OPTIONS .TP 6 \fB--boot\fR diff --git a/include/wimlib/inode.h b/include/wimlib/inode.h index 8785e1f7..7652ef52 100644 --- a/include/wimlib/inode.h +++ b/include/wimlib/inode.h @@ -239,6 +239,7 @@ struct wim_inode_extra { */ #define WIM_IO_REPARSE_TAG_MOUNT_POINT 0xA0000003 #define WIM_IO_REPARSE_TAG_SYMLINK 0xA000000C +#define WIM_IO_REPARSE_TAG_DEDUP 0x80000013 #define WIM_IO_REPARSE_TAG_WOF 0x80000017 /* Flags for the rp_flags field. Currently the only known flag is NOT_FIXED, diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index 33f83657..b496e02a 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -65,6 +65,7 @@ struct ntfs_volume_wrapper { ntfs_volume *vol; size_t refcnt; + bool dedup_warned; }; /* Description of where data is located in an NTFS volume */ @@ -231,6 +232,27 @@ read_reparse_header(ntfs_inode *ni, struct wim_inode *inode) return 0; } + +static void +warn_special_reparse_points(const struct wim_inode *inode, + const struct scan_params *params, + struct ntfs_volume_wrapper *volume) +{ + if (inode->i_reparse_tag == WIM_IO_REPARSE_TAG_DEDUP && + (params->add_flags & WIMLIB_ADD_FLAG_WINCONFIG) && + !volume->dedup_warned) + { + WARNING( + "Filesystem includes files deduplicated with Windows'\n" +" Data Deduplication feature, which to properly restore\n" +" would require that the chunk store in \"System Volume Information\"\n" +" be included in the WIM image. By default \"System Volume Information\"\n" +" is excluded, so you may want to use a custom capture configuration\n" +" file which includes it."); + volume->dedup_warned = true; + } +} + static int attr_type_to_wimlib_stream_type(ATTR_TYPES type) { @@ -793,6 +815,8 @@ ntfs_3g_build_dentry_tree_recursive(struct wim_dentry **root_ret, volume, AT_REPARSE_POINT); if (ret) goto out; + + warn_special_reparse_points(inode, params, volume); } /* Load the object ID. */ @@ -863,7 +887,7 @@ ntfs_3g_build_dentry_tree(struct wim_dentry **root_ret, char *path; int ret; - volume = MALLOC(sizeof(struct ntfs_volume_wrapper)); + volume = CALLOC(1, sizeof(struct ntfs_volume_wrapper)); if (!volume) return WIMLIB_ERR_NOMEM; diff --git a/src/win32_capture.c b/src/win32_capture.c index e9f4df0e..b5b8bc5c 100644 --- a/src/win32_capture.c +++ b/src/win32_capture.c @@ -1050,6 +1050,21 @@ winnt_load_reparse_data(HANDLE h, struct wim_inode *inode, return WIMLIB_ERR_INVALID_REPARSE_DATA; } + if (le32_to_cpu(rpbuf.rptag) == WIM_IO_REPARSE_TAG_DEDUP) { + /* + * Windows treats Data Deduplication reparse points specially. + * Reads from the unnamed data stream actually return the + * redirected file contents, even with FILE_OPEN_REPARSE_POINT. + * Deduplicated files also cannot be properly restored without + * also restoring the "System Volume Information" directory, + * which wimlib excludes by default. Therefore, the logical + * behavior for us seems to be to ignore the reparse point and + * treat the file as a normal file. + */ + inode->i_attributes &= ~FILE_ATTRIBUTE_REPARSE_POINT; + return 0; + } + if (params->add_flags & WIMLIB_ADD_FLAG_RPFIX) { ret = winnt_try_rpfix(&rpbuf, &rpbuflen, full_path, params); if (ret == RP_FIXED) -- 2.43.0