2 * reparse.c - Reparse point handling
6 * Copyright (C) 2012, 2013, 2015 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see https://www.gnu.org/licenses/.
28 #include "wimlib/alloca.h"
29 #include "wimlib/blob_table.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/guid.h"
34 #include "wimlib/inode.h"
35 #include "wimlib/reparse.h"
36 #include "wimlib/resource.h"
39 * Reconstruct the header of a reparse point buffer. This is necessary because
40 * only reparse data is stored in WIM files. The reparse tag is instead stored
41 * in the on-disk WIM dentry, and the reparse data length is equal to the size
42 * of the blob in which the reparse data was stored, minus the size of a GUID
43 * (16 bytes) if the reparse tag does not have the "Microsoft" bit set.
/* Writes the three header fields of @rpbuf (rptag, rpdatalen, rpreserved).
 * Each value is passed through cpu_to_le32()/cpu_to_le16() before it is
 * stored.  @blob_size is the size of the blob that held the reparse data. */
46 complete_reparse_point(struct reparse_buffer_disk *rpbuf,
47 const struct wim_inode *inode, u16 blob_size)
/* The tag is taken from the inode, not from the buffer contents. */
49 rpbuf->rptag = cpu_to_le32(inode->i_reparse_tag);
/* For tags without bit 0x80000000 set, GUID_SIZE bytes are excluded from the
 * recorded data length.  The size test keeps the u16 subtraction from
 * wrapping when the blob is shorter than a GUID. */
50 if (blob_size >= GUID_SIZE && !(inode->i_reparse_tag & 0x80000000))
51 blob_size -= GUID_SIZE;
52 rpbuf->rpdatalen = cpu_to_le16(blob_size);
53 rpbuf->rpreserved = cpu_to_le16(inode->i_rp_reserved);
56 /* Parse the buffer for a symbolic link or junction reparse point and fill in a
57 * 'struct link_reparse_point'. */
/* Validates the @rpbuflen-byte buffer @rpbuf as a symlink or junction reparse
 * point and fills in @link.  Returns WIMLIB_ERR_INVALID_REPARSE_DATA when the
 * tag is neither SYMLINK nor MOUNT_POINT, when the buffer is shorter than the
 * fixed link header, when any name offset or length is odd, or when a name
 * extends past @rpbuflen.
 * NOTE(review): the success return is not visible in this listing. */
59 parse_link_reparse_point(const struct reparse_buffer_disk *rpbuf, u16 rpbuflen,
60 struct link_reparse_point *link)
62 u16 substitute_name_offset;
63 u16 print_name_offset;
/* NOTE(review): the tag is read here, before any check of @rpbuflen. */
66 link->rptag = le32_to_cpu(rpbuf->rptag);
68 /* Not a symbolic link or junction? */
69 if (link->rptag != WIM_IO_REPARSE_TAG_SYMLINK &&
70 link->rptag != WIM_IO_REPARSE_TAG_MOUNT_POINT)
71 return WIMLIB_ERR_INVALID_REPARSE_DATA;
73 /* Is the buffer too small to be a symlink or a junction? */
74 if (rpbuflen < offsetof(struct reparse_buffer_disk, link.junction.data))
75 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* Header fields are converted from little-endian.  The two offsets are kept
 * in locals because they are only needed to compute the name pointers. */
77 link->rpreserved = le16_to_cpu(rpbuf->rpreserved);
78 link->substitute_name_nbytes = le16_to_cpu(rpbuf->link.substitute_name_nbytes);
79 substitute_name_offset = le16_to_cpu(rpbuf->link.substitute_name_offset);
80 link->print_name_nbytes = le16_to_cpu(rpbuf->link.print_name_nbytes);
81 print_name_offset = le16_to_cpu(rpbuf->link.print_name_offset);
83 /* The names must be properly sized and aligned. */
/* OR-ing the four values and testing bit 0 rejects any odd offset or length
 * with a single comparison. */
84 if ((substitute_name_offset | print_name_offset |
85 link->substitute_name_nbytes | link->print_name_nbytes) & 1)
86 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* The symlink layout needs a larger minimum size than the junction layout
 * checked above, and it carries a flags field. */
88 if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
89 if (rpbuflen < offsetof(struct reparse_buffer_disk, link.symlink.data))
90 return WIMLIB_ERR_INVALID_REPARSE_DATA;
91 link->symlink_flags = le32_to_cpu(rpbuf->link.symlink.flags);
92 data = rpbuf->link.symlink.data;
/* NOTE(review): the line separating these two assignments (presumably the
 * else branch for the junction case) is not visible in this listing. */
94 data = rpbuf->link.junction.data;
97 /* Verify that the names don't overflow the buffer. */
/* (data - rpbuf) is the byte position of the name area inside the buffer;
 * the name offsets are relative to that position. */
98 if ((data - (const u8 *)rpbuf) + substitute_name_offset +
99 link->substitute_name_nbytes > rpbuflen)
100 return WIMLIB_ERR_INVALID_REPARSE_DATA;
102 if ((data - (const u8 *)rpbuf) + print_name_offset +
103 link->print_name_nbytes > rpbuflen)
104 return WIMLIB_ERR_INVALID_REPARSE_DATA;
106 /* Save the name pointers. */
/* The saved pointers point into @rpbuf itself; nothing is copied.  The casts
 * also drop the const qualifier that @rpbuf carries. */
107 link->substitute_name = (utf16lechar *)&data[substitute_name_offset];
108 link->print_name = (utf16lechar *)&data[print_name_offset];
112 /* Translate a 'struct link_reparse_point' into a reparse point buffer. */
/* Builds a reparse point buffer in @rpbuf from @link and stores the total
 * buffer length in *@rpbuflen_ret.  Returns WIMLIB_ERR_INVALID_REPARSE_DATA
 * when the tag is neither SYMLINK nor MOUNT_POINT, or when the names do not
 * fit within REPARSE_POINT_MAX_SIZE.
 * NOTE(review): the success return is not visible in this listing. */
114 make_link_reparse_point(const struct link_reparse_point *link,
115 struct reparse_buffer_disk *rpbuf, u16 *rpbuflen_ret)
/* The name area starts at a tag-dependent position inside @rpbuf. */
119 if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
120 data = rpbuf->link.symlink.data;
121 else if (link->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
122 data = rpbuf->link.junction.data;
123 else /* Callers should forbid this case, but check anyway. */
124 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* The size test counts the bytes preceding the name area, both names, and the
 * two UTF-16 null terminators that are written further down. */
126 /* Check if the names are too long to fit in a reparse point. */
127 if ((data - (u8 *)rpbuf) + link->substitute_name_nbytes +
128 link->print_name_nbytes +
129 2 * sizeof(utf16lechar) > REPARSE_POINT_MAX_SIZE)
130 return WIMLIB_ERR_INVALID_REPARSE_DATA;
132 rpbuf->rptag = cpu_to_le32(link->rptag);
133 rpbuf->rpreserved = cpu_to_le16(link->rpreserved);
/* Name area layout: the substitute name at offset 0, then its null
 * terminator, then the print name.  The print name offset therefore equals
 * the substitute name length plus one UTF-16 code unit. */
134 rpbuf->link.substitute_name_offset = cpu_to_le16(0);
135 rpbuf->link.substitute_name_nbytes = cpu_to_le16(link->substitute_name_nbytes);
136 rpbuf->link.print_name_offset = cpu_to_le16(link->substitute_name_nbytes +
137 sizeof(utf16lechar));
138 rpbuf->link.print_name_nbytes = cpu_to_le16(link->print_name_nbytes);
/* The flags field is written only for the symlink layout. */
140 if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
141 rpbuf->link.symlink.flags = cpu_to_le32(link->symlink_flags);
143 /* We null-terminate the substitute and print names, although this isn't
144 * strictly necessary. Note that the nbytes fields do not include the
145 * null terminators. */
/* The value returned by mempcpy() is used as the next write position, so
 * 'data' advances past each name and then past each terminator. */
146 data = mempcpy(data, link->substitute_name, link->substitute_name_nbytes);
147 *(utf16lechar *)data = cpu_to_le16(0);
148 data += sizeof(utf16lechar);
149 data = mempcpy(data, link->print_name, link->print_name_nbytes);
150 *(utf16lechar *)data = cpu_to_le16(0);
151 data += sizeof(utf16lechar);
/* rpdatalen is measured from rpbuf->rpdata, while *rpbuflen_ret is measured
 * from the start of @rpbuf. */
152 rpbuf->rpdatalen = cpu_to_le16(data - rpbuf->rpdata);
154 *rpbuflen_ret = data - (u8 *)rpbuf;
158 /* UNIX symlink <=> Windows reparse point translation */
161 /* Retrieve the inode's reparse point buffer into @rpbuf and @rpbuflen_ret.
162 * This gets the reparse data from @blob if specified, otherwise from the
163 * inode's reparse point stream. The inode's streams must be resolved. */
/* NOTE(review): this listing does not show one parameter line (an output
 * named rpbuflen_ret is assigned at the end), the declarations of 'ret' and
 * 'blob_size', or the conditions that guard the statements below. */
165 wim_inode_get_reparse_point(const struct wim_inode *inode,
166 struct reparse_buffer_disk *rpbuf,
168 const struct blob_descriptor *blob)
174 const struct wim_inode_stream *strm;
/* Look up the inode's reparse point stream and take its resolved blob. */
176 strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
178 blob = stream_blob_resolved(strm);
/* The blob size is checked against REPARSE_DATA_MAX_SIZE before anything is
 * read.  The blob is then read into rpbuf->rpdata, not the start of @rpbuf. */
182 if (blob->size > REPARSE_DATA_MAX_SIZE)
183 return WIMLIB_ERR_INVALID_REPARSE_DATA;
184 blob_size = blob->size;
185 ret = read_blob_into_buf(blob, rpbuf->rpdata);
/* Fill in the header fields of @rpbuf from the inode and the blob size. */
190 complete_reparse_point(rpbuf, inode, blob_size);
/* The reported length is REPARSE_DATA_OFFSET plus the blob size. */
192 *rpbuflen_ret = REPARSE_DATA_OFFSET + blob_size;
/* Bounded copy helper: copies min(*@bufsize_p, @src_size) bytes from @src to
 * *@buf_p, so at most *@bufsize_p bytes are written. */
197 copy(char **buf_p, size_t *bufsize_p, const char *src, size_t src_size)
199 size_t n = min(*bufsize_p, src_size);
200 memcpy(*buf_p, src, n);
/* NOTE(review): any update of *buf_p or *bufsize_p after the memcpy() is not
 * visible in this listing. */
206 * Get a UNIX-style symlink target from the WIM inode for a reparse point.
209 * The inode from which to read the symlink. If not a symbolic link or
210 * junction reparse point, then -EINVAL will be returned.
212 * Buffer into which to place the link target.
214 * Available space in @buf, in bytes.
216 * If not NULL, the blob from which to read the reparse data. Otherwise,
217 * the reparse data will be read from the reparse point stream of @inode.
219 * If @altroot_len != 0 and the link is an absolute link that was stored as
220 * "fixed", then prepend this path to the link target.
222 * Length of the @altroot string or 0.
224 * Similar to POSIX readlink(), this function writes as much of the symlink
225 * target as possible (up to @bufsize bytes) to @buf with no null terminator and
226 * returns the number of bytes written or a negative errno value on error. Note
227 * that the target is truncated and @bufsize is returned in the overflow case.
/* NOTE(review): this listing omits several lines of this function, including
 * the error returns after the checks below, the bodies of some loops, and
 * some local declarations.  The comments describe only the visible lines. */
230 wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize,
231 const struct blob_descriptor *blob,
232 const char *altroot, size_t altroot_len)
234 struct reparse_buffer_disk rpbuf;
236 struct link_reparse_point link;
/* Starts false and gates the @altroot prefix near the end of the function.
 * NOTE(review): the statement that sets it is not visible; it presumably
 * follows the WIM_RP_FLAG_NOT_FIXED test below. */
241 bool rpfix_ok = false;
243 /* Not a symbolic link or junction? */
244 if (!inode_is_symlink(inode))
247 /* Retrieve the native Windows "substitute name". */
249 if (wim_inode_get_reparse_point(inode, &rpbuf, &rpbuflen, blob))
252 if (parse_link_reparse_point(&rpbuf, rpbuflen, &link))
255 /* Translate the substitute name to a multibyte string. */
256 if (utf16le_to_tstr(link.substitute_name, link.substitute_name_nbytes,
257 &target_buffer, &target_len))
259 target = target_buffer;
262 * The substitute name is a native Windows NT path. There are two cases:
264 * 1. The reparse point is a symlink (rptag=WIM_IO_REPARSE_TAG_SYMLINK)
265 * and SYMBOLIC_LINK_RELATIVE is set. Windows resolves the path
266 * relative to the directory containing the reparse point file. In
267 * this case, we just translate the path separators.
268 * 2. Otherwise, Windows resolves the path from the root of the Windows
269 * NT kernel object namespace. In this case, we attempt to strip the
270 * device name, in addition to translating the path separators; e.g.
271 * "\??\C:\Users\Public" is translated to "/Users/Public".
273 * Also in case (2) the link target may have been stored as "fixed",
274 * meaning that with the device portion stripped off it is effectively
275 * "relative to the root of the WIM image". If this is the case, and if
276 * the caller provided an alternate root directory, then rewrite the
277 * link to be relative to that directory.
279 if (!link_is_relative_symlink(&link)) {
/* Prefixes recognized as the start of an NT object namespace path. */
280 static const char *const nt_root_dirs[] = {
281 "\\??\\", "\\DosDevices\\", "\\Device\\",
283 for (size_t i = 0; i < ARRAY_LEN(nt_root_dirs); i++) {
284 size_t len = strlen(nt_root_dirs[i]);
285 if (!strncmp(target, nt_root_dirs[i], len)) {
/* 'p' starts just past the matched prefix.  The loop condition below stops
 * at the next backslash or at the end of the string, i.e. after the device
 * component. */
286 char *p = target + len;
289 while (*p && *p != '\\')
/* The length shrinks by the number of bytes skipped.  NOTE(review): the
 * matching advance of 'target' is not visible in this listing. */
291 target_len -= (p - target);
297 if (!(inode->i_rp_flags & WIM_RP_FLAG_NOT_FIXED))
301 /* Translate backslashes (Windows NT path separator) to forward slashes
302 * (UNIX path separator). In addition, translate forward slashes to
303 * backslashes; this enables lossless handling of UNIX symbolic link
304 * targets that contain the backslash character. */
305 for (char *p = target; *p; p++) {
312 /* Copy as much of the link target as possible to the output buffer and
313 * return the number of bytes copied. */
/* @altroot is emitted only when the link was flagged as fixable and the
 * caller supplied a non-empty alternate root. */
315 if (rpfix_ok && altroot_len != 0) {
316 copy(&buf_ptr, &bufsize, altroot, altroot_len);
317 } else if (target_len == 0) {
318 /* An absolute link target that was made relative to the same
319 * directory pointed to will end up empty if the original target
320 * did not have a trailing slash. Here, we are reading this
321 * adjusted link target without prefixing it. This usually
322 * doesn't happen, but if it does then we need to change it to
323 * "/" so that it is a valid target. */
/* The target is copied through the same cursor and remaining-space pair that
 * the @altroot copy used. */
327 copy(&buf_ptr, &bufsize, target, target_len);
/* The result is the distance from @buf to the output cursor. */
329 return buf_ptr - buf;
332 /* Given a UNIX-style symbolic link target, create a Windows-style reparse point
333 * buffer and assign it to the specified inode. */
/* Builds a WIM_IO_REPARSE_TAG_SYMLINK reparse point from the UNIX target
 * @_target, adds its data to @inode as a STREAM_TYPE_REPARSE_POINT stream, and
 * marks the inode as a reparse point.
 * NOTE(review): this listing omits some lines of this function (the prefix
 * initializer, some call arguments, the out_free_target label and whatever
 * follows it); the comments describe only the visible lines. */
335 wim_inode_set_symlink(struct wim_inode *inode, const char *_target,
336 struct blob_table *blob_table)
341 size_t target_nbytes;
342 struct link_reparse_point link;
343 struct reparse_buffer_disk rpbuf;
346 /* Translate the link target to UTF-16LE. */
/* strlen() is applied to @_target, so it must be a non-NULL, null-terminated
 * string. */
347 ret = tstr_to_utf16le(_target, strlen(_target), &target, &target_nbytes);
351 /* Translate forward slashes (UNIX path separator) to backslashes
352 * (Windows NT path separator). In addition, translate backslashes to
353 * forward slashes; this enables lossless handling of UNIX symbolic link
354 * targets that contain the backslash character. */
/* The loop stops at the first zero code unit.  Assumes tstr_to_utf16le()
 * null-terminates its output -- TODO confirm. */
355 for (utf16lechar *p = target; *p; p++) {
356 if (*p == cpu_to_le16('/'))
357 *p = cpu_to_le16('\\');
358 else if (*p == cpu_to_le16('\\'))
359 *p = cpu_to_le16('/');
362 link.rptag = WIM_IO_REPARSE_TAG_SYMLINK;
365 /* Note: an absolute link that was rewritten to be relative to another
366 * directory is assumed to either be empty or to have a leading slash.
367 * See unix_relativize_link_target(). */
/* After the separator swap above, a leading '/' in @_target is a backslash
 * here.  An empty target is handled by the same branch. */
368 if (*target == cpu_to_le16('\\') || !*target) {
370 * UNIX link target was absolute. In this case we represent the
371 * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
372 * cleared. For this to work we need to assign it a path that
373 * can be resolved from the root of the Windows NT kernel object
374 * namespace. We do this by using "\??\C:" as a dummy prefix.
376 * Note that we could instead represent UNIX absolute links by
377 * setting SYMBOLIC_LINK_RELATIVE and then leaving the path
378 * backslash-prefixed like "\Users\Public". On Windows this is
379 * valid and denotes a path relative to the root of the
380 * filesystem on which the reparse point resides. The problem
381 * with this is that neither WIMGAPI nor wimlib (on Windows)
382 * will do "reparse point fixups" when extracting such links
383 * (modifying the link target to point into the actual
384 * extraction directory). So for the greatest cross-platform
385 * consistency, we have to use the fake C: drive approach.
387 static const utf16lechar prefix[6] = {
396 /* Do not show \??\ in print name */
397 const size_t num_unprintable_chars = 4;
/* The substitute name is the 6-unit prefix followed by the translated target,
 * assembled in a stack buffer obtained from alloca().
 * NOTE(review): the alloca() size grows with the target length; confirm that
 * callers bound the length of @_target. */
399 link.symlink_flags = 0;
400 link.substitute_name_nbytes = sizeof(prefix) + target_nbytes;
401 link.substitute_name = alloca(link.substitute_name_nbytes);
402 memcpy(link.substitute_name, prefix, sizeof(prefix));
403 memcpy(link.substitute_name + ARRAY_LEN(prefix), target, target_nbytes);
/* The print name is not a separate copy: it points into the substitute name
 * buffer, num_unprintable_chars code units past its start, and its length is
 * shortened by the same amount. */
404 link.print_name_nbytes = link.substitute_name_nbytes -
405 (num_unprintable_chars * sizeof(utf16lechar));
406 link.print_name = link.substitute_name + num_unprintable_chars;
408 /* UNIX link target was relative. In this case we represent the
409 * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
410 * set. This causes Windows to interpret the link relative to
411 * the directory containing the reparse point file. */
/* Both names point at the same translated target buffer. */
412 link.symlink_flags = SYMBOLIC_LINK_RELATIVE;
413 link.substitute_name_nbytes = target_nbytes;
414 link.substitute_name = target;
415 link.print_name_nbytes = target_nbytes;
416 link.print_name = target;
419 /* Generate the reparse buffer. */
420 ret = make_link_reparse_point(&link, &rpbuf, &rpbuflen);
422 goto out_free_target;
424 /* Save the reparse data with the inode. */
/* 'ret' is preset to WIMLIB_ERR_NOMEM so that a failed
 * inode_add_stream_with_data() call leaves that code in place for the jump to
 * out_free_target.  The length passed is rpbuflen minus REPARSE_DATA_OFFSET. */
425 ret = WIMLIB_ERR_NOMEM;
426 if (!inode_add_stream_with_data(inode,
427 STREAM_TYPE_REPARSE_POINT,
430 rpbuflen - REPARSE_DATA_OFFSET,
432 goto out_free_target;
434 /* The inode is now a reparse point. */
/* Record the tag, clear FILE_ATTRIBUTE_NORMAL, and set
 * FILE_ATTRIBUTE_REPARSE_POINT. */
435 inode->i_reparse_tag = link.rptag;
436 inode->i_attributes &= ~FILE_ATTRIBUTE_NORMAL;
437 inode->i_attributes |= FILE_ATTRIBUTE_REPARSE_POINT;