2 * reparse.c - Handle reparse data.
6 * Copyright (C) 2012, 2013 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option)
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/assert.h"
29 #include "wimlib/buffer_io.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/lookup_table.h"
34 #include "wimlib/reparse.h"
35 #include "wimlib/resource.h"
38 # include "wimlib/win32.h" /* for win32_get_file_and_vol_ids() */
47 static const utf16lechar volume_junction_prefix[11] = {
61 /* Parse the "substitute name" (link target) from a symbolic link or junction
66 * Non-negative integer:
67 * The name is an absolute symbolic link in one of several formats,
68 * and the return value is the number of UTF-16LE characters that need to
69 * be advanced to reach a simple "absolute" path starting with a backslash
70 * (i.e. skip over \??\ and/or drive letter)
72 * SUBST_NAME_IS_VOLUME_JUNCTION:
73 * The name is a volume junction.
74 * SUBST_NAME_IS_RELATIVE_LINK:
75 * The name is a relative symbolic link.
76 * SUBST_NAME_IS_UNKNOWN:
77 * The name does not appear to be a valid symbolic link, junction,
81 parse_substitute_name(const utf16lechar *substitute_name,
82 u16 substitute_name_nbytes, u32 rptag)
84 u16 substitute_name_nchars = substitute_name_nbytes / 2;
86 if (substitute_name_nchars >= 7 &&
87 substitute_name[0] == cpu_to_le16('\\') &&
88 substitute_name[1] == cpu_to_le16('?') &&
89 substitute_name[2] == cpu_to_le16('?') &&
90 substitute_name[3] == cpu_to_le16('\\') &&
91 substitute_name[4] != cpu_to_le16('\0') &&
92 substitute_name[5] == cpu_to_le16(':') &&
93 substitute_name[6] == cpu_to_le16('\\'))
95 /* "Full" symlink or junction (\??\x:\ prefixed path) */
97 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
98 substitute_name_nchars >= 12 &&
99 memcmp(substitute_name, volume_junction_prefix,
100 sizeof(volume_junction_prefix)) == 0 &&
101 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
103 /* Volume junction. Can't really do anything with it. */
104 return SUBST_NAME_IS_VOLUME_JUNCTION;
105 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
106 substitute_name_nchars >= 3 &&
107 substitute_name[0] != cpu_to_le16('\0') &&
108 substitute_name[1] == cpu_to_le16(':') &&
109 substitute_name[2] == cpu_to_le16('\\'))
111 /* "Absolute" symlink, with drive letter */
113 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
114 substitute_name_nchars >= 1)
116 if (substitute_name[0] == cpu_to_le16('\\'))
117 /* "Absolute" symlink, without drive letter */
120 /* "Relative" symlink, without drive letter */
121 return SUBST_NAME_IS_RELATIVE_LINK;
123 return SUBST_NAME_IS_UNKNOWN;
128 * Read the data from a symbolic link, junction, or mount point reparse point
129 * buffer into a `struct reparse_data'.
131 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
132 * description of the format of the reparse point buffers.
135 parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata)
138 u16 substitute_name_offset;
139 u16 print_name_offset;
141 memset(rpdata, 0, sizeof(*rpdata));
144 p = get_u32(p, &rpdata->rptag);
145 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
146 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
147 p = get_u16(p, &rpdata->rpdatalen);
148 p = get_u16(p, &rpdata->rpreserved);
149 p = get_u16(p, &substitute_name_offset);
150 p = get_u16(p, &rpdata->substitute_name_nbytes);
151 p = get_u16(p, &print_name_offset);
152 p = get_u16(p, &rpdata->print_name_nbytes);
153 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
156 p = get_u32(p, &rpdata->rpflags);
158 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
159 (p - rpbuf) > rpbuflen)
161 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
162 (p - rpbuf) > rpbuflen)
164 rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset];
165 rpdata->print_name = (utf16lechar*)&p[print_name_offset];
168 ERROR("Invalid reparse data");
169 return WIMLIB_ERR_INVALID_REPARSE_DATA;
173 * Create a reparse point data buffer.
175 * @rpdata: Structure that contains the data we need.
177 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
178 * at least REPARSE_POINT_MAX_SIZE bytes long.
181 make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf)
185 p = put_u32(p, rpdata->rptag);
186 p += 2; /* We set ReparseDataLength later */
187 p = put_u16(p, rpdata->rpreserved);
188 p = put_u16(p, 0); /* substitute name offset */
189 p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */
190 p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */
191 p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */
192 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
193 p = put_u32(p, rpdata->rpflags);
194 /* We null-terminate the substitute and print names, although this may
195 * not be strictly necessary. Note that the byte counts should not
196 * include the null terminators. */
197 if (p + rpdata->substitute_name_nbytes +
198 rpdata->print_name_nbytes +
199 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
201 ERROR("Reparse data is too long!");
202 return WIMLIB_ERR_INVALID_REPARSE_DATA;
204 p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name);
206 p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name);
208 put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */
213 * Read the reparse data from a WIM inode that is a reparse point.
215 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
216 * the reparse point data buffer will be reconstructed.
218 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
219 * are omitted, presumably because we already know the reparse tag from the
220 * dentry, and we already know the reparse data length from the lookup table
221 * entry resource length. However, we reconstruct the first 8 bytes in the
222 * buffer returned by this function.
225 wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf)
227 struct wim_lookup_table_entry *lte;
230 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
232 lte = inode_unnamed_lte_resolved(inode);
234 ERROR("Reparse point has no reparse data!");
235 return WIMLIB_ERR_INVALID_REPARSE_DATA;
237 if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) {
238 ERROR("Reparse data is too long!");
239 return WIMLIB_ERR_INVALID_REPARSE_DATA;
242 /* Read the data from the WIM file */
243 ret = read_full_resource_into_buf(lte, rpbuf + 8);
247 /* Reconstruct the first 8 bytes of the reparse point buffer */
250 put_u32(rpbuf, inode->i_reparse_tag);
252 /* ReparseDataLength */
253 put_u16(rpbuf + 4, wim_resource_size(lte));
256 * XXX this could be one of the unknown fields in the WIM dentry. */
257 put_u16(rpbuf + 6, 0);
261 /* UNIX version of getting and setting the data in reparse points */
262 #if !defined(__WIN32__)
264 /* Get the UNIX symlink target from a WIM inode. The inode may be either a
265 * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
266 * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
268 * This has similar semantics to the UNIX readlink() function, except the path
269 * argument is swapped out with the `struct wim_inode' for a reparse point, and
270 * on failure a negated error code is returned rather than -1 with errno set. */
272 wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize)
275 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
277 struct reparse_data rpdata;
279 char *translated_target;
280 size_t link_target_len;
282 wimlib_assert(inode_is_symlink(inode));
284 if (wim_inode_get_reparse_data(inode, rpbuf))
287 get_u16(rpbuf + 4, &rpdatalen);
289 if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata))
292 ret = utf16le_to_tstr(rpdata.substitute_name,
293 rpdata.substitute_name_nbytes,
294 &link_target, &link_target_len);
298 translated_target = link_target;
299 ret = parse_substitute_name(rpdata.substitute_name,
300 rpdata.substitute_name_nbytes,
303 case SUBST_NAME_IS_RELATIVE_LINK:
304 goto out_translate_slashes;
305 case SUBST_NAME_IS_VOLUME_JUNCTION:
307 case SUBST_NAME_IS_UNKNOWN:
308 ERROR("Can't understand reparse point "
309 "substitute name \"%s\"", link_target);
312 translated_target += ret;
313 link_target_len -= ret;
317 out_translate_slashes:
318 for (size_t i = 0; i < link_target_len; i++)
319 if (translated_target[i] == '\\')
320 translated_target[i] = '/';
322 if (link_target_len > bufsize) {
323 link_target_len = bufsize;
326 ret = link_target_len;
328 memcpy(buf, translated_target, link_target_len);
334 wim_inode_set_symlink(struct wim_inode *inode,
336 struct wim_lookup_table *lookup_table)
339 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
341 struct reparse_data rpdata;
342 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
343 static const char abs_print_name_prefix[4] = "C\0:\0";
344 utf16lechar *name_utf16le;
345 size_t name_utf16le_nbytes;
348 DEBUG("Creating reparse point data buffer for UNIX "
349 "symlink target \"%s\"", target);
350 memset(&rpdata, 0, sizeof(rpdata));
351 ret = tstr_to_utf16le(target, strlen(target),
352 &name_utf16le, &name_utf16le_nbytes);
356 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
357 if (name_utf16le[i] == cpu_to_le16('/'))
358 name_utf16le[i] = cpu_to_le16('\\');
360 /* Compatibility notes:
362 * On UNIX, an absolute symbolic link begins with '/'; everything else
363 * is a relative symbolic link. (Quite simple compared to the various
364 * ways to provide Windows paths.)
366 * To change a UNIX relative symbolic link to Windows format, we only
367 * need to translate it to UTF-16LE and replace forward slashes with
368 * backslashes. We do not make any attempt to handle filename character
369 * problems, such as a link target that itself contains backslashes on
370 * UNIX. Then, for these relative links, we set the reparse header
371 * @flags field to SYMBOLIC_LINK_RELATIVE.
373 * For UNIX absolute symbolic links, we must set the @flags field to 0.
374 * Then, there are multiple options as to how to actually represent the
375 * absolute link targets:
377 * (1) An absolute path beginning with one backslash character, similar
378 * to UNIX-style, just with a different path separator. Print name same
379 * as substitute name.
381 * (2) Absolute path beginning with drive letter followed by a
382 * backslash. Print name same as substitute name.
384 * (3) Absolute path beginning with drive letter followed by a
385 * backslash; substitute name prefixed with \??\, otherwise same as
388 * We choose option (3) here, and we just assume C: for the drive
389 * letter. The reasoning for this is:
391 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
392 * reparse point fixups for these links, even though they are valid
393 * absolute links. (Note: in this case prefixing the substitute name
394 * with \??\ does not work; it just makes the data unable to be restored
396 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
397 * for these. It apparently contains a bug that causes it to create an
398 * invalid reparse point, which then cannot be restored.
399 * (3) This is the only option I tested for which reparse point fixups
400 * worked properly in Microsoft imagex.exe.
402 * So option (3) it is.
405 rpdata.rptag = inode->i_reparse_tag;
406 if (target[0] == '/') {
407 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
408 sizeof(abs_subst_name_prefix);
409 rpdata.print_name_nbytes = name_utf16le_nbytes +
410 sizeof(abs_print_name_prefix);
411 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
412 rpdata.print_name = alloca(rpdata.print_name_nbytes);
413 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
414 sizeof(abs_subst_name_prefix));
415 memcpy(rpdata.print_name, abs_print_name_prefix,
416 sizeof(abs_print_name_prefix));
417 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
418 name_utf16le, name_utf16le_nbytes);
419 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
420 name_utf16le, name_utf16le_nbytes);
422 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
423 rpdata.print_name_nbytes = name_utf16le_nbytes;
424 rpdata.substitute_name = name_utf16le;
425 rpdata.print_name = name_utf16le;
426 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
429 ret = make_reparse_buffer(&rpdata, rpbuf);
431 get_u16(rpbuf + 4, &rpdatalen);
432 ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen,
439 #include <sys/stat.h>
442 unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
445 if (stat(path, &stbuf)) {
447 WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
448 /* Treat as a link pointing outside the capture root (it
449 * most likely is). */
450 return WIMLIB_ERR_STAT;
452 *ino_ret = stbuf.st_ino;
453 *dev_ret = stbuf.st_dev;
458 #endif /* !defined(__WIN32__) */
461 # define RP_PATH_SEPARATOR L'\\'
462 # define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
463 # define os_get_ino_and_dev win32_get_file_and_vol_ids
465 # define RP_PATH_SEPARATOR '/'
466 # define is_rp_path_separator(c) ((c) == '/')
467 # define os_get_ino_and_dev unix_get_ino_and_dev
470 /* Fix up absolute symbolic link targets--- mostly shared between UNIX and
473 capture_fixup_absolute_symlink(tchar *dest,
474 u64 capture_root_ino, u64 capture_root_dev)
479 /* Skip drive letter */
480 if (!is_rp_path_separator(*dest))
484 DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
491 while (is_rp_path_separator(*p))
496 ret = os_get_ino_and_dev(dest, &ino, &dev);
499 if (ret) /* stat() failed before we got to the capture root---
500 assume the link points outside it. */
503 if (ino == capture_root_ino && dev == capture_root_dev) {
504 /* Link points inside capture root. Return abbreviated
507 *(p - 1) = RP_PATH_SEPARATOR;
508 while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
511 if (!is_rp_path_separator(dest[0])) {
516 wimlib_assert(p >= dest);
521 /* Link points outside capture root. */
527 } while (!is_rp_path_separator(*p) && *p != T('\0'));