2 * reparse.c - Handle reparse data.
6 * Copyright (C) 2012, 2013 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option)
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/assert.h"
29 #include "wimlib/compiler.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/dentry.h"
32 #include "wimlib/encoding.h"
33 #include "wimlib/error.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/reparse.h"
36 #include "wimlib/resource.h"
39 # include "wimlib/win32.h" /* for win32_get_file_and_vol_ids() */
48 /* On-disk format of a symbolic link (WIM_IO_REPARSE_TAG_SYMLINK) or junction
49 * point (WIM_IO_REPARSE_TAG_MOUNT_POINT) reparse data buffer. */
50 struct reparse_buffer_disk {
/* Location of the substitute name: a byte offset and a byte length, both
 * stored little-endian.  parse_reparse_data() applies the offset to the start
 * of the data[] array of the variant in use. */
54 le16 substitute_name_offset;
55 le16 substitute_name_nbytes;
/* Location of the print name, expressed the same way. */
56 le16 print_name_offset;
57 le16 print_name_nbytes;
/* Name storage for the symbolic link variant.  parse_reparse_data() reads a
 * 32-bit flags value (symlink.rpflags) for this variant before using data[];
 * presumably that field is why this array is 4 bytes smaller than the
 * junction one -- confirm against the full definition. */
61 u8 data[REPARSE_POINT_MAX_SIZE - 20];
62 } _packed_attribute symlink;
/* Name storage for the junction variant; parse_reparse_data() reads no flags
 * value for junctions and uses data[] directly. */
64 u8 data[REPARSE_POINT_MAX_SIZE - 16];
65 } _packed_attribute junction;
/* 11-character UTF-16LE prefix that parse_substitute_name() compares against
 * the start of a mount point substitute name to recognize a volume junction. */
69 static const utf16lechar volume_junction_prefix[11] = {
83 /* Parse the "substitute name" (link target) from a symbolic link or junction
 *
 * Arguments, as used by the code below:
 *   substitute_name         - the name as UTF-16LE characters; it is indexed
 *                             only within substitute_name_nbytes / 2
 *                             characters, so no null terminator is relied on.
 *   substitute_name_nbytes  - length of the name in bytes.
 *   rptag                   - reparse tag; the volume junction form is
 *                             recognized only for
 *                             WIM_IO_REPARSE_TAG_MOUNT_POINT, and the
 *                             drive-letter, leading-backslash and relative
 *                             forms only for WIM_IO_REPARSE_TAG_SYMLINK.
 *
 * Return value:
88 * Non-negative integer:
89 * The name is an absolute symbolic link in one of several formats,
90 * and the return value is the number of UTF-16LE characters that need to
91 * be advanced to reach a simple "absolute" path starting with a backslash
92 * (i.e. skip over \??\ and/or drive letter)
94 * SUBST_NAME_IS_VOLUME_JUNCTION:
95 * The name is a volume junction.
96 * SUBST_NAME_IS_RELATIVE_LINK:
97 * The name is a relative symbolic link.
98 * SUBST_NAME_IS_UNKNOWN:
99 * The name does not appear to be a valid symbolic link, junction,
103 parse_substitute_name(const utf16lechar *substitute_name,
104 u16 substitute_name_nbytes, u32 rptag)
106 u16 substitute_name_nchars = substitute_name_nbytes / 2;
108 if (substitute_name_nchars >= 7 &&
109 substitute_name[0] == cpu_to_le16('\\') &&
110 substitute_name[1] == cpu_to_le16('?') &&
111 substitute_name[2] == cpu_to_le16('?') &&
112 substitute_name[3] == cpu_to_le16('\\') &&
113 substitute_name[4] != cpu_to_le16('\0') &&
114 substitute_name[5] == cpu_to_le16(':') &&
115 substitute_name[6] == cpu_to_le16('\\'))
117 /* "Full" symlink or junction (\??\x:\ prefixed path) */
/* Volume junctions are recognized for mount points only: the name must be
 * longer than volume_junction_prefix, begin with it, and end with a
 * backslash. */
119 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
120 substitute_name_nchars >= 12 &&
121 memcmp(substitute_name, volume_junction_prefix,
122 sizeof(volume_junction_prefix)) == 0 &&
123 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
125 /* Volume junction. Can't really do anything with it. */
126 return SUBST_NAME_IS_VOLUME_JUNCTION;
/* The remaining forms are accepted for symbolic links only. */
127 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
128 substitute_name_nchars >= 3 &&
129 substitute_name[0] != cpu_to_le16('\0') &&
130 substitute_name[1] == cpu_to_le16(':') &&
131 substitute_name[2] == cpu_to_le16('\\'))
133 /* "Absolute" symlink, with drive letter */
135 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
136 substitute_name_nchars >= 1)
138 if (substitute_name[0] == cpu_to_le16('\\'))
139 /* "Absolute" symlink, without drive letter */
142 /* "Relative" symlink, without drive letter */
143 return SUBST_NAME_IS_RELATIVE_LINK;
/* Nothing matched.  This is also where an empty symlink name ends up, and a
 * mount point name that is neither drive-prefixed nor a volume junction. */
145 return SUBST_NAME_IS_UNKNOWN;
150 * Read the data from a symbolic link, junction, or mount point reparse point
151 * buffer into a `struct reparse_data'.
153 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
154 * description of the format of the reparse point buffers.
/* Decode the symbolic link or junction reparse buffer at @rpbuf (@rpbuflen
 * bytes long) into @rpdata.
 *
 * Header fields are converted from little-endian to CPU byte order.  The
 * substitute and print names are not copied: rpdata->substitute_name and
 * rpdata->print_name are set to point into @rpbuf, so they are usable only
 * while that buffer is.  The failure path visible at the bottom logs
 * "Invalid reparse data" and returns WIMLIB_ERR_INVALID_REPARSE_DATA. */
157 parse_reparse_data(const u8 * restrict rpbuf, u16 rpbuflen,
158 struct reparse_data * restrict rpdata)
160 u16 substitute_name_offset;
161 u16 print_name_offset;
162 const struct reparse_buffer_disk *rpbuf_disk =
163 (const struct reparse_buffer_disk*)rpbuf;
/* Start from all zeroes so that anything not filled in below (for example
 * rpflags, which is only read for symbolic links) is zero. */
166 memset(rpdata, 0, sizeof(*rpdata));
169 rpdata->rptag = le32_to_cpu(rpbuf_disk->rptag);
/* Only the two tags whose layout is handled here are expected. */
170 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
171 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
172 rpdata->rpdatalen = le16_to_cpu(rpbuf_disk->rpdatalen);
173 rpdata->rpreserved = le16_to_cpu(rpbuf_disk->rpreserved);
174 substitute_name_offset = le16_to_cpu(rpbuf_disk->substitute_name_offset);
175 rpdata->substitute_name_nbytes = le16_to_cpu(rpbuf_disk->substitute_name_nbytes);
176 print_name_offset = le16_to_cpu(rpbuf_disk->print_name_offset);
177 rpdata->print_name_nbytes = le16_to_cpu(rpbuf_disk->print_name_nbytes);
/* OR together the low bits: the result is nonzero if any offset or length is
 * odd, i.e. not a whole number of 2-byte UTF-16LE characters. */
179 if ((substitute_name_offset & 1) | (print_name_offset & 1) |
180 (rpdata->substitute_name_nbytes & 1) | (rpdata->print_name_nbytes & 1))
182 /* Names would be unaligned... */
/* Symbolic links have a flags value that is read before the name data;
 * junctions use their own data[] array and no flags value is read. */
186 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
189 rpdata->rpflags = le32_to_cpu(rpbuf_disk->symlink.rpflags);
190 data = rpbuf_disk->symlink.data;
192 data = rpbuf_disk->junction.data;
/* Bounds checks for both names against the caller-supplied length.
 * (data - rpbuf) is the number of bytes in front of the name area, and the
 * sum is formed in size_t. */
194 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
195 (data - rpbuf) > rpbuflen)
197 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
198 (data - rpbuf) > rpbuflen)
/* Point at the names in place rather than copying them. */
200 rpdata->substitute_name = (utf16lechar*)&data[substitute_name_offset];
201 rpdata->print_name = (utf16lechar*)&data[print_name_offset];
204 ERROR("Invalid reparse data");
205 return WIMLIB_ERR_INVALID_REPARSE_DATA;
209 * Create a reparse point data buffer.
211 * @rpdata: Structure that contains the data we need.
213 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
214 * at least REPARSE_POINT_MAX_SIZE bytes long.
/* Serialize @rpdata into on-disk reparse buffer form at rpbuf.
 *
 * The substitute name is written first, at offset 0 of the name area, and the
 * print name after it; a 2-byte zero is stored after each name.  The header
 * field rpdatalen is set to the buffer position reached minus 8, and
 * *rpbuflen_ret receives the buffer position reached.
 *
 * If the two names plus their terminators would not fit within
 * REPARSE_POINT_MAX_SIZE bytes, "Reparse data is too long!" is logged and
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned. */
217 make_reparse_buffer(const struct reparse_data * restrict rpdata,
219 u16 * restrict rpbuflen_ret)
221 struct reparse_buffer_disk *rpbuf_disk =
222 (struct reparse_buffer_disk*)rpbuf;
225 rpbuf_disk->rptag = cpu_to_le32(rpdata->rptag);
226 rpbuf_disk->rpreserved = cpu_to_le16(rpdata->rpreserved);
227 rpbuf_disk->substitute_name_offset = cpu_to_le16(0);
228 rpbuf_disk->substitute_name_nbytes = cpu_to_le16(rpdata->substitute_name_nbytes);
/* The print name is placed 2 bytes past the end of the substitute name,
 * leaving room for the null terminator stored after the substitute name. */
229 rpbuf_disk->print_name_offset = cpu_to_le16(rpdata->substitute_name_nbytes + 2);
230 rpbuf_disk->print_name_nbytes = cpu_to_le16(rpdata->print_name_nbytes);
/* Only symbolic links get a flags value written; each tag uses its own
 * data[] array for the names. */
232 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
233 rpbuf_disk->symlink.rpflags = cpu_to_le32(rpdata->rpflags);
234 data = rpbuf_disk->symlink.data;
236 data = rpbuf_disk->junction.data;
239 /* We null-terminate the substitute and print names, although this may
240 * not be strictly necessary. Note that the byte counts should not
241 * include the null terminators. */
242 if (data + rpdata->substitute_name_nbytes +
243 rpdata->print_name_nbytes +
244 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
246 ERROR("Reparse data is too long!");
247 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* mempcpy() is used for its return value, which is taken as the position at
 * which the terminator is then stored. */
249 data = mempcpy(data, rpdata->substitute_name, rpdata->substitute_name_nbytes);
250 *(utf16lechar*)data = cpu_to_le16(0);
252 data = mempcpy(data, rpdata->print_name, rpdata->print_name_nbytes);
253 *(utf16lechar*)data = cpu_to_le16(0);
/* rpdatalen excludes the first 8 bytes of the buffer (tag, data length and
 * reserved field). */
255 rpbuf_disk->rpdatalen = cpu_to_le16(data - rpbuf - 8);
256 *rpbuflen_ret = data - rpbuf;
261 * Read the reparse data from a WIM inode that is a reparse point.
263 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
264 * the reparse point data buffer will be reconstructed.
266 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
267 * are omitted, presumably because we already know the reparse tag from the
268 * dentry, and we already know the reparse tag length from the lookup table
269 * entry resource length. However, we reconstruct the first 8 bytes in the
270 * buffer returned by this function.
/* Rebuild the complete reparse point buffer for @inode in rpbuf.
 *
 * The stream contents are read to rpbuf + 8, and the 8 leading bytes are then
 * filled in: the tag from inode->i_reparse_tag, the data length from the
 * stream size, and a reserved field of zero.  *rpbuflen_ret is set to the
 * stream size plus 8.
 *
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned, after logging, on the
 * "Reparse point has no reparse data!" path and when the stream is larger
 * than REPARSE_POINT_MAX_SIZE - 8.
 *
 * NOTE(review): how @lte_override takes part in choosing the stream is not
 * visible in the lines shown here -- confirm against the full function. */
273 wim_inode_get_reparse_data(const struct wim_inode * restrict inode,
275 u16 * restrict rpbuflen_ret,
276 struct wim_lookup_table_entry *lte_override)
278 struct wim_lookup_table_entry *lte;
280 struct reparse_buffer_disk *rpbuf_disk;
/* Callers are expected to pass only inodes flagged as reparse points. */
283 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
286 lte = inode_unnamed_lte_resolved(inode);
288 ERROR("Reparse point has no reparse data!");
289 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* The stream must leave room for the 8 header bytes rebuilt below. */
295 if (lte->size > REPARSE_POINT_MAX_SIZE - 8) {
296 ERROR("Reparse data is too long!");
297 return WIMLIB_ERR_INVALID_REPARSE_DATA;
299 rpdatalen = lte->size;
301 /* Read the data from the WIM file */
302 ret = read_full_stream_into_buf(lte, rpbuf + 8);
306 /* Reconstruct the first 8 bytes of the reparse point buffer */
307 rpbuf_disk = (struct reparse_buffer_disk*)rpbuf;
310 rpbuf_disk->rptag = cpu_to_le32(inode->i_reparse_tag);
312 /* ReparseDataLength */
313 rpbuf_disk->rpdatalen = cpu_to_le16(rpdatalen);
316 * XXX this could be one of the unknown fields in the WIM dentry. */
317 rpbuf_disk->rpreserved = cpu_to_le16(0);
/* Total length: the stream data plus the 8 reconstructed header bytes. */
319 *rpbuflen_ret = rpdatalen + 8;
323 /* UNIX version of getting and setting the data in reparse points */
324 #if !defined(__WIN32__)
326 /* Get the UNIX symlink target from a WIM inode. The inode may be either a
327 * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
328 * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
330 * This has similar semantics to the UNIX readlink() function, except the path
331 * argument is swapped out with the `struct wim_inode' for a reparse point, and
332 * on failure a negated error code is returned rather than -1 with errno set. */
/* @buf receives at most @bufsize bytes of the translated target.  The visible
 * code copies them with memcpy() and does not append a null terminator.
 * Backslashes in the target are replaced with forward slashes.
 * NOTE(review): the handling of @lte_override is only partly visible in the
 * lines shown here. */
334 wim_inode_readlink(const struct wim_inode * restrict inode,
335 char * restrict buf, size_t bufsize,
336 struct wim_lookup_table_entry *lte_override)
/* Stack storage, aligned to 8 bytes, for the reconstructed reparse buffer. */
339 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
340 struct reparse_data rpdata;
342 char *translated_target;
343 size_t link_target_len;
346 wimlib_assert(inode_is_symlink(inode));
/* Rebuild the full reparse buffer, then split it into its fields. */
348 if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen,
352 if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata))
/* Convert the substitute name from UTF-16LE into a newly produced string. */
355 ret = utf16le_to_tstr(rpdata.substitute_name,
356 rpdata.substitute_name_nbytes,
357 &link_target, &link_target_len);
361 translated_target = link_target;
/* Classify the name using the original UTF-16LE form. */
362 ret = parse_substitute_name(rpdata.substitute_name,
363 rpdata.substitute_name_nbytes,
366 case SUBST_NAME_IS_RELATIVE_LINK:
367 goto out_translate_slashes;
368 case SUBST_NAME_IS_VOLUME_JUNCTION:
370 case SUBST_NAME_IS_UNKNOWN:
371 ERROR("Can't understand reparse point "
372 "substitute name \"%s\"", link_target);
374 goto out_free_link_target;
/* ret is the prefix length reported by parse_substitute_name(); skip that
 * many leading characters of the converted target.
 * NOTE(review): ret counts UTF-16LE characters of the original name but is
 * applied here to the converted string.  Presumably this is safe because the
 * skipped prefix is ASCII, but the drive-letter position is only checked for
 * being nonzero -- confirm. */
376 translated_target += ret;
377 link_target_len -= ret;
381 out_translate_slashes:
/* Switch to UNIX path separators. */
382 for (size_t i = 0; i < link_target_len; i++)
383 if (translated_target[i] == '\\')
384 translated_target[i] = '/';
/* Never copy more than the caller's buffer can hold. */
386 if (link_target_len > bufsize) {
387 link_target_len = bufsize;
390 ret = link_target_len;
392 memcpy(buf, translated_target, link_target_len);
393 out_free_link_target:
/* Build reparse data that represents the UNIX symlink target and store it as
 * the unnamed stream of @inode.
 *
 * The target is converted to UTF-16LE and its forward slashes are replaced
 * with backslashes.  A target starting with a forward slash is treated as
 * absolute: its print name is prefixed with C: and its substitute name with
 * \??\C: .  Any other target is stored unchanged for both names and flagged
 * SYMBOLIC_LINK_RELATIVE.  The buffer produced by make_reparse_buffer() is
 * handed to inode_set_unnamed_stream() starting 8 bytes in, i.e. without its
 * first 8 bytes.
 *
 * NOTE(review): the prefixed name lengths are computed from a size_t and then
 * stored in the rpdata byte-count fields.  If those fields are 16 bits wide
 * (parse_reparse_data() fills them from le16 values), a very long target
 * would be truncated there while the alloca() sizes use the stored value and
 * the memcpy() calls use the untruncated length -- confirm that callers bound
 * the target length. */
399 wim_inode_set_symlink(struct wim_inode *inode,
401 struct wim_lookup_table *lookup_table)
/* Stack storage, aligned to 8 bytes, for the generated reparse buffer. */
404 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
405 struct reparse_data rpdata;
/* The bytes of \??\C: as UTF-16LE: 6 characters, 12 bytes.  The array size
 * leaves no room for the terminator of the string literal. */
406 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
/* The bytes of C: as UTF-16LE: 2 characters, 4 bytes. */
407 static const char abs_print_name_prefix[4] = "C\0:\0";
408 utf16lechar *name_utf16le;
409 size_t name_utf16le_nbytes;
413 DEBUG("Creating reparse point data buffer for UNIX "
414 "symlink target \"%s\"", target);
415 memset(&rpdata, 0, sizeof(rpdata));
/* Convert the target to UTF-16LE; the result and its byte length come back
 * through the two output arguments. */
416 ret = tstr_to_utf16le(target, strlen(target),
417 &name_utf16le, &name_utf16le_nbytes);
/* Replace UNIX path separators with Windows ones, character by character. */
421 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
422 if (name_utf16le[i] == cpu_to_le16('/'))
423 name_utf16le[i] = cpu_to_le16('\\');
425 /* Compatability notes:
427 * On UNIX, an absolute symbolic link begins with '/'; everything else
428 * is a relative symbolic link. (Quite simple compared to the various
429 * ways to provide Windows paths.)
431 * To change a UNIX relative symbolic link to Windows format, we only
432 * need to translate it to UTF-16LE and replace forward slashes with
433 * backslashes. We do not make any attempt to handle filename character
434 * problems, such as a link target that itself contains backslashes on
435 * UNIX. Then, for these relative links, we set the reparse header
436 * @flags field to SYMBOLIC_LINK_RELATIVE.
438 * For UNIX absolute symbolic links, we must set the @flags field to 0.
439 * Then, there are multiple options as to actually represent the
440 * absolute link targets:
442 * (1) An absolute path beginning with one backslash character. similar
443 * to UNIX-style, just with a different path separator. Print name same
444 * as substitute name.
446 * (2) Absolute path beginning with drive letter followed by a
447 * backslash. Print name same as substitute name.
449 * (3) Absolute path beginning with drive letter followed by a
450 * backslash; substitute name prefixed with \??\, otherwise same as
453 * We choose option (3) here, and we just assume C: for the drive
454 * letter. The reasoning for this is:
456 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
457 * reparse point fixups for these links, even though they are valid
458 * absolute links. (Note: in this case prefixing the substitute name
459 * with \??\ does not work; it just makes the data unable to be restored
461 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
462 * for these. It apparently contains a bug that causes it to create an
463 * invalid reparse point, which then cannot be restored.
464 * (3) This is the only option I tested for which reparse point fixups
465 * worked properly in Microsoft imagex.exe.
467 * So option (3) it is.
470 rpdata.rptag = inode->i_reparse_tag;
471 if (target[0] == '/') {
472 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
473 sizeof(abs_subst_name_prefix);
474 rpdata.print_name_nbytes = name_utf16le_nbytes +
475 sizeof(abs_print_name_prefix);
476 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
477 rpdata.print_name = alloca(rpdata.print_name_nbytes);
478 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
479 sizeof(abs_subst_name_prefix));
480 memcpy(rpdata.print_name, abs_print_name_prefix,
481 sizeof(abs_print_name_prefix));
482 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
483 name_utf16le, name_utf16le_nbytes);
484 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
485 name_utf16le, name_utf16le_nbytes);
487 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
488 rpdata.print_name_nbytes = name_utf16le_nbytes;
489 rpdata.substitute_name = name_utf16le;
490 rpdata.print_name = name_utf16le;
491 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
494 ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk, &rpbuflen);
496 ret = inode_set_unnamed_stream(inode,
497 (u8*)&rpbuf_disk + 8,
505 #include <sys/stat.h>
/* Look up the inode number and device ID of @path with stat(), which follows
 * symbolic links.  On the failure path shown, a warning is logged and
 * WIMLIB_ERR_STAT is returned; otherwise the two IDs are stored through
 * @ino_ret and @dev_ret. */
508 unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
511 if (stat(path, &stbuf)) {
513 WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
514 /* Treat as a link pointing outside the capture root (it
515 * most likely is). */
516 return WIMLIB_ERR_STAT;
/* stat() succeeded: hand both identifiers back to the caller. */
518 *ino_ret = stbuf.st_ino;
519 *dev_ret = stbuf.st_dev;
524 #endif /* !defined(__WIN32__) */
526 /* is_rp_path_separator() - characters treated as path separators in absolute
527 * symbolic link targets */
/* Variant that accepts both a backslash and a forward slash as wide
 * characters, paired with win32_get_file_and_vol_ids() as the ID lookup.
 * Presumably selected by a platform conditional that is not visible here. */
530 # define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
531 # define os_get_ino_and_dev win32_get_file_and_vol_ids
/* Variant that accepts only a forward slash, paired with
 * unix_get_ino_and_dev() as the ID lookup. */
533 # define is_rp_path_separator(c) ((c) == '/')
534 # define os_get_ino_and_dev unix_get_ino_and_dev
537 /* Fix up absolute symbolic link targets--- mostly shared between UNIX and
 *
 * @dest is the link target.  @capture_root_ino and @capture_root_dev are the
 * two IDs that identify the capture root; they are compared with the IDs
 * that os_get_ino_and_dev() reports while the target is scanned one path
 * component at a time.  The visible code writes into @dest when a match is
 * found.
540 capture_fixup_absolute_symlink(tchar *dest,
541 u64 capture_root_ino, u64 capture_root_dev)
546 /* Skip drive letter */
547 if (!is_rp_path_separator(*dest))
551 DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
/* Step over a run of consecutive separators. */
558 while (is_rp_path_separator(*p))
/* Look up the two IDs for dest.  NOTE(review): presumably dest has been
 * temporarily terminated at p by lines not shown here, so that only the
 * leading part of the path is examined -- confirm. */
563 ret = os_get_ino_and_dev(dest, &ino, &dev);
566 if (ret) /* stat() failed before we got to the capture root---
567 assume the link points outside it. */
/* A match on both IDs means the part of the target examined so far is the
 * capture root itself. */
570 if (ino == capture_root_ino && dev == capture_root_dev) {
571 /* Link points inside capture root. Return abbreviated
574 *(p - 1) = OS_PREFERRED_PATH_SEPARATOR;
575 while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
578 if (!is_rp_path_separator(dest[0])) {
583 wimlib_assert(p >= dest);
588 /* Link points outside capture root. */
/* The loop runs until p is at a separator or at the end of the string. */
594 } while (!is_rp_path_separator(*p) && *p != T('\0'));