2 * reparse.c - Handle reparse data.
6 * Copyright (C) 2012, 2013 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/alloca.h"
29 #include "wimlib/assert.h"
30 #include "wimlib/blob_table.h"
31 #include "wimlib/compiler.h"
32 #include "wimlib/endianness.h"
33 #include "wimlib/encoding.h"
34 #include "wimlib/error.h"
35 #include "wimlib/inode.h"
36 #include "wimlib/reparse.h"
37 #include "wimlib/resource.h"
40 * Read the data from a symbolic link, junction, or mount point reparse point
41 * buffer into a `struct reparse_data'.
43 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
44 * description of the format of the reparse point buffers.
/*
 * Parameters:
 *   rpbuf    - raw reparse point buffer, interpreted in place as a
 *              struct reparse_buffer_disk.
 *   rpbuflen - number of valid bytes in rpbuf; both names are checked
 *              against it.
 *   rpdata   - output structure; zeroed first, then filled with the header
 *              fields converted from little-endian.
 *
 * On success rpdata->substitute_name and rpdata->print_name point INTO rpbuf
 * (nothing is copied), so they are only usable while rpbuf is.  Malformed
 * input is reported through ERROR() and WIMLIB_ERR_INVALID_REPARSE_DATA.
 * The tag is only checked with wimlib_assert(), so callers are expected to
 * pass symlink or mount point buffers exclusively.
 */
47 parse_reparse_data(const u8 * restrict rpbuf, u16 rpbuflen,
48 struct reparse_data * restrict rpdata)
50 u16 substitute_name_offset;
51 u16 print_name_offset;
52 const struct reparse_buffer_disk *rpbuf_disk =
53 (const struct reparse_buffer_disk*)rpbuf;
56 memset(rpdata, 0, sizeof(*rpdata));
/* Header fields are stored little-endian; convert each one as it is read. */
59 rpdata->rptag = le32_to_cpu(rpbuf_disk->rptag);
60 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
61 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
62 rpdata->rpdatalen = le16_to_cpu(rpbuf_disk->rpdatalen);
63 rpdata->rpreserved = le16_to_cpu(rpbuf_disk->rpreserved);
/* The name offsets and lengths are read through the 'symlink' member before
 * the tag is examined.  NOTE(review): presumably the 'junction' member has
 * these four fields at the same positions -- confirm against the definition
 * of struct reparse_buffer_disk. */
64 substitute_name_offset = le16_to_cpu(rpbuf_disk->symlink.substitute_name_offset);
65 rpdata->substitute_name_nbytes = le16_to_cpu(rpbuf_disk->symlink.substitute_name_nbytes);
66 print_name_offset = le16_to_cpu(rpbuf_disk->symlink.print_name_offset);
67 rpdata->print_name_nbytes = le16_to_cpu(rpbuf_disk->symlink.print_name_nbytes);
/* All four values must be even: an odd offset or length is treated as
 * invalid because the names are arrays of 2-byte units. */
69 if ((substitute_name_offset & 1) | (print_name_offset & 1) |
70 (rpdata->substitute_name_nbytes & 1) | (rpdata->print_name_nbytes & 1))
72 /* Names would be unaligned... */
/* Only the symlink layout has a flags field, and the name data starts at a
 * different member depending on the tag. */
76 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
79 rpdata->rpflags = le32_to_cpu(rpbuf_disk->symlink.rpflags);
80 data = rpbuf_disk->symlink.data;
82 data = rpbuf_disk->junction.data;
/* Each name, measured from the start of rpbuf, must end within rpbuflen.
 * The size_t cast makes the sum be computed in size_t. */
84 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
85 (data - rpbuf) > rpbuflen)
87 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
88 (data - rpbuf) > rpbuflen)
/* No copy is made: the name pointers alias the caller's buffer. */
90 rpdata->substitute_name = (utf16lechar*)&data[substitute_name_offset];
91 rpdata->print_name = (utf16lechar*)&data[print_name_offset];
94 ERROR("Invalid reparse data");
95 return WIMLIB_ERR_INVALID_REPARSE_DATA;
99 * Create a reparse point data buffer.
101 * @rpdata: Structure that contains the data we need.
103 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
104 * at least REPARSE_POINT_MAX_SIZE bytes long.
/*
 * Serializes @rpdata into @rpbuf in the little-endian on-disk layout: the
 * header fields, then the substitute name, then the print name.  A UTF-16
 * null terminator is written after each name.  The length computed as
 * (data - rpbuf) at the end is stored in *rpbuflen_ret.
 *
 * Fails with WIMLIB_ERR_INVALID_REPARSE_DATA (after logging via ERROR()) when
 * the header, both names and the two terminators would not fit in
 * REPARSE_POINT_MAX_SIZE bytes.
 */
107 make_reparse_buffer(const struct reparse_data * restrict rpdata,
109 u16 * restrict rpbuflen_ret)
111 struct reparse_buffer_disk *rpbuf_disk =
112 (struct reparse_buffer_disk*)rpbuf;
/* The name data begins at a tag-dependent member of the on-disk struct. */
115 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
116 data = rpbuf_disk->symlink.data;
118 data = rpbuf_disk->junction.data;
/* Size check is done up front, before anything is written to rpbuf. */
120 if ((data - rpbuf) + rpdata->substitute_name_nbytes +
121 rpdata->print_name_nbytes +
122 2 * sizeof(utf16lechar) > REPARSE_POINT_MAX_SIZE)
124 ERROR("Reparse data is too long!");
125 return WIMLIB_ERR_INVALID_REPARSE_DATA;
128 rpbuf_disk->rptag = cpu_to_le32(rpdata->rptag);
129 rpbuf_disk->rpreserved = cpu_to_le16(rpdata->rpreserved);
/* Layout of the name area: the substitute name is placed at offset 0 and the
 * print name right after it plus 2 bytes for the substitute name's
 * terminator.  These fields are written through the 'symlink' member for
 * either tag.  NOTE(review): presumably the 'junction' member shares them at
 * the same positions -- confirm against struct reparse_buffer_disk. */
130 rpbuf_disk->symlink.substitute_name_offset = cpu_to_le16(0);
131 rpbuf_disk->symlink.substitute_name_nbytes = cpu_to_le16(rpdata->substitute_name_nbytes);
132 rpbuf_disk->symlink.print_name_offset = cpu_to_le16(rpdata->substitute_name_nbytes + 2);
133 rpbuf_disk->symlink.print_name_nbytes = cpu_to_le16(rpdata->print_name_nbytes);
/* The flags field is only written for symbolic links. */
135 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
136 rpbuf_disk->symlink.rpflags = cpu_to_le32(rpdata->rpflags);
138 /* We null-terminate the substitute and print names, although this may
139 * not be strictly necessary. Note that the byte counts should not
140 * include the null terminators. */
141 data = mempcpy(data, rpdata->substitute_name, rpdata->substitute_name_nbytes);
142 *(utf16lechar*)data = cpu_to_le16(0);
144 data = mempcpy(data, rpdata->print_name, rpdata->print_name_nbytes);
145 *(utf16lechar*)data = cpu_to_le16(0);
/* The stored data length excludes the first REPARSE_DATA_OFFSET bytes of the
 * buffer, whereas the length returned to the caller includes them. */
147 rpbuf_disk->rpdatalen = cpu_to_le16(data - rpbuf - REPARSE_DATA_OFFSET);
148 *rpbuflen_ret = data - rpbuf;
152 /* UNIX version of getting and setting the data in reparse points */
156 * Read the reparse data from a WIM inode that is a reparse point.
158 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
159 * the reparse point data buffer will be reconstructed.
161 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
162 * are omitted, presumably because we already know the reparse tag from the
163 * dentry, and we already know the reparse data length from the blob length.
164 * However, we reconstruct the first 8 bytes in the buffer returned by this
/*
 * Parameters:
 *   inode         - reparse point inode; supplies the reparse tag and, unless
 *                   overridden, the blob holding the reparse data.
 *   rpbuflen_ret  - receives the blob size plus REPARSE_DATA_OFFSET.
 *   blob_override - alternative source of the reparse data; otherwise the
 *                   blob is resolved from the inode's unnamed
 *                   STREAM_TYPE_REPARSE_POINT stream.
 *
 * The blob contents are read to rpbuf + REPARSE_DATA_OFFSET and the bytes in
 * front of them are then filled in from the inode's tag, the blob size and a
 * zero reserved field.
 *
 * Fails with WIMLIB_ERR_INVALID_REPARSE_DATA when no reparse data is
 * available or the blob is larger than REPARSE_DATA_MAX_SIZE.
 */
168 wim_inode_get_reparse_data(const struct wim_inode * restrict inode,
170 u16 * restrict rpbuflen_ret,
171 const struct blob_descriptor *blob_override)
173 const struct blob_descriptor *blob;
175 struct reparse_buffer_disk *rpbuf_disk;
/* Calling this on an inode without the reparse point attribute is a
 * programming error, not a data error. */
178 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
181 blob = blob_override;
183 struct wim_inode_stream *strm;
185 strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
187 blob = stream_blob_resolved(strm);
191 ERROR("Reparse point has no reparse data!");
192 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* Bound the blob size before reading it into the caller's buffer. */
196 if (blob->size > REPARSE_DATA_MAX_SIZE) {
197 ERROR("Reparse data is too long!");
198 return WIMLIB_ERR_INVALID_REPARSE_DATA;
200 rpdatalen = blob->size;
202 /* Read the reparse data from blob */
203 ret = read_blob_into_buf(blob, rpbuf + REPARSE_DATA_OFFSET);
207 /* Reconstruct the first 8 bytes of the reparse point buffer */
208 rpbuf_disk = (struct reparse_buffer_disk*)rpbuf;
211 rpbuf_disk->rptag = cpu_to_le32(inode->i_reparse_tag);
213 /* ReparseDataLength */
214 rpbuf_disk->rpdatalen = cpu_to_le16(rpdatalen);
217 * XXX this could be one of the unknown fields in the WIM dentry. */
218 rpbuf_disk->rpreserved = cpu_to_le16(0);
/* Total length = reconstructed leading bytes + data read from the blob. */
220 *rpbuflen_ret = rpdatalen + REPARSE_DATA_OFFSET;
/* Fixed 11-unit UTF-16LE prefix.  parse_substitute_name() compares it with
 * memcmp() against the start of a mount point's substitute name in order to
 * recognize volume junctions. */
224 static const utf16lechar volume_junction_prefix[11] = {
/* Negative result codes of parse_substitute_name().  They are negative so
 * that they cannot collide with its non-negative results, which are counts
 * of UTF-16LE characters to skip. */
239 SUBST_NAME_IS_RELATIVE_LINK = -1,
240 SUBST_NAME_IS_VOLUME_JUNCTION = -2,
241 SUBST_NAME_IS_UNKNOWN = -3,
244 /* Parse the "substitute name" (link target) from a symbolic link or junction
249 * Non-negative integer:
250 * The name is an absolute symbolic link in one of several formats,
251 * and the return value is the number of UTF-16LE characters that need to
252 * be advanced to reach a simple "absolute" path starting with a backslash
253 * (i.e. skip over \??\ and/or drive letter)
255 * SUBST_NAME_IS_VOLUME_JUNCTION:
256 * The name is a volume junction.
257 * SUBST_NAME_IS_RELATIVE_LINK:
258 * The name is a relative symbolic link.
259 * SUBST_NAME_IS_UNKNOWN:
260 * The name does not appear to be a valid symbolic link, junction,
/*
 * @substitute_name:        UTF-16LE name to classify.  Every index examined
 *                          is guarded by a length test first, so the name
 *                          does not have to be null-terminated.
 * @substitute_name_nbytes: length of @substitute_name in bytes.
 * @rptag:                  reparse tag; the volume junction form is only
 *                          recognized for mount points, and the drive-letter,
 *                          rooted and relative forms only for symlinks.
 */
264 parse_substitute_name(const utf16lechar *substitute_name,
265 u16 substitute_name_nbytes, u32 rptag)
/* All tests below are expressed in 2-byte UTF-16 units, not bytes. */
267 u16 substitute_name_nchars = substitute_name_nbytes / 2;
/* First form, accepted for either tag: \??\ followed by one unit, a colon
 * and a backslash.  Any non-null unit is accepted in the drive-letter
 * position. */
269 if (substitute_name_nchars >= 7 &&
270 substitute_name[0] == cpu_to_le16('\\') &&
271 substitute_name[1] == cpu_to_le16('?') &&
272 substitute_name[2] == cpu_to_le16('?') &&
273 substitute_name[3] == cpu_to_le16('\\') &&
274 substitute_name[4] != cpu_to_le16('\0') &&
275 substitute_name[5] == cpu_to_le16(':') &&
276 substitute_name[6] == cpu_to_le16('\\'))
278 /* "Full" symlink or junction (\??\x:\ prefixed path) */
/* Mount points only: the name must start with the 11-unit
 * volume_junction_prefix, end in a backslash, and be at least 12 units
 * long. */
280 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
281 substitute_name_nchars >= 12 &&
282 memcmp(substitute_name, volume_junction_prefix,
283 sizeof(volume_junction_prefix)) == 0 &&
284 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
286 /* Volume junction. Can't really do anything with it. */
287 return SUBST_NAME_IS_VOLUME_JUNCTION;
288 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
289 substitute_name_nchars >= 3 &&
290 substitute_name[0] != cpu_to_le16('\0') &&
291 substitute_name[1] == cpu_to_le16(':') &&
292 substitute_name[2] == cpu_to_le16('\\'))
294 /* "Absolute" symlink, with drive letter */
/* Remaining non-empty symlink names are told apart by whether they start
 * with a backslash. */
296 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
297 substitute_name_nchars >= 1)
299 if (substitute_name[0] == cpu_to_le16('\\'))
300 /* "Absolute" symlink, without drive letter */
303 /* "Relative" symlink, without drive letter */
304 return SUBST_NAME_IS_RELATIVE_LINK;
306 return SUBST_NAME_IS_UNKNOWN;
311 * Get the UNIX-style symlink target from the WIM inode for a reparse point.
312 * Specifically, this translates the target from UTF-16 to the current multibyte
313 * encoding, strips the drive prefix if present, and swaps backslashes and
317 * The inode to read the symlink from. It must be a reparse point with
318 * tag WIM_IO_REPARSE_TAG_SYMLINK (a real symlink) or
319 * WIM_IO_REPARSE_TAG_MOUNT_POINT (a mount point or junction point).
322 * Buffer into which to place the link target.
325 * Available space in @buf, in bytes.
328 * If not NULL, the blob from which to read the reparse data. Otherwise,
329 * the reparse data will be read from the reparse point stream of @inode.
331 * If the entire symbolic link target was placed in the buffer, returns the
332 * number of bytes written. The resulting string is not null-terminated. If
333 * the symbolic link target was too large to be placed in the buffer, the first
334 * @bufsize bytes of it are placed in the buffer and
335 * -ENAMETOOLONG is returned. Otherwise, a negative errno value indicating
336 * another error is returned.
/*
 * Implementation outline: rebuild the reparse point buffer on the stack,
 * parse it, convert the whole substitute name with utf16le_to_tstr(), use
 * parse_substitute_name() to decide how much of the front to skip, swap the
 * path separators, then copy at most @bufsize bytes into @buf.
 */
339 wim_inode_readlink(const struct wim_inode * restrict inode,
340 char * restrict buf, size_t bufsize,
341 const struct blob_descriptor *blob_override)
/* Stack storage for the reconstructed reparse point buffer; it is passed to
 * the helpers below as a plain byte pointer. */
344 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
345 struct reparse_data rpdata;
347 char *translated_target;
348 size_t link_target_len;
/* Only symlink-like inodes are expected here; anything else is a caller bug. */
351 wimlib_assert(inode_is_symlink(inode));
353 if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen,
357 if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata))
/* The entire substitute name is converted first; the part to skip is worked
 * out afterwards from the original UTF-16LE form. */
360 ret = utf16le_to_tstr(rpdata.substitute_name,
361 rpdata.substitute_name_nbytes,
362 &link_target, &link_target_len);
366 translated_target = link_target;
367 ret = parse_substitute_name(rpdata.substitute_name,
368 rpdata.substitute_name_nbytes,
/* A relative target has no prefix to strip; only the separators change. */
371 case SUBST_NAME_IS_RELATIVE_LINK:
372 goto out_translate_slashes;
373 case SUBST_NAME_IS_VOLUME_JUNCTION:
375 case SUBST_NAME_IS_UNKNOWN:
376 ERROR("Can't understand reparse point "
377 "substitute name \"%s\"", link_target);
379 goto out_free_link_target;
/* ret is documented as a count of UTF-16LE characters, yet it is applied
 * here to the converted string and its length.  NOTE(review): the two only
 * line up if every skipped character converted to exactly one unit of the
 * output; parse_substitute_name() accepts any non-null unit as the drive
 * letter -- confirm a non-ASCII one cannot reach this point. */
381 translated_target += ret;
382 link_target_len -= ret;
386 out_translate_slashes:
/* The two separators are exchanged rather than just replaced, so a literal
 * forward slash in the Windows name becomes a backslash in the result. */
387 for (size_t i = 0; i < link_target_len; i++) {
388 if (translated_target[i] == '\\')
389 translated_target[i] = '/';
390 else if (translated_target[i] == '/')
391 translated_target[i] = '\\';
/* Clamp to the caller's buffer so the memcpy() below never writes more than
 * bufsize bytes.  No terminator is appended. */
394 if (link_target_len > bufsize) {
395 link_target_len = bufsize;
398 ret = link_target_len;
400 memcpy(buf, translated_target, link_target_len);
401 out_free_link_target:
406 /* Given a UNIX-style symbolic link target, create a Windows-style reparse point
407 * buffer and assign it to the specified inode. */
/*
 * @inode:      inode that receives the reparse point stream; its
 *              i_reparse_tag becomes the tag of the generated buffer.
 * @target:     null-terminated UNIX-style link target (strlen() is taken).
 *              A leading '/' selects the absolute form, anything else the
 *              relative form.
 * @blob_table: blob table.  NOTE(review): presumably handed on when the
 *              stream is added -- confirm at the call site.
 *
 * The target is converted to UTF-16LE, '/' and '\\' are exchanged, and the
 * buffer is built with make_reparse_buffer().  Only the part of that buffer
 * after the first REPARSE_DATA_OFFSET bytes is stored in the stream.
 *
 * NOTE(review): in the absolute branch the two alloca() buffers are sized
 * from rpdata.substitute_name_nbytes / rpdata.print_name_nbytes, while the
 * memcpy() calls copy name_utf16le_nbytes (a size_t) bytes into them.
 * Elsewhere in this file those fields are filled from 16-bit on-disk values;
 * if they are narrower than size_t, a very long target would be truncated
 * in the size but not in the copy, and this happens before
 * make_reparse_buffer() has a chance to reject it.  Confirm that callers
 * bound the target length.
 */
409 wim_inode_set_symlink(struct wim_inode *inode, const char *target,
410 struct blob_table *blob_table)
413 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
414 struct reparse_data rpdata;
/* The two prefixes are raw UTF-16LE bytes: \??\C: (exactly 12 bytes, so the
 * array holds no terminator) and C: (4 bytes). */
415 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
416 static const char abs_print_name_prefix[4] = "C\0:\0";
417 utf16lechar *name_utf16le;
418 size_t name_utf16le_nbytes;
422 DEBUG("Creating reparse point data buffer for UNIX "
423 "symlink target \"%s\"", target);
424 memset(&rpdata, 0, sizeof(rpdata));
425 ret = tstr_to_utf16le(target, strlen(target),
426 &name_utf16le, &name_utf16le_nbytes);
/* Exchange the separators in place; the loop counts 2-byte units. */
430 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++) {
431 if (name_utf16le[i] == cpu_to_le16('/'))
432 name_utf16le[i] = cpu_to_le16('\\');
433 else if (name_utf16le[i] == cpu_to_le16('\\'))
434 name_utf16le[i] = cpu_to_le16('/');
437 /* Compatibility notes:
439 * On UNIX, an absolute symbolic link begins with '/'; everything else
440 * is a relative symbolic link. (Quite simple compared to the various
441 * ways to provide Windows paths.)
443 * To change a UNIX relative symbolic link to Windows format, we need to
444 * translate it to UTF-16LE, swap forward slashes and backslashes, and
445 * set 'rpflags' to SYMBOLIC_LINK_RELATIVE.
447 * For UNIX absolute symbolic links, we must set the 'rpflags' field to 0.
448 * Then, there are multiple options as to actually represent the
449 * absolute link targets:
451 * (1) An absolute path beginning with one backslash character, similar
452 * to UNIX-style, just with a different path separator. Print name same
453 * as substitute name.
455 * (2) Absolute path beginning with drive letter followed by a
456 * backslash. Print name same as substitute name.
458 * (3) Absolute path beginning with drive letter followed by a
459 * backslash; substitute name prefixed with \??\, otherwise same as
462 * We choose option (3) here, and we just assume C: for the drive
463 * letter. The reasoning for this is:
465 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
466 * reparse point fixups for these links, even though they are valid
467 * absolute links. (Note: in this case prefixing the substitute name
468 * with \??\ does not work; it just makes the data unable to be restored
470 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
471 * for these. It apparently contains a bug that causes it to create an
472 * invalid reparse point, which then cannot be restored.
473 * (3) This is the only option I tested for which reparse point fixups
474 * worked properly in Microsoft imagex.exe.
476 * So option (3) it is.
479 rpdata.rptag = inode->i_reparse_tag;
480 if (target[0] == '/') {
481 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
482 sizeof(abs_subst_name_prefix);
483 rpdata.print_name_nbytes = name_utf16le_nbytes +
484 sizeof(abs_print_name_prefix);
485 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
486 rpdata.print_name = alloca(rpdata.print_name_nbytes);
487 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
488 sizeof(abs_subst_name_prefix));
489 memcpy(rpdata.print_name, abs_print_name_prefix,
490 sizeof(abs_print_name_prefix));
491 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
492 name_utf16le, name_utf16le_nbytes);
493 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
494 name_utf16le, name_utf16le_nbytes);
496 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
497 rpdata.print_name_nbytes = name_utf16le_nbytes;
498 rpdata.substitute_name = name_utf16le;
499 rpdata.print_name = name_utf16le;
500 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
503 ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk, &rpbuflen);
507 ret = WIMLIB_ERR_NOMEM;
508 if (!inode_add_stream_with_data(inode,
509 STREAM_TYPE_REPARSE_POINT,
511 (u8*)&rpbuf_disk + REPARSE_DATA_OFFSET,
512 rpbuflen - REPARSE_DATA_OFFSET,
523 #endif /* !__WIN32__ */