2 * reparse.c - Handle reparse data.
6 * Copyright (C) 2012, 2013 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option)
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/assert.h"
29 #include "wimlib/compiler.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/inode.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/reparse.h"
36 #include "wimlib/resource.h"
44 /* On-disk format of a symbolic link (WIM_IO_REPARSE_TAG_SYMLINK) or junction
45 * point (WIM_IO_REPARSE_TAG_MOUNT_POINT) reparse data buffer. */
46 struct reparse_buffer_disk {
/* Other code in this file also accesses rptag (via le32_to_cpu), rpdatalen
 * and rpreserved (via le16_to_cpu) through this struct; their declarations
 * are not visible in this listing. */
/* Location and length, in bytes, of the substitute name and of the print
 * name. parse_reparse_data() applies the offsets as indexes into the data[]
 * array of whichever variant the reparse tag selects. */
50 le16 substitute_name_offset;
51 le16 substitute_name_nbytes;
52 le16 print_name_offset;
53 le16 print_name_nbytes;
/* Symbolic link variant. parse_reparse_data() and make_reparse_buffer() also
 * access a 32-bit rpflags member of this variant; presumably it precedes
 * data[] and explains why this array is 4 bytes shorter than the junction
 * one -- TODO confirm against the full declaration. */
57 u8 data[REPARSE_POINT_MAX_SIZE - 20];
58 } _packed_attribute symlink;
/* Junction (mount point) variant; data[] is the only member visible here. */
60 u8 data[REPARSE_POINT_MAX_SIZE - 16];
61 } _packed_attribute junction;
/* UTF-16LE prefix, 11 code units long, that parse_substitute_name() compares
 * with memcmp() against the start of a mount point substitute name in order
 * to recognize a volume junction. The initializer elements are not visible
 * in this listing. */
65 static const utf16lechar volume_junction_prefix[11] = {
/*
 * parse_substitute_name() - classify the substitute name of a reparse point.
 *
 * @substitute_name: the name as UTF-16LE code units. Every index used below
 *	is guarded by a length check, so no null terminator is relied upon.
 * @substitute_name_nbytes: length of the name in bytes; it is halved to get
 *	the number of code units.
 * @rptag: reparse tag, compared with WIM_IO_REPARSE_TAG_SYMLINK and
 *	WIM_IO_REPARSE_TAG_MOUNT_POINT.
 *
 * The tests run in this order and the first match wins:
 *   1. any tag: at least 7 units of the form \??\ , one non-NUL unit, then
 *      a colon and a backslash;
 *   2. mount point only: at least 12 units, beginning with
 *      volume_junction_prefix and ending with a backslash;
 *   3. symlink only: at least 3 units of the form one non-NUL unit, a colon
 *      and a backslash;
 *   4. symlink only: any other non-empty name, absolute when its first unit
 *      is a backslash and relative otherwise.
 * A name matching none of these (including a mount point that fails tests 1
 * and 2) presumably reaches the final return of SUBST_NAME_IS_UNKNOWN.
 *
 * NOTE(review): the return statements of the absolute cases (1, 3 and the
 * absolute half of 4) are not visible in this listing; confirm them against
 * the skip counts described in the comment below.
 */
79 /* Parse the "substitute name" (link target) from a symbolic link or junction
84 * Non-negative integer:
85 * The name is an absolute symbolic link in one of several formats,
86 * and the return value is the number of UTF-16LE characters that need to
87 * be advanced to reach a simple "absolute" path starting with a backslash
88 * (i.e. skip over \??\ and/or drive letter)
90 * SUBST_NAME_IS_VOLUME_JUNCTION:
91 * The name is a volume junction.
92 * SUBST_NAME_IS_RELATIVE_LINK:
93 * The name is a relative symbolic link.
94 * SUBST_NAME_IS_UNKNOWN:
95 * The name does not appear to be a valid symbolic link, junction,
99 parse_substitute_name(const utf16lechar *substitute_name,
100 u16 substitute_name_nbytes, u32 rptag)
102 u16 substitute_name_nchars = substitute_name_nbytes / 2;
104 if (substitute_name_nchars >= 7 &&
105 substitute_name[0] == cpu_to_le16('\\') &&
106 substitute_name[1] == cpu_to_le16('?') &&
107 substitute_name[2] == cpu_to_le16('?') &&
108 substitute_name[3] == cpu_to_le16('\\') &&
109 substitute_name[4] != cpu_to_le16('\0') &&
110 substitute_name[5] == cpu_to_le16(':') &&
111 substitute_name[6] == cpu_to_le16('\\'))
113 /* "Full" symlink or junction (\??\x:\ prefixed path) */
115 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
116 substitute_name_nchars >= 12 &&
117 memcmp(substitute_name, volume_junction_prefix,
118 sizeof(volume_junction_prefix)) == 0 &&
119 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
121 /* Volume junction. Can't really do anything with it. */
122 return SUBST_NAME_IS_VOLUME_JUNCTION;
123 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
124 substitute_name_nchars >= 3 &&
125 substitute_name[0] != cpu_to_le16('\0') &&
126 substitute_name[1] == cpu_to_le16(':') &&
127 substitute_name[2] == cpu_to_le16('\\'))
129 /* "Absolute" symlink, with drive letter */
/* Remaining non-empty symlink names: only the first code unit decides
 * between absolute and relative. */
131 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
132 substitute_name_nchars >= 1)
134 if (substitute_name[0] == cpu_to_le16('\\'))
135 /* "Absolute" symlink, without drive letter */
138 /* "Relative" symlink, without drive letter */
139 return SUBST_NAME_IS_RELATIVE_LINK;
141 return SUBST_NAME_IS_UNKNOWN;
/*
 * parse_reparse_data() - decode a symlink or mount point reparse buffer.
 *
 * @rpbuf:    the complete reparse point buffer, interpreted in place as a
 *            struct reparse_buffer_disk.
 * @rpbuflen: number of valid bytes in @rpbuf.
 * @rpdata:   output; zeroed first, then filled with the tag, data length,
 *            reserved field, name lengths and (for symlinks) the flags, all
 *            converted from little endian.
 *
 * On success the substitute_name and print_name members of @rpdata point
 * into @rpbuf; no copy of the names is made. The visible failure path logs
 * "Invalid reparse data" and returns WIMLIB_ERR_INVALID_REPARSE_DATA.
 *
 * NOTE(review): the header fields are read before any check visible in this
 * listing that @rpbuflen covers them; confirm that such a check exists in
 * the lines missing between the memset and the first field read.
 */
146 * Read the data from a symbolic link, junction, or mount point reparse point
147 * buffer into a `struct reparse_data'.
149 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
150 * description of the format of the reparse point buffers.
153 parse_reparse_data(const u8 * restrict rpbuf, u16 rpbuflen,
154 struct reparse_data * restrict rpdata)
156 u16 substitute_name_offset;
157 u16 print_name_offset;
158 const struct reparse_buffer_disk *rpbuf_disk =
159 (const struct reparse_buffer_disk*)rpbuf;
162 memset(rpdata, 0, sizeof(*rpdata));
165 rpdata->rptag = le32_to_cpu(rpbuf_disk->rptag);
/* Only the two tags described by struct reparse_buffer_disk are accepted. */
166 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
167 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
168 rpdata->rpdatalen = le16_to_cpu(rpbuf_disk->rpdatalen);
169 rpdata->rpreserved = le16_to_cpu(rpbuf_disk->rpreserved);
170 substitute_name_offset = le16_to_cpu(rpbuf_disk->substitute_name_offset);
171 rpdata->substitute_name_nbytes = le16_to_cpu(rpbuf_disk->substitute_name_nbytes);
172 print_name_offset = le16_to_cpu(rpbuf_disk->print_name_offset);
173 rpdata->print_name_nbytes = le16_to_cpu(rpbuf_disk->print_name_nbytes);
/* The low bits are OR-ed together, so the condition is true when any of the
 * four values is odd. The statement it guards is not visible here;
 * presumably it jumps to the error path at the end of the function. */
175 if ((substitute_name_offset & 1) | (print_name_offset & 1) |
176 (rpdata->substitute_name_nbytes & 1) | (rpdata->print_name_nbytes & 1))
178 /* Names would be unaligned... */
/* Pick the name area of the variant matching the tag; symlinks additionally
 * carry a flags word. */
182 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
185 rpdata->rpflags = le32_to_cpu(rpbuf_disk->symlink.rpflags);
186 data = rpbuf_disk->symlink.data;
188 data = rpbuf_disk->junction.data;
/* Each name must end within the first rpbuflen bytes of the buffer. The sum
 * is the offset of the name area from the start of rpbuf plus the name
 * offset and length, computed in size_t. The guarded statements are not
 * visible here. */
190 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
191 (data - rpbuf) > rpbuflen)
193 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
194 (data - rpbuf) > rpbuflen)
/* The casts drop the const qualifier of rpbuf; the names still live inside
 * the caller's buffer. */
196 rpdata->substitute_name = (utf16lechar*)&data[substitute_name_offset];
197 rpdata->print_name = (utf16lechar*)&data[print_name_offset];
200 ERROR("Invalid reparse data");
201 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/*
 * make_reparse_buffer() - serialize a struct reparse_data into on-disk form.
 *
 * The fixed fields are written little endian. The substitute name is placed
 * at offset 0 of the name area and the print name at offset
 * substitute_name_nbytes + 2, that is, after the 2-byte null terminator of
 * the substitute name. Both stored lengths exclude the terminators. For a
 * symlink tag the flags word is written as well.
 *
 * @rpbuflen_ret receives the distance from the start of the buffer to the
 * final write position; rpdatalen is that distance minus 8.
 *
 * When the names and their terminators would not fit in
 * REPARSE_POINT_MAX_SIZE bytes, "Reparse data is too long!" is logged and
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned.
 *
 * NOTE(review): the rpbuf parameter line, the declaration of data and any
 * statements advancing data past each terminator are not visible in this
 * listing; without such an advance the print name would overwrite the first
 * terminator -- confirm against the full source.
 */
205 * Create a reparse point data buffer.
207 * @rpdata: Structure that contains the data we need.
209 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
210 * at least REPARSE_POINT_MAX_SIZE bytes long.
213 make_reparse_buffer(const struct reparse_data * restrict rpdata,
215 u16 * restrict rpbuflen_ret)
217 struct reparse_buffer_disk *rpbuf_disk =
218 (struct reparse_buffer_disk*)rpbuf;
221 rpbuf_disk->rptag = cpu_to_le32(rpdata->rptag);
222 rpbuf_disk->rpreserved = cpu_to_le16(rpdata->rpreserved);
223 rpbuf_disk->substitute_name_offset = cpu_to_le16(0);
224 rpbuf_disk->substitute_name_nbytes = cpu_to_le16(rpdata->substitute_name_nbytes);
225 rpbuf_disk->print_name_offset = cpu_to_le16(rpdata->substitute_name_nbytes + 2);
226 rpbuf_disk->print_name_nbytes = cpu_to_le16(rpdata->print_name_nbytes);
228 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
229 rpbuf_disk->symlink.rpflags = cpu_to_le32(rpdata->rpflags);
230 data = rpbuf_disk->symlink.data;
232 data = rpbuf_disk->junction.data;
235 /* We null-terminate the substitute and print names, although this may
236 * not be strictly necessary. Note that the byte counts should not
237 * include the null terminators. */
/* NOTE(review): the check below first forms the pointer data + lengths and
 * only then subtracts rpbuf. For oversized names that intermediate pointer
 * lies beyond the buffer, which C leaves undefined; comparing the lengths
 * against the remaining space would avoid forming it. */
238 if (data + rpdata->substitute_name_nbytes +
239 rpdata->print_name_nbytes +
240 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
242 ERROR("Reparse data is too long!");
243 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* mempcpy() returns the position just past the copied bytes, which is where
 * each terminator is stored. */
245 data = mempcpy(data, rpdata->substitute_name, rpdata->substitute_name_nbytes);
246 *(utf16lechar*)data = cpu_to_le16(0);
248 data = mempcpy(data, rpdata->print_name, rpdata->print_name_nbytes);
249 *(utf16lechar*)data = cpu_to_le16(0);
/* The 8 subtracted here matches the 8-byte leading part of the buffer that,
 * per the note on wim_inode_get_reparse_data(), the WIM format omits. */
251 rpbuf_disk->rpdatalen = cpu_to_le16(data - rpbuf - 8);
252 *rpbuflen_ret = data - rpbuf;
/*
 * wim_inode_get_reparse_data() - rebuild the full reparse buffer of an inode.
 *
 * The stream contents are read to @rpbuf + 8 and the first 8 bytes are then
 * filled in: the tag from inode->i_reparse_tag, the data length from the
 * stream size, and a reserved field of 0. @rpbuflen_ret receives the stream
 * size plus 8.
 *
 * Visible failure cases, both returning WIMLIB_ERR_INVALID_REPARSE_DATA:
 *   - "Reparse point has no reparse data!" (the guarding condition is not
 *     visible in this listing; presumably no stream was found);
 *   - "Reparse data is too long!" when the stream is larger than
 *     REPARSE_POINT_MAX_SIZE - 8 bytes.
 *
 * NOTE(review): the rpbuf parameter line, the use of @lte_override and the
 * handling of the read_full_stream_into_buf() result are not visible here.
 */
257 * Read the reparse data from a WIM inode that is a reparse point.
259 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
260 * the reparse point data buffer will be reconstructed.
262 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
263 * are omitted, presumably because we already know the reparse tag from the
264 * dentry, and we already know the reparse tag length from the lookup table
265 * entry resource length. However, we reconstruct the first 8 bytes in the
266 * buffer returned by this function.
269 wim_inode_get_reparse_data(const struct wim_inode * restrict inode,
271 u16 * restrict rpbuflen_ret,
272 struct wim_lookup_table_entry *lte_override)
274 struct wim_lookup_table_entry *lte;
276 struct reparse_buffer_disk *rpbuf_disk;
279 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
282 lte = inode_unnamed_lte_resolved(inode);
284 ERROR("Reparse point has no reparse data!");
285 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* Leave room for the 8 reconstructed bytes in a buffer of
 * REPARSE_POINT_MAX_SIZE bytes. */
291 if (lte->size > REPARSE_POINT_MAX_SIZE - 8) {
292 ERROR("Reparse data is too long!");
293 return WIMLIB_ERR_INVALID_REPARSE_DATA;
295 rpdatalen = lte->size;
297 /* Read the data from the WIM file */
298 ret = read_full_stream_into_buf(lte, rpbuf + 8);
302 /* Reconstruct the first 8 bytes of the reparse point buffer */
303 rpbuf_disk = (struct reparse_buffer_disk*)rpbuf;
306 rpbuf_disk->rptag = cpu_to_le32(inode->i_reparse_tag);
308 /* ReparseDataLength */
309 rpbuf_disk->rpdatalen = cpu_to_le16(rpdatalen);
312 * XXX this could be one of the unknown fields in the WIM dentry. */
313 rpbuf_disk->rpreserved = cpu_to_le16(0);
315 *rpbuflen_ret = rpdatalen + 8;
319 /* UNIX version of getting and setting the data in reparse points */
/*
 * wim_inode_readlink() - produce a UNIX-style link target for an inode.
 *
 * Steps visible in this listing:
 *   1. rebuild the reparse buffer on the stack with
 *      wim_inode_get_reparse_data() and decode it with parse_reparse_data();
 *   2. convert the substitute name with utf16le_to_tstr();
 *   3. classify the name with parse_substitute_name(): a relative link is
 *      used as is, an unknown name is logged and abandoned, and otherwise
 *      the returned count is skipped at the front of the converted string;
 *   4. replace every backslash with a forward slash;
 *   5. copy at most @bufsize bytes to @buf with memcpy(), which appends no
 *      terminator.
 *
 * NOTE(review): the values returned on the failure paths, the handling of
 * SUBST_NAME_IS_VOLUME_JUNCTION and the release of link_target are not
 * visible in this listing.
 */
323 * Get the UNIX-style symlink target from the WIM inode for a reparse point.
324 * Specifically, this translates the target from UTF-16 to the current multibyte
325 * encoding, strips the drive prefix if present, and replaces backslashes with
329 * The inode to read the symlink from. It must be a reparse point with
330 * tag WIM_IO_REPARSE_TAG_SYMLINK (a real symlink) or
331 * WIM_IO_REPARSE_TAG_MOUNT_POINT (a mount point or junction point).
334 * Buffer into which to place the link target.
337 * Available space in @buf, in bytes.
340 * If not NULL, the stream from which to read the reparse data. Otherwise,
341 * the reparse data will be read from the unnamed stream of @inode.
343 * If the entire symbolic link target was placed in the buffer, returns the
344 * number of bytes written. The resulting string is not null-terminated. If
345 * the symbolic link target was too large to be placed in the buffer, the first
346 * @bufsize bytes of it are placed in the buffer and
347 * -ENAMETOOLONG is returned. Otherwise, a negative errno value indicating
348 * another error is returned.
351 wim_inode_readlink(const struct wim_inode * restrict inode,
352 char * restrict buf, size_t bufsize,
353 struct wim_lookup_table_entry *lte_override)
/* The buffer is a whole struct reparse_buffer_disk on the stack, passed to
 * the helpers below as a byte pointer. */
356 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
357 struct reparse_data rpdata;
359 char *translated_target;
360 size_t link_target_len;
363 wimlib_assert(inode_is_symlink(inode));
365 if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen,
369 if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata))
372 ret = utf16le_to_tstr(rpdata.substitute_name,
373 rpdata.substitute_name_nbytes,
374 &link_target, &link_target_len);
/* translated_target starts as an alias of link_target and may be advanced
 * below, while link_target keeps the original pointer. */
378 translated_target = link_target;
379 ret = parse_substitute_name(rpdata.substitute_name,
380 rpdata.substitute_name_nbytes,
383 case SUBST_NAME_IS_RELATIVE_LINK:
384 goto out_translate_slashes;
385 case SUBST_NAME_IS_VOLUME_JUNCTION:
387 case SUBST_NAME_IS_UNKNOWN:
388 ERROR("Can't understand reparse point "
389 "substitute name \"%s\"", link_target);
391 goto out_free_link_target;
/* NOTE(review): ret counts UTF-16LE code units, yet it is applied to the
 * converted string. The two agree only if every skipped character converts
 * to exactly one char; parse_substitute_name() accepts any non-NUL unit in
 * the drive letter position -- confirm this cannot misalign the result. */
393 translated_target += ret;
394 link_target_len -= ret;
398 out_translate_slashes:
399 for (size_t i = 0; i < link_target_len; i++)
400 if (translated_target[i] == '\\')
401 translated_target[i] = '/';
/* Clamp the copy length to the space available in buf; otherwise the
 * result is the full length. */
403 if (link_target_len > bufsize) {
404 link_target_len = bufsize;
407 ret = link_target_len;
409 memcpy(buf, translated_target, link_target_len);
410 out_free_link_target:
/*
 * wim_inode_set_symlink() - store a UNIX symlink target as reparse data.
 *
 * Steps visible in this listing:
 *   1. convert target (measured with strlen()) to UTF-16LE and replace each
 *      forward slash with a backslash in the converted copy;
 *   2. take the reparse tag from inode->i_reparse_tag;
 *   3. if target begins with a forward slash, build the substitute name as
 *      the 12-byte prefix followed by the converted target and the print
 *      name as the 4-byte prefix followed by the converted target, both in
 *      alloca() storage; rpflags keeps the 0 left by the memset();
 *      otherwise both names point at the converted target itself and
 *      rpflags is set to SYMBOLIC_LINK_RELATIVE;
 *   4. serialize with make_reparse_buffer() and hand the buffer, starting
 *      at byte 8, to inode_set_unnamed_stream().
 *
 * NOTE(review): the name lengths are assigned from a size_t plus a prefix
 * size. The member types are not visible here, but parse_reparse_data()
 * fills the same members from 16-bit values. If they are 16 bits wide, a
 * very long target would shorten the size given to alloca() while memcpy()
 * still copies name_utf16le_nbytes bytes -- confirm a length limit exists.
 *
 * NOTE(review): the target parameter line, the checks of the ret values and
 * the release of name_utf16le are not visible in this listing.
 */
416 wim_inode_set_symlink(struct wim_inode *inode,
418 struct wim_lookup_table *lookup_table)
421 struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8);
422 struct reparse_data rpdata;
/* The byte pairs below spell the prefixes \??\C: and C: as UTF-16LE code
 * units; the array sizes leave no room for a string terminator. */
423 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
424 static const char abs_print_name_prefix[4] = "C\0:\0";
425 utf16lechar *name_utf16le;
426 size_t name_utf16le_nbytes;
430 DEBUG("Creating reparse point data buffer for UNIX "
431 "symlink target \"%s\"", target);
432 memset(&rpdata, 0, sizeof(rpdata));
433 ret = tstr_to_utf16le(target, strlen(target),
434 &name_utf16le, &name_utf16le_nbytes);
/* Switch the path separators in place on the converted copy. */
438 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
439 if (name_utf16le[i] == cpu_to_le16('/'))
440 name_utf16le[i] = cpu_to_le16('\\');
442 /* Compatibility notes:
444 * On UNIX, an absolute symbolic link begins with '/'; everything else
445 * is a relative symbolic link. (Quite simple compared to the various
446 * ways to provide Windows paths.)
448 * To change a UNIX relative symbolic link to Windows format, we only
449 * need to translate it to UTF-16LE and replace forward slashes with
450 * backslashes. We do not make any attempt to handle filename character
451 * problems, such as a link target that itself contains backslashes on
452 * UNIX. Then, for these relative links, we set the reparse header
453 * @flags field to SYMBOLIC_LINK_RELATIVE.
455 * For UNIX absolute symbolic links, we must set the @flags field to 0.
456 * Then, there are multiple options as to actually represent the
457 * absolute link targets:
459 * (1) An absolute path beginning with one backslash character. similar
460 * to UNIX-style, just with a different path separator. Print name same
461 * as substitute name.
463 * (2) Absolute path beginning with drive letter followed by a
464 * backslash. Print name same as substitute name.
466 * (3) Absolute path beginning with drive letter followed by a
467 * backslash; substitute name prefixed with \??\, otherwise same as
470 * We choose option (3) here, and we just assume C: for the drive
471 * letter. The reasoning for this is:
473 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
474 * reparse point fixups for these links, even though they are valid
475 * absolute links. (Note: in this case prefixing the substitute name
476 * with \??\ does not work; it just makes the data unable to be restored
478 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
479 * for these. It apparently contains a bug that causes it to create an
480 * invalid reparse point, which then cannot be restored.
481 * (3) This is the only option I tested for which reparse point fixups
482 * worked properly in Microsoft imagex.exe.
484 * So option (3) it is.
487 rpdata.rptag = inode->i_reparse_tag;
488 if (target[0] == '/') {
489 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
490 sizeof(abs_subst_name_prefix);
491 rpdata.print_name_nbytes = name_utf16le_nbytes +
492 sizeof(abs_print_name_prefix);
493 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
494 rpdata.print_name = alloca(rpdata.print_name_nbytes);
495 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
496 sizeof(abs_subst_name_prefix));
497 memcpy(rpdata.print_name, abs_print_name_prefix,
498 sizeof(abs_print_name_prefix));
499 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
500 name_utf16le, name_utf16le_nbytes);
501 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
502 name_utf16le, name_utf16le_nbytes);
504 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
505 rpdata.print_name_nbytes = name_utf16le_nbytes;
506 rpdata.substitute_name = name_utf16le;
507 rpdata.print_name = name_utf16le;
508 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
511 ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk, &rpbuflen);
513 ret = inode_set_unnamed_stream(inode,
514 (u8*)&rpbuf_disk + 8,
522 #endif /* !__WIN32__ */