8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your option)
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more
22 * You should have received a copy of the GNU General Public License
23 * along with wimlib; if not, see http://www.gnu.org/licenses/.
27 #include "buffer_io.h"
28 #include "lookup_table.h"
/* UTF-16LE prefix used by parse_substitute_name() to recognize a volume
 * junction: the start of a mount point's substitute name is compared against
 * all sizeof(volume_junction_prefix) bytes (11 characters) with memcmp().
 *
 * NOTE(review): the 11 initializer elements are not visible in this listing;
 * confirm the exact prefix text against the full file. */
32 static const utf16lechar volume_junction_prefix[11] = {
46 /* Parse the "substitute name" (link target) from a symbolic link or junction
51 * Non-negative integer:
52 * The name is an absolute symbolic link in one of several formats,
53 * and the return value is the number of UTF-16LE characters that need to
54 * be advanced to reach a simple "absolute" path starting with a backslash
55 * (i.e. skip over \??\ and/or drive letter)
57 * SUBST_NAME_IS_VOLUME_JUNCTION:
58 * The name is a volume junction.
59 * SUBST_NAME_IS_RELATIVE_LINK:
60 * The name is a relative symbolic link.
61 * SUBST_NAME_IS_UNKNOWN:
62 * The name does not appear to be a valid symbolic link, junction,
/* Implementation notes: what the checks below accept, in evaluation order.
 * The first matching branch decides the result.
 *
 *  1. Any tag, at least 7 characters: backslash, '?', '?', backslash, one
 *     non-NUL character, ':', backslash (the "full" prefixed form).
 *  2. WIM_IO_REPARSE_TAG_MOUNT_POINT only, at least 12 characters: the name
 *     starts with volume_junction_prefix and its last character is a
 *     backslash.  Result: SUBST_NAME_IS_VOLUME_JUNCTION.
 *  3. WIM_IO_REPARSE_TAG_SYMLINK only, at least 3 characters: one non-NUL
 *     character, ':', backslash (drive-letter form).
 *  4. WIM_IO_REPARSE_TAG_SYMLINK only, at least 1 character: a leading
 *     backslash means an absolute link without drive letter; anything else
 *     is SUBST_NAME_IS_RELATIVE_LINK.
 *
 * Names matching none of these reach the final return of
 * SUBST_NAME_IS_UNKNOWN.
 *
 * The character count is substitute_name_nbytes / 2, so an odd trailing byte
 * is ignored, and every index is read only after the length test guarding
 * it.  The "drive letter" position is only required to be non-NUL; it is not
 * checked to be alphabetic.
 *
 * NOTE(review): the return statements of the three absolute-path branches
 * are not visible in this listing; confirm their values against the contract
 * (number of UTF-16LE characters to skip to reach the leading backslash). */
66 parse_substitute_name(const utf16lechar *substitute_name,
67 u16 substitute_name_nbytes, u32 rptag)
69 u16 substitute_name_nchars = substitute_name_nbytes / 2;
71 if (substitute_name_nchars >= 7 &&
72 substitute_name[0] == cpu_to_le16('\\') &&
73 substitute_name[1] == cpu_to_le16('?') &&
74 substitute_name[2] == cpu_to_le16('?') &&
75 substitute_name[3] == cpu_to_le16('\\') &&
76 substitute_name[4] != cpu_to_le16('\0') &&
77 substitute_name[5] == cpu_to_le16(':') &&
78 substitute_name[6] == cpu_to_le16('\\'))
80 /* "Full" symlink or junction (\??\x:\ prefixed path) */
82 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
83 substitute_name_nchars >= 12 &&
84 memcmp(substitute_name, volume_junction_prefix,
85 sizeof(volume_junction_prefix)) == 0 &&
86 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
88 /* Volume junction. Can't really do anything with it. */
89 return SUBST_NAME_IS_VOLUME_JUNCTION;
90 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
91 substitute_name_nchars >= 3 &&
92 substitute_name[0] != cpu_to_le16('\0') &&
93 substitute_name[1] == cpu_to_le16(':') &&
94 substitute_name[2] == cpu_to_le16('\\'))
96 /* "Absolute" symlink, with drive letter */
98 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
99 substitute_name_nchars >= 1)
101 if (substitute_name[0] == cpu_to_le16('\\'))
102 /* "Absolute" symlink, without drive letter */
105 /* "Relative" symlink, without drive letter */
106 return SUBST_NAME_IS_RELATIVE_LINK;
108 return SUBST_NAME_IS_UNKNOWN;
113 * Read the data from a symbolic link, junction, or mount point reparse point
114 * buffer into a `struct reparse_data'.
116 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
117 * description of the format of the reparse point buffers.
/* Implementation notes.
 *
 * @rpbuf:    start of the reparse point buffer (8-byte header included).
 * @rpbuflen: number of bytes available at @rpbuf.
 * @rpdata:   zeroed first, then filled in field by field.
 *
 * Fields are consumed in this order: 32-bit reparse tag, then six 16-bit
 * values (data length, reserved, substitute name offset, substitute name
 * length, print name offset, print name length).  For
 * WIM_IO_REPARSE_TAG_SYMLINK a 32-bit flags word follows.
 *
 * Both name offsets are applied relative to p, i.e. the position just past
 * the fields read above.  rpdata->substitute_name and rpdata->print_name are
 * set to point into @rpbuf; no copy is made, so they are only usable while
 * @rpbuf is.
 *
 * The visible error path logs "Invalid reparse data" and returns
 * WIMLIB_ERR_INVALID_REPARSE_DATA.
 *
 * NOTE(review): the tag is checked with wimlib_assert() rather than with an
 * error return; confirm callers only pass symlink / mount point buffers.
 * NOTE(review): the declaration of p, the statements taken when a bounds
 * check fails, and the success return are not visible in this listing. */
120 parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata)
123 u16 substitute_name_offset;
124 u16 print_name_offset;
126 memset(rpdata, 0, sizeof(*rpdata));
129 p = get_u32(p, &rpdata->rptag);
130 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
131 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
132 p = get_u16(p, &rpdata->rpdatalen);
133 p = get_u16(p, &rpdata->rpreserved);
134 p = get_u16(p, &substitute_name_offset);
135 p = get_u16(p, &rpdata->substitute_name_nbytes);
136 p = get_u16(p, &print_name_offset);
137 p = get_u16(p, &rpdata->print_name_nbytes);
138 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
141 p = get_u32(p, &rpdata->rpflags);
/* Each name (offset + length, measured from p) must end within rpbuflen
 * before a pointer to it is formed. */
143 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
144 (p - rpbuf) > rpbuflen)
146 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
147 (p - rpbuf) > rpbuflen)
149 rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset];
150 rpdata->print_name = (utf16lechar*)&p[print_name_offset];
153 ERROR("Invalid reparse data");
154 return WIMLIB_ERR_INVALID_REPARSE_DATA;
158 * Create a reparse point data buffer.
160 * @rpdata: Structure that contains the data we need.
162 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
163 * at least REPARSE_POINT_MAX_SIZE bytes long.
/* Implementation notes.
 *
 * Layout written to @rpbuf, in order: 32-bit reparse tag; 2 bytes skipped
 * for ReparseDataLength; 16-bit reserved value; substitute name offset
 * (always 0); substitute name length; print name offset; print name length;
 * and, for WIM_IO_REPARSE_TAG_SYMLINK only, the 32-bit flags.  The two names
 * are then copied with put_bytes(), substitute name first.
 *
 * ReparseDataLength (at rpbuf + 4) is filled in last as the number of bytes
 * written minus the 8-byte header.
 *
 * If the names plus two utf16lechar terminators would not fit in
 * REPARSE_POINT_MAX_SIZE, "Reparse data is too long!" is logged and
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned.  The header fields have
 * already been stored in @rpbuf at that point.
 *
 * NOTE(review): the statements at original lines 190 and 192 (presumably
 * the writes of the two null terminators) and the success return are not
 * visible in this listing. */
166 make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf)
170 p = put_u32(p, rpdata->rptag);
171 p += 2; /* We set ReparseDataLength later */
172 p = put_u16(p, rpdata->rpreserved);
173 p = put_u16(p, 0); /* substitute name offset */
174 p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */
/* The print name starts 2 bytes after the end of the substitute name,
 * leaving room for one utf16lechar between them. */
175 p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */
176 p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */
177 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
178 p = put_u32(p, rpdata->rpflags);
179 /* We null-terminate the substitute and print names, although this may
180 * not be strictly necessary. Note that the byte counts should not
181 * include the null terminators. */
182 if (p + rpdata->substitute_name_nbytes +
183 rpdata->print_name_nbytes +
184 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
186 ERROR("Reparse data is too long!");
187 return WIMLIB_ERR_INVALID_REPARSE_DATA;
189 p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name);
191 p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name);
193 put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */
198 * Read the reparse data from a WIM inode that is a reparse point.
200 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
201 * the reparse point data buffer will be reconstructed.
203 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
204 * are omitted, presumably because we already know the reparse tag from the
205 * dentry, and we already know the reparse data length from the lookup table
206 * entry resource length. However, we reconstruct the first 8 bytes in the
207 * buffer returned by this function.
/* Implementation notes.
 *
 * Visible steps:
 *  - assert that the inode has FILE_ATTRIBUTE_REPARSE_POINT set;
 *  - look up the lookup table entry of the unnamed stream;
 *  - reject the inode ("Reparse data is too long!") when the resource is
 *    larger than REPARSE_POINT_MAX_SIZE - 8, so that data plus the 8-byte
 *    header fits in @rpbuf;
 *  - read the resource to rpbuf + 8;
 *  - rebuild the header: bytes 0-3 from inode->i_reparse_tag, bytes 4-5 from
 *    the resource size, bytes 6-7 set to 0.
 *
 * Both visible error messages are followed by a return of
 * WIMLIB_ERR_INVALID_REPARSE_DATA.
 *
 * NOTE(review): the condition guarding "Reparse point has no reparse data!"
 * (presumably a missing lookup table entry), the handling of the read
 * result, and the final return are not visible in this listing. */
210 wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf)
212 struct wim_lookup_table_entry *lte;
215 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
217 lte = inode_unnamed_lte_resolved(inode);
219 ERROR("Reparse point has no reparse data!");
220 return WIMLIB_ERR_INVALID_REPARSE_DATA;
222 if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) {
223 ERROR("Reparse data is too long!");
224 return WIMLIB_ERR_INVALID_REPARSE_DATA;
227 /* Read the data from the WIM file */
228 ret = read_full_resource_into_buf(lte, rpbuf + 8, true);
232 /* Reconstruct the first 8 bytes of the reparse point buffer */
235 put_u32(rpbuf, inode->i_reparse_tag);
237 /* ReparseDataLength */
238 put_u16(rpbuf + 4, wim_resource_size(lte));
241 * XXX this could be one of the unknown fields in the WIM dentry. */
242 put_u16(rpbuf + 6, 0);
246 /* UNIX version of getting and setting the data in reparse points */
247 #if !defined(__WIN32__)
249 /* Get the UNIX symlink target from a WIM inode. The inode may be either a
250 * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
251 * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
253 * This has similar semantics to the UNIX readlink() function, except the path
254 * argument is swapped out with the `struct wim_inode' for a reparse point, and
255 * on failure a negated error code is returned rather than -1 with errno set. */
/* Implementation notes.
 *
 * Visible flow:
 *  - the full reparse buffer is rebuilt on the stack with
 *    wim_inode_get_reparse_data() and parsed with parse_reparse_data(),
 *    using ReparseDataLength + 8 as the buffer length;
 *  - the substitute name is converted with utf16le_to_tstr() into
 *    link_target / link_target_len;
 *  - parse_substitute_name() classifies the name.  A relative link goes
 *    straight to slash translation.  For a non-negative result, that many
 *    leading characters are skipped (translated_target advanced and
 *    link_target_len reduced);
 *  - every backslash in the remaining target becomes a forward slash;
 *  - at most @bufsize bytes are copied into @buf with memcpy().  No
 *    terminator is written in the visible lines.
 *
 * NOTE(review): the skip count is in UTF-16LE characters but is applied to
 * the converted string; this matches only when each skipped character
 * converts to exactly one char (true for backslash, '?', ':' and ASCII drive
 * letters).  Confirm for other drive-position characters.
 * NOTE(review): the switch header, the volume junction / unknown handling,
 * the value returned on truncation, and any release of link_target are not
 * visible in this listing. */
257 wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize)
260 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
262 struct reparse_data rpdata;
264 char *translated_target;
265 size_t link_target_len;
267 wimlib_assert(inode_is_symlink(inode));
269 if (wim_inode_get_reparse_data(inode, rpbuf))
/* ReparseDataLength lives at offset 4 of the rebuilt header. */
272 get_u16(rpbuf + 4, &rpdatalen);
274 if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata))
277 ret = utf16le_to_tstr(rpdata.substitute_name,
278 rpdata.substitute_name_nbytes,
279 &link_target, &link_target_len);
283 translated_target = link_target;
284 ret = parse_substitute_name(rpdata.substitute_name,
285 rpdata.substitute_name_nbytes,
288 case SUBST_NAME_IS_RELATIVE_LINK:
289 goto out_translate_slashes;
290 case SUBST_NAME_IS_VOLUME_JUNCTION:
292 case SUBST_NAME_IS_UNKNOWN:
293 ERROR("Can't understand reparse point "
294 "substitute name \"%s\"", link_target);
297 translated_target += ret;
298 link_target_len -= ret;
302 out_translate_slashes:
303 for (size_t i = 0; i < link_target_len; i++)
304 if (translated_target[i] == '\\')
305 translated_target[i] = '/';
307 if (link_target_len > bufsize) {
308 link_target_len = bufsize;
311 ret = link_target_len;
313 memcpy(buf, translated_target, link_target_len);
323 wim_inode_set_symlink(struct wim_inode *inode,
325 struct wim_lookup_table *lookup_table)
328 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
330 struct reparse_data rpdata;
331 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
332 static const char abs_print_name_prefix[4] = "C\0:\0";
333 utf16lechar *name_utf16le;
334 size_t name_utf16le_nbytes;
337 DEBUG("Creating reparse point data buffer for UNIX "
338 "symlink target \"%s\"", target);
339 memset(&rpdata, 0, sizeof(rpdata));
340 ret = tstr_to_utf16le(target, strlen(target),
341 &name_utf16le, &name_utf16le_nbytes);
345 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
346 if (name_utf16le[i] == cpu_to_le16('/'))
347 name_utf16le[i] = cpu_to_le16('\\');
349 /* Compatibility notes:
351 * On UNIX, an absolute symbolic link begins with '/'; everything else
352 * is a relative symbolic link. (Quite simple compared to the various
353 * ways to provide Windows paths.)
355 * To change a UNIX relative symbolic link to Windows format, we only
356 * need to translate it to UTF-16LE and replace backslashes with forward
357 * slashes. We do not make any attempt to handle filename character
358 * problems, such as a link target that itself contains backslashes on
359 * UNIX. Then, for these relative links, we set the reparse header
360 * @flags field to SYMBOLIC_LINK_RELATIVE.
362 * For UNIX absolute symbolic links, we must set the @flags field to 0.
363 * Then, there are multiple options as to how to actually represent the
364 * absolute link targets:
366 * (1) An absolute path beginning with one backslash character, similar
367 * to UNIX-style, just with a different path separator. Print name same
368 * as substitute name.
370 * (2) Absolute path beginning with drive letter followed by a
371 * backslash. Print name same as substitute name.
373 * (3) Absolute path beginning with drive letter followed by a
374 * backslash; substitute name prefixed with \??\, otherwise same as
377 * We choose option (3) here, and we just assume C: for the drive
378 * letter. The reasoning for this is:
380 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
381 * reparse point fixups for these links, even though they are valid
382 * absolute links. (Note: in this case prefixing the substitute name
383 * with \??\ does not work; it just makes the data unable to be restored
385 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
386 * for these. It apparently contains a bug that causes it to create an
387 * invalid reparse point, which then cannot be restored.
388 * (3) This is the only option I tested for which reparse point fixups
389 * worked properly in Microsoft imagex.exe.
391 * So option (3) it is.
394 rpdata.rptag = inode->i_reparse_tag;
395 if (target[0] == '/') {
396 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
397 sizeof(abs_subst_name_prefix);
398 rpdata.print_name_nbytes = name_utf16le_nbytes +
399 sizeof(abs_print_name_prefix);
400 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
401 rpdata.print_name = alloca(rpdata.print_name_nbytes);
402 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
403 sizeof(abs_subst_name_prefix));
404 memcpy(rpdata.print_name, abs_print_name_prefix,
405 sizeof(abs_print_name_prefix));
406 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
407 name_utf16le, name_utf16le_nbytes);
408 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
409 name_utf16le, name_utf16le_nbytes);
411 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
412 rpdata.print_name_nbytes = name_utf16le_nbytes;
413 rpdata.substitute_name = name_utf16le;
414 rpdata.print_name = name_utf16le;
415 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
418 ret = make_reparse_buffer(&rpdata, rpbuf);
420 get_u16(rpbuf + 4, &rpdatalen);
421 ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen,
428 #include <sys/stat.h>
/* Fetch the inode number and device of @path using stat().
 *
 * @path:    path to look up; stat() follows symbolic links, so the result
 *           describes the final target.
 * @ino_ret: receives stbuf.st_ino on success.
 * @dev_ret: receives stbuf.st_dev on success.
 *
 * When stat() fails, WIMLIB_ERR_STAT is returned; the visible failure path
 * does not store anything through @ino_ret or @dev_ret.
 *
 * NOTE(review): original line 435 (possibly a condition guarding the
 * warning) and the success return are not visible in this listing. */
431 unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
434 if (stat(path, &stbuf)) {
436 WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
437 /* Treat as a link pointing outside the capture root (it
438 * most likely is). */
439 return WIMLIB_ERR_STAT;
441 *ino_ret = stbuf.st_ino;
442 *dev_ret = stbuf.st_dev;
447 #endif /* !defined(__WIN32__) */
/* Platform hooks used by capture_fixup_absolute_symlink():
 *   RP_PATH_SEPARATOR       - separator character written back into a path;
 *   is_rp_path_separator(c) - true if c counts as a path separator;
 *   os_get_ino_and_dev      - function that fetches a file's identity.
 *
 * The first group uses wide characters, accepts both backslash and forward
 * slash, and maps to win32_get_file_and_vol_ids (judging by the name, the
 * Windows variant).  The second group uses plain chars, accepts only the
 * forward slash, and maps to unix_get_ino_and_dev.
 *
 * NOTE(review): the preprocessor conditionals selecting one group are not
 * visible in this listing. */
451 # define RP_PATH_SEPARATOR L'\\'
452 # define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
453 # define os_get_ino_and_dev win32_get_file_and_vol_ids
455 # define RP_PATH_SEPARATOR '/'
456 # define is_rp_path_separator(c) ((c) == '/')
457 # define os_get_ino_and_dev unix_get_ino_and_dev
460 /* Fix up absolute symbolic link targets--- mostly shared between UNIX and
/* Implementation notes (only part of the body is visible in this listing).
 *
 * @dest:             absolute link target; the visible code writes to it
 *                    (a separator is stored at p - 1 in one branch).
 * @capture_root_ino,
 * @capture_root_dev: identity of the capture root, compared against the
 *                    values reported by os_get_ino_and_dev().
 *
 * The visible lines skip separators, query os_get_ino_and_dev() for @dest,
 * and then:
 *  - on lookup failure, treat the link as pointing outside the capture
 *    root;
 *  - when both ino and dev equal the capture root's, treat the link as
 *    pointing inside it and back p up over any separators preceding it
 *    (never before @dest);
 *  - otherwise advance p to the next separator or the terminating NUL.
 *
 * NOTE(review): the declaration of p, how @dest is truncated for each
 * lookup, and every return value are not visible here; presumably the
 * abbreviated path is returned for links inside the root.  Confirm against
 * the full file. */
463 capture_fixup_absolute_symlink(tchar *dest,
464 u64 capture_root_ino, u64 capture_root_dev)
468 DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
475 while (is_rp_path_separator(*p))
480 ret = os_get_ino_and_dev(dest, &ino, &dev);
483 if (ret) /* stat() failed before we got to the capture root---
484 assume the link points outside it. */
487 if (ino == capture_root_ino && dev == capture_root_dev) {
488 /* Link points inside capture root. Return abbreviated
491 *(p - 1) = RP_PATH_SEPARATOR;
492 while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
498 /* Link points outside capture root. */
504 } while (!is_rp_path_separator(*p) && *p != T('\0'));