8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your option)
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more
22 * You should have received a copy of the GNU General Public License
23 * along with wimlib; if not, see http://www.gnu.org/licenses/.
27 #include "buffer_io.h"
28 #include "lookup_table.h"
/* UTF-16LE prefix that parse_substitute_name() compares against the start of a
 * mount point's substitute name in order to recognize a volume junction. */
36 static const utf16lechar volume_junction_prefix[11] = {
50 /* Parse the "substitute name" (link target) from a symbolic link or junction
55 * Non-negative integer:
56 * The name is an absolute symbolic link in one of several formats,
57 * and the return value is the number of UTF-16LE characters that need to
58 * be advanced to reach a simple "absolute" path starting with a backslash
59 * (i.e. skip over \??\ and/or drive letter)
61 * SUBST_NAME_IS_VOLUME_JUNCTION:
62 * The name is a volume junction.
63 * SUBST_NAME_IS_RELATIVE_LINK:
64 * The name is a relative symbolic link.
65 * SUBST_NAME_IS_UNKNOWN:
66 * The name does not appear to be a valid symbolic link, junction,
 *
 * Parameters:
 *   substitute_name         UTF-16LE code units of the substitute name.
 *   substitute_name_nbytes  Its length in bytes; it is halved to obtain the
 *                           number of code units that may be examined.
 *   rptag                   Reparse tag.  The "\??\x:\" form is accepted for
 *                           any tag, the volume junction form only for
 *                           WIM_IO_REPARSE_TAG_MOUNT_POINT, and the remaining
 *                           forms only for WIM_IO_REPARSE_TAG_SYMLINK.
 *
 * The forms are tested in this order: "\??\x:\", volume junction prefix with
 * a trailing backslash, "x:\", leading backslash, anything else (relative).
 * In the drive-letter position "x" is any non-NUL code unit.
 *
 * NOTE(review): the return statements of the absolute-link branches are not
 * visible here; per the description they should be the number of code units
 * to skip -- confirm.
70 parse_substitute_name(const utf16lechar *substitute_name,
71 u16 substitute_name_nbytes, u32 rptag)
73 u16 substitute_name_nchars = substitute_name_nbytes / 2;
75 if (substitute_name_nchars >= 7 &&
76 substitute_name[0] == cpu_to_le16('\\') &&
77 substitute_name[1] == cpu_to_le16('?') &&
78 substitute_name[2] == cpu_to_le16('?') &&
79 substitute_name[3] == cpu_to_le16('\\') &&
80 substitute_name[4] != cpu_to_le16('\0') &&
81 substitute_name[5] == cpu_to_le16(':') &&
82 substitute_name[6] == cpu_to_le16('\\'))
84 /* "Full" symlink or junction (\??\x:\ prefixed path) */
86 } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
87 substitute_name_nchars >= 12 &&
88 memcmp(substitute_name, volume_junction_prefix,
89 sizeof(volume_junction_prefix)) == 0 &&
90 substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
92 /* Volume junction. Can't really do anything with it. */
93 return SUBST_NAME_IS_VOLUME_JUNCTION;
94 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
95 substitute_name_nchars >= 3 &&
96 substitute_name[0] != cpu_to_le16('\0') &&
97 substitute_name[1] == cpu_to_le16(':') &&
98 substitute_name[2] == cpu_to_le16('\\'))
100 /* "Absolute" symlink, with drive letter */
102 } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
103 substitute_name_nchars >= 1)
105 if (substitute_name[0] == cpu_to_le16('\\'))
106 /* "Absolute" symlink, without drive letter */
109 /* "Relative" symlink, without drive letter */
110 return SUBST_NAME_IS_RELATIVE_LINK;
/* No recognized form matched; an empty name also ends up here because every
 * test above requires at least one code unit. */
112 return SUBST_NAME_IS_UNKNOWN;
117 * Read the data from a symbolic link, junction, or mount point reparse point
118 * buffer into a `struct reparse_data'.
120 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
121 * description of the format of the reparse point buffers.
/*
 * Decode the fixed header of a symbolic link or mount point reparse buffer
 * into @rpdata and locate the two names that follow it.
 *
 * @rpbuf:    Start of the reparse point buffer (the reparse tag comes first).
 * @rpbuflen: Number of valid bytes in @rpbuf; both names are bounds-checked
 *            against it.
 * @rpdata:   Zeroed and then filled in.  substitute_name and print_name are
 *            set to addresses computed from the read cursor, so they
 *            presumably point into @rpbuf rather than at a copy -- confirm
 *            before releasing @rpbuf while @rpdata is still in use.
 *
 * On malformed input an error is logged and WIMLIB_ERR_INVALID_REPARSE_DATA is
 * returned.
 */
124 parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata)
127 u16 substitute_name_offset;
128 u16 print_name_offset;
130 memset(rpdata, 0, sizeof(*rpdata));
133 p = get_u32(p, &rpdata->rptag);
134 wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
135 rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
136 p = get_u16(p, &rpdata->rpdatalen);
137 p = get_u16(p, &rpdata->rpreserved);
138 p = get_u16(p, &substitute_name_offset);
139 p = get_u16(p, &rpdata->substitute_name_nbytes);
140 p = get_u16(p, &print_name_offset);
141 p = get_u16(p, &rpdata->print_name_nbytes);
/* Only the symbolic link variant carries the 32-bit flags field. */
142 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
145 p = get_u32(p, &rpdata->rpflags);
/* The name offsets are relative to the end of the fixed header, i.e. to the
 * current cursor position, so the header size (p - rpbuf) is added before
 * comparing against the buffer length. */
147 if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
148 (p - rpbuf) > rpbuflen)
150 if ((size_t)print_name_offset + rpdata->print_name_nbytes +
151 (p - rpbuf) > rpbuflen)
153 rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset];
154 rpdata->print_name = (utf16lechar*)&p[print_name_offset];
157 ERROR("Invalid reparse data");
158 return WIMLIB_ERR_INVALID_REPARSE_DATA;
162 * Create a reparse point data buffer.
164 * @rpdata: Structure that contains the data we need.
166 * @rpbuf: Buffer into which to write the reparse point data buffer. Must be
167 * at least REPARSE_POINT_MAX_SIZE bytes long.
/*
 * Serialize @rpdata into the reparse point buffer @rpbuf.
 *
 * Fields are written in this order: reparse tag, two bytes left for
 * ReparseDataLength (filled in last), the reserved field, substitute name
 * offset (always 0) and length, print name offset and length, the flags field
 * when the tag is WIM_IO_REPARSE_TAG_SYMLINK, and then the two names.
 *
 * If the header, both names and two UTF-16 terminators would not fit in
 * REPARSE_POINT_MAX_SIZE bytes, an error is logged and
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned.
 *
 * NOTE(review): the statements that write the two null terminators are not
 * visible here; the offsets and the size check assume they exist -- confirm.
 */
170 make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf)
174 p = put_u32(p, rpdata->rptag);
175 p += 2; /* We set ReparseDataLength later */
176 p = put_u16(p, rpdata->rpreserved);
177 p = put_u16(p, 0); /* substitute name offset */
178 p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */
/* The extra 2 bytes leave room for the substitute name's null terminator. */
179 p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */
180 p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */
181 if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
182 p = put_u32(p, rpdata->rpflags);
183 /* We null-terminate the substitute and print names, although this may
184 * not be strictly necessary. Note that the byte counts should not
185 * include the null terminators. */
186 if (p + rpdata->substitute_name_nbytes +
187 rpdata->print_name_nbytes +
188 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
190 ERROR("Reparse data is too long!");
191 return WIMLIB_ERR_INVALID_REPARSE_DATA;
193 p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name);
195 p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name);
/* ReparseDataLength does not count the first 8 bytes of the buffer (tag,
 * length and reserved fields). */
197 put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */
202 * Read the reparse data from a WIM inode that is a reparse point.
204 * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
205 * the reparse point data buffer will be reconstructed.
207 * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
208 * are omitted, presumably because we already know the reparse tag from the
209 * dentry, and we already know the reparse tag length from the lookup table
210 * entry resource length. However, we reconstruct the first 8 bytes in the
211 * buffer returned by this function.
/*
 * Rebuild the complete reparse point buffer of @inode in @rpbuf.
 *
 * The resource of the inode's unnamed stream is read to @rpbuf + 8, and the
 * first 8 bytes are then filled in with the inode's reparse tag, the resource
 * size as ReparseDataLength, and a zero 16-bit field.
 *
 * WIMLIB_ERR_INVALID_REPARSE_DATA is returned (after logging) when the inode
 * has no reparse data or when the resource is larger than
 * REPARSE_POINT_MAX_SIZE - 8 bytes.
 */
214 wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf)
216 struct wim_lookup_table_entry *lte;
219 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
221 lte = inode_unnamed_lte_resolved(inode);
223 ERROR("Reparse point has no reparse data!");
224 return WIMLIB_ERR_INVALID_REPARSE_DATA;
/* 8 bytes of @rpbuf are reserved for the header rebuilt below. */
226 if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) {
227 ERROR("Reparse data is too long!");
228 return WIMLIB_ERR_INVALID_REPARSE_DATA;
231 /* Read the data from the WIM file */
232 ret = read_full_resource_into_buf(lte, rpbuf + 8);
236 /* Reconstruct the first 8 bytes of the reparse point buffer */
239 put_u32(rpbuf, inode->i_reparse_tag);
241 /* ReparseDataLength */
242 put_u16(rpbuf + 4, wim_resource_size(lte));
245 * XXX this could be one of the unknown fields in the WIM dentry. */
246 put_u16(rpbuf + 6, 0);
250 /* UNIX version of getting and setting the data in reparse points */
251 #if !defined(__WIN32__)
253 /* Get the UNIX symlink target from a WIM inode. The inode may be either a
254 * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
255 * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
257 * This has similar semantics to the UNIX readlink() function, except the path
258 * argument is swapped out with the `struct wim_inode' for a reparse point, and
259 * on failure a negated error code is returned rather than -1 with errno set. */
/*
 * Steps: rebuild the inode's reparse buffer, parse it, convert the substitute
 * name from UTF-16LE, classify it with parse_substitute_name(), drop any
 * Windows-specific prefix from an absolute target, turn backslashes into
 * forward slashes, and copy at most @bufsize bytes into @buf.
 */
261 wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize)
264 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
266 struct reparse_data rpdata;
268 char *translated_target;
269 size_t link_target_len;
271 wimlib_assert(inode_is_symlink(inode));
273 if (wim_inode_get_reparse_data(inode, rpbuf))
/* ReparseDataLength is stored at offset 4 and does not count the 8 header
 * bytes, hence the "+ 8" when the total buffer length is passed on. */
276 get_u16(rpbuf + 4, &rpdatalen);
278 if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata))
281 ret = utf16le_to_tstr(rpdata.substitute_name,
282 rpdata.substitute_name_nbytes,
283 &link_target, &link_target_len);
287 translated_target = link_target;
288 ret = parse_substitute_name(rpdata.substitute_name,
289 rpdata.substitute_name_nbytes,
292 case SUBST_NAME_IS_RELATIVE_LINK:
293 goto out_translate_slashes;
294 case SUBST_NAME_IS_VOLUME_JUNCTION:
296 case SUBST_NAME_IS_UNKNOWN:
297 ERROR("Can't understand reparse point "
298 "substitute name \"%s\"", link_target);
/* Absolute link: skip the prefix reported by parse_substitute_name().
 * NOTE(review): that count is in UTF-16LE code units but is applied here to
 * the converted string; the two only agree if every skipped character
 * converts to a single char -- confirm for non-ASCII drive designators. */
301 translated_target += ret;
302 link_target_len -= ret;
306 out_translate_slashes:
307 for (size_t i = 0; i < link_target_len; i++)
308 if (translated_target[i] == '\\')
309 translated_target[i] = '/';
/* Never copy more than the caller's buffer can hold. */
311 if (link_target_len > bufsize) {
312 link_target_len = bufsize;
315 ret = link_target_len;
/* Exactly link_target_len bytes are copied; this memcpy() does not append a
 * terminating null character. */
317 memcpy(buf, translated_target, link_target_len);
/*
 * Build Windows-style reparse data for the UNIX symbolic link target @target
 * and store it as the unnamed stream of @inode.
 *
 * The target is converted to UTF-16LE and every '/' in it is replaced with a
 * backslash.  A target that starts with '/' is treated as absolute: the
 * substitute name is the converted target prefixed with the UTF-16LE string
 * "\??\C:" and the print name is the converted target prefixed with "C:".
 * Any other target is used unchanged for both names, with rpflags set to
 * SYMBOLIC_LINK_RELATIVE.  The buffer produced by make_reparse_buffer(),
 * minus its first 8 bytes, is what gets stored.
 *
 * NOTE(review): the absolute branch sizes two alloca() buffers from the
 * target length, and the name lengths are assigned from size_t sums to fields
 * that are read with get_u16() elsewhere (so presumably 16-bit).  A very long
 * target could therefore exhaust the stack or wrap the stored length before
 * make_reparse_buffer() performs its size check -- confirm, and consider
 * bounding the length up front.
 */
323 wim_inode_set_symlink(struct wim_inode *inode,
325 struct wim_lookup_table *lookup_table)
328 u8 rpbuf[REPARSE_POINT_MAX_SIZE];
330 struct reparse_data rpdata;
331 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
332 static const char abs_print_name_prefix[4] = "C\0:\0";
333 utf16lechar *name_utf16le;
334 size_t name_utf16le_nbytes;
337 DEBUG("Creating reparse point data buffer for UNIX "
338 "symlink target \"%s\"", target);
339 memset(&rpdata, 0, sizeof(rpdata));
340 ret = tstr_to_utf16le(target, strlen(target),
341 &name_utf16le, &name_utf16le_nbytes);
345 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
346 if (name_utf16le[i] == cpu_to_le16('/'))
347 name_utf16le[i] = cpu_to_le16('\\');
349 /* Compatibility notes:
351 * On UNIX, an absolute symbolic link begins with '/'; everything else
352 * is a relative symbolic link. (Quite simple compared to the various
353 * ways to provide Windows paths.)
355 * To change a UNIX relative symbolic link to Windows format, we only
356 * need to translate it to UTF-16LE and replace forward slashes with
357 * backslashes. We do not make any attempt to handle filename character
358 * problems, such as a link target that itself contains backslashes on
359 * UNIX. Then, for these relative links, we set the reparse header
360 * @flags field to SYMBOLIC_LINK_RELATIVE.
362 * For UNIX absolute symbolic links, we must set the @flags field to 0.
363 * Then, there are multiple options as to actually represent the
364 * absolute link targets:
366 * (1) An absolute path beginning with one backslash character, similar
367 * to UNIX-style, just with a different path separator. Print name same
368 * as substitute name.
370 * (2) Absolute path beginning with drive letter followed by a
371 * backslash. Print name same as substitute name.
373 * (3) Absolute path beginning with drive letter followed by a
374 * backslash; substitute name prefixed with \??\, otherwise same as
377 * We choose option (3) here, and we just assume C: for the drive
378 * letter. The reasoning for this is:
380 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
381 * reparse point fixups for these links, even though they are valid
382 * absolute links. (Note: in this case prefixing the substitute name
383 * with \??\ does not work; it just makes the data unable to be restored
385 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
386 * for these. It apparently contains a bug that causes it to create an
387 * invalid reparse point, which then cannot be restored.
388 * (3) This is the only option I tested for which reparse point fixups
389 * worked properly in Microsoft imagex.exe.
391 * So option (3) it is.
394 rpdata.rptag = inode->i_reparse_tag;
395 if (target[0] == '/') {
396 rpdata.substitute_name_nbytes = name_utf16le_nbytes +
397 sizeof(abs_subst_name_prefix);
398 rpdata.print_name_nbytes = name_utf16le_nbytes +
399 sizeof(abs_print_name_prefix);
400 rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
401 rpdata.print_name = alloca(rpdata.print_name_nbytes);
402 memcpy(rpdata.substitute_name, abs_subst_name_prefix,
403 sizeof(abs_subst_name_prefix));
404 memcpy(rpdata.print_name, abs_print_name_prefix,
405 sizeof(abs_print_name_prefix));
406 memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
407 name_utf16le, name_utf16le_nbytes);
408 memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
409 name_utf16le, name_utf16le_nbytes);
411 rpdata.substitute_name_nbytes = name_utf16le_nbytes;
412 rpdata.print_name_nbytes = name_utf16le_nbytes;
413 rpdata.substitute_name = name_utf16le;
414 rpdata.print_name = name_utf16le;
415 rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
418 ret = make_reparse_buffer(&rpdata, rpbuf);
420 get_u16(rpbuf + 4, &rpdatalen);
421 ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen,
428 #include <sys/stat.h>
// Look up the inode number and device number of @path with stat().
// On success they are stored through @ino_ret and @dev_ret.  If stat() fails,
// a warning that includes errno is logged and WIMLIB_ERR_STAT is returned.
431 unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
434 if (stat(path, &stbuf)) {
436 WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
437 /* Treat as a link pointing outside the capture root (it
438 * most likely is). */
439 return WIMLIB_ERR_STAT;
441 *ino_ret = stbuf.st_ino;
442 *dev_ret = stbuf.st_dev;
447 #endif /* !defined(__WIN32__) */
/* Path separator conventions and the inode/device lookup routine used when
 * fixing up absolute link targets.  Two alternative sets of definitions
 * follow: the first uses a wide-character backslash as the separator, accepts
 * both backslash and forward slash, and maps os_get_ino_and_dev to
 * win32_get_file_and_vol_ids; the second uses and accepts only '/', and maps
 * it to unix_get_ino_and_dev.
 * NOTE(review): the preprocessor conditional selecting between the two sets
 * is not visible here; presumably the first set is for Windows builds --
 * confirm. */
451 # define RP_PATH_SEPARATOR L'\\'
452 # define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
453 # define os_get_ino_and_dev win32_get_file_and_vol_ids
455 # define RP_PATH_SEPARATOR '/'
456 # define is_rp_path_separator(c) ((c) == '/')
457 # define os_get_ino_and_dev unix_get_ino_and_dev
460 /* Fix up absolute symbolic link targets--- mostly shared between UNIX and
463 capture_fixup_absolute_symlink(tchar *dest,
464 u64 capture_root_ino, u64 capture_root_dev)
469 /* Skip drive letter */
470 if (!is_rp_path_separator(*dest))
474 DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
481 while (is_rp_path_separator(*p))
486 ret = os_get_ino_and_dev(dest, &ino, &dev);
489 if (ret) /* stat() failed before we got to the capture root---
490 assume the link points outside it. */
493 if (ino == capture_root_ino && dev == capture_root_dev) {
494 /* Link points inside capture root. Return abbreviated
497 *(p - 1) = RP_PATH_SEPARATOR;
498 while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
501 if (!is_rp_path_separator(dest[0])) {
506 wimlib_assert(p >= dest);
511 /* Link points outside capture root. */
517 } while (!is_rp_path_separator(*p) && *p != T('\0'));