4 * Code to read and set symbolic links in WIM files.
8 * Copyright (C) 2012, 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free
14 * Software Foundation; either version 3 of the License, or (at your option)
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more
22 * You should have received a copy of the GNU General Public License
23 * along with wimlib; if not, see http://www.gnu.org/licenses/.
27 #include "buffer_io.h"
28 #include "lookup_table.h"
32 /* UNIX version of getting and setting the data in reparse points */
33 #if !defined(__WIN32__)
42 * Find the symlink target of a symbolic link or junction point in the WIM.
44 * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
45 * description of the format of the so-called "reparse point data buffers".
47 * But, in the WIM format, the first 8 bytes of the reparse point data buffer
48 * are omitted, presumably because we already know the reparse tag from the
49 * dentry, and we already know the reparse data length from the lookup table
50 * entry resource length.
/* Parameters, as used by the code below:
 *   @resource, @resource_len  reparse data to parse and its length in bytes
 *   @buf, @buf_len            destination buffer for the translated target
 *   @reparse_tag              must be WIM_IO_REPARSE_TAG_SYMLINK or
 *                             WIM_IO_REPARSE_TAG_MOUNT_POINT (asserted)
 * The translated target is truncated to @buf_len bytes before it is copied
 * into @buf, and `ret` is set to the number of bytes copied. */
53 get_symlink_name(const void *resource, size_t resource_len, char *buf,
54 size_t buf_len, u32 reparse_tag)
56 const void *p = resource;
57 u16 substitute_name_offset;
58 u16 substitute_name_len;
59 u16 print_name_offset;
62 char *translated_target;
63 size_t link_target_len;
66 bool translate_slashes;
/* Check the minimum length, then read the four 16-bit header fields in
 * order: substitute name offset and length, print name offset and length. */
68 if (resource_len < 12)
70 p = get_u16(p, &substitute_name_offset);
71 p = get_u16(p, &substitute_name_len);
72 p = get_u16(p, &print_name_offset);
73 p = get_u16(p, &print_name_len);
75 wimlib_assert(reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
76 reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
78 if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
85 substitute_name_offset + substitute_name_len > resource_len)
88 ret = utf16le_to_tstr((const utf16lechar*)(p + substitute_name_offset),
90 &link_target, &link_target_len);
94 DEBUG("Interpeting substitute name \"%s\" (ReparseTag=0x%x)",
95 link_target, reparse_tag);
/* Classify the substitute name by its prefix.  translated_target is advanced
 * past any Windows-specific prefix so that it starts at the first backslash
 * of the path; link_target_len is reduced by the same amount. */
96 translate_slashes = true;
97 translated_target = link_target;
98 if (link_target_len >= 7 &&
99 translated_target[0] == '\\' &&
100 translated_target[1] == '?' &&
101 translated_target[2] == '?' &&
102 translated_target[3] == '\\' &&
103 translated_target[4] != '\0' &&
104 translated_target[5] == ':' &&
105 translated_target[6] == '\\')
107 /* "Full" symlink or junction (\??\x:\ prefixed path) */
108 translated_target += 6;
109 link_target_len -= 6;
110 } else if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
111 link_target_len >= 12 &&
112 memcmp(translated_target, "\\\\?\\Volume{", 11) == 0 &&
113 translated_target[link_target_len - 1] == '\\')
115 /* Volume junction. Can't really do anything with it. */
116 translate_slashes = false;
117 } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK &&
118 link_target_len >= 3 &&
119 translated_target[0] != '\0' &&
120 translated_target[1] == ':' &&
121 translated_target[2] == '\\')
123 /* "Absolute" symlink, with drive letter */
124 translated_target += 2;
125 link_target_len -= 2;
126 } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK &&
127 link_target_len >= 1)
129 if (translated_target[0] == '\\')
130 /* "Absolute" symlink, without drive letter */
133 /* "Relative" symlink, without drive letter */
136 ERROR("Invalid reparse point substitute name: \"%s\"", translated_target);
/* Convert backslashes to forward slashes in place, unless this is a volume
 * junction (translate_slashes was cleared above). */
141 if (translate_slashes)
142 for (size_t i = 0; i < link_target_len; i++)
143 if (translated_target[i] == '\\')
144 translated_target[i] = '/';
/* Truncate to the caller's buffer size before copying. */
146 if (link_target_len > buf_len) {
147 link_target_len = buf_len;
150 ret = link_target_len;
152 memcpy(buf, translated_target, link_target_len);
/* Flag OR'ed into the symlink reparse header flags field when the link target
 * is relative (does not begin with '/'). */
158 #define SYMBOLIC_LINK_RELATIVE 0x00000001
160 /* Given a UNIX symlink target, prepare the corresponding symbolic link reparse
163 make_symlink_reparse_data_buf(const char *symlink_target, void *rpdata,
167 utf16lechar *name_utf16le;
168 size_t name_utf16le_nbytes;
169 size_t substitute_name_nbytes;
170 size_t print_name_nbytes;
171 static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
172 static const char abs_print_name_prefix[4] = "C\0:\0";
/* Convert the target to UTF-16LE; name_utf16le_nbytes is its size in bytes. */
177 ret = tstr_to_utf16le(symlink_target, strlen(symlink_target),
178 &name_utf16le, &name_utf16le_nbytes);
/* Replace every '/' with '\\' in place in the UTF-16LE copy. */
182 for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
183 if (name_utf16le[i] == cpu_to_le16('/'))
184 name_utf16le[i] = cpu_to_le16('\\');
186 /* Compatibility notes:
188 * On UNIX, an absolute symbolic link begins with '/'; everything else
189 * is a relative symbolic link. (Quite simple compared to the various
190 * ways to provide Windows paths.)
192 * To change a UNIX relative symbolic link to Windows format, we only
193 * need to translate it to UTF-16LE and replace forward slashes with
194 * backslashes. We do not make any attempt to handle filename character
195 * problems, such as a link target that itself contains backslashes on
196 * UNIX. Then, for these relative links, we set the reparse header
197 * @flags field to SYMBOLIC_LINK_RELATIVE.
199 * For UNIX absolute symbolic links, we must set the @flags field to 0.
200 * Then, there are multiple options as to actually represent the
201 * absolute link targets:
203 * (1) An absolute path beginning with one backslash character, similar
204 * to UNIX-style, just with a different path separator. Print name same
205 * as substitute name.
207 * (2) Absolute path beginning with drive letter followed by a
208 * backslash. Print name same as substitute name.
210 * (3) Absolute path beginning with drive letter followed by a
211 * backslash; substitute name prefixed with \??\, otherwise same as
214 * We choose option (3) here, and we just assume C: for the drive
215 * letter. The reasoning for this is:
217 * (1) Microsoft imagex.exe has a bug where it does not attempt to do
218 * reparse point fixups for these links, even though they are valid
219 * absolute links. (Note: in this case prefixing the substitute name
220 * with \??\ does not work; it just makes the data unable to be restored
222 * (2) Microsoft imagex.exe will fail when doing reparse point fixups
223 * for these. It apparently contains a bug that causes it to create an
224 * invalid reparse point, which then cannot be restored.
225 * (3) This is the only option I tested for which reparse point fixups
226 * worked properly in Microsoft imagex.exe.
228 * So option (3) it is.
231 substitute_name_nbytes = name_utf16le_nbytes;
232 print_name_nbytes = name_utf16le_nbytes;
233 if (symlink_target[0] == '/') {
234 substitute_name_nbytes += sizeof(abs_subst_name_prefix);
235 print_name_nbytes += sizeof(abs_print_name_prefix);
238 rplen = 12 + substitute_name_nbytes + print_name_nbytes +
239 2 * sizeof(utf16lechar);
241 if (rplen > REPARSE_POINT_MAX_SIZE) {
242 ERROR("Symlink \"%s\" is too long!", symlink_target);
243 return WIMLIB_ERR_LINK;
248 /* Substitute name offset */
251 /* Substitute name length */
252 p = put_u16(p, substitute_name_nbytes);
254 /* Print name offset */
255 p = put_u16(p, substitute_name_nbytes + sizeof(utf16lechar));
257 /* Print name length */
258 p = put_u16(p, print_name_nbytes);
/* Flags: only relative targets (not starting with '/') get
 * SYMBOLIC_LINK_RELATIVE. */
262 if (symlink_target[0] != '/')
263 flags |= SYMBOLIC_LINK_RELATIVE;
264 p = put_u32(p, flags);
266 /* Substitute name */
267 if (symlink_target[0] == '/')
268 p = put_bytes(p, sizeof(abs_subst_name_prefix), abs_subst_name_prefix);
269 p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
/* Print name: the same converted text, with the shorter "C:" prefix (instead
 * of "\??\C:") when the target is absolute. */
273 if (symlink_target[0] == '/')
274 p = put_bytes(p, sizeof(abs_print_name_prefix), abs_print_name_prefix);
275 p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
280 out_free_name_utf16le:
285 /* Get the symlink target from a WIM inode.
287 * The inode may be either a "real" symlink (reparse tag
288 * WIM_IO_REPARSE_TAG_SYMLINK), or it may be a junction point (reparse tag
289 * WIM_IO_REPARSE_TAG_MOUNT_POINT).
292 inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len,
293 const WIMStruct *w, bool threadsafe)
295 const struct wim_lookup_table_entry *lte;
/* Callers are expected to pass only symlink or junction inodes. */
299 wimlib_assert(inode_is_symlink(inode));
/* Look up the entry for the inode's unnamed stream in the WIM's lookup
 * table; the resource read below comes from this entry. */
301 lte = inode_unnamed_lte(inode, w->lookup_table);
/* Bound the resource size before the stack allocation (alloca) below. */
305 if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE)
308 res_buf = alloca(wim_resource_size(lte));
309 ret = read_full_resource_into_buf(lte, res_buf, threadsafe);
/* Parse the resource and copy the translated target into @buf. */
312 return get_symlink_name(res_buf, wim_resource_size(lte),
313 buf, buf_len, inode->i_reparse_tag);
317 * Sets @inode to be a symbolic link pointing to @target.
319 * A lookup table entry for the symbolic link data buffer is created and
320 * inserted into @lookup_table, unless there is an existing lookup table entry
321 * for the exact same data, in which case its reference count is incremented.
323 * The lookup table entry is returned in @lte_ret.
325 * On failure @inode and @lookup_table are not modified.
328 inode_set_symlink(struct wim_inode *inode,
330 struct wim_lookup_table *lookup_table,
331 struct wim_lookup_table_entry **lte_ret)
336 /* Buffer for reparse point data */
337 u8 rpdata[REPARSE_POINT_MAX_SIZE];
339 /* Actual length of the reparse point data (to be calculated by
340 * make_symlink_reparse_data_buf()) */
343 DEBUG("Creating reparse point data buffer "
344 "for UNIX symlink target \"%s\"", target);
/* Build the reparse data for the target in the stack buffer; rplen receives
 * the number of bytes used. */
346 ret = make_symlink_reparse_data_buf(target, rpdata, &rplen);
/* Store the buffer as the inode's unnamed stream, going through
 * @lookup_table. */
350 ret = inode_set_unnamed_stream(inode, rpdata, rplen, lookup_table);
/* Hand the inode's lookup table entry back to the caller. */
355 *lte_ret = inode->i_lte;
/* stat() @path and report its inode number through @ino_ret and its device
 * number through @dev_ret.  If stat() fails, a warning including errno is
 * logged and WIMLIB_ERR_STAT is returned; the outputs are not written on
 * that path. */
360 unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
363 if (stat(path, &stbuf)) {
364 WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
365 /* Treat as a link pointing outside the capture root (it
366 * most likely is). */
367 return WIMLIB_ERR_STAT;
369 *ino_ret = stbuf.st_ino;
370 *dev_ret = stbuf.st_dev;
375 #endif /* !defined(__WIN32__) */
/* Helpers used when fixing up reparse point targets.  Two variants are
 * listed: a wide-character one that treats both '\\' and '/' as separators
 * and maps os_get_ino_and_dev to win32_get_file_and_vol_ids, and a narrow one
 * that treats only '/' as a separator and maps it to unix_get_ino_and_dev.
 * NOTE(review): presumably selected by a platform conditional -- confirm. */
379 # define RP_PATH_SEPARATOR L'\\'
380 # define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
381 # define os_get_ino_and_dev win32_get_file_and_vol_ids
383 # define RP_PATH_SEPARATOR '/'
384 # define is_rp_path_separator(c) ((c) == '/')
385 # define os_get_ino_and_dev unix_get_ino_and_dev
388 /* Fix up absolute symbolic link targets--- mostly shared between UNIX and
391 fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
395 DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
402 while (is_rp_path_separator(*p))
407 ret = os_get_ino_and_dev(dest, &ino, &dev);
410 if (ret) /* stat() failed before we got to the capture root---
411 assume the link points outside it. */
414 if (ino == capture_root_ino && dev == capture_root_dev) {
415 /* Link points inside capture root. Return abbreviated
418 *(p - 1) = RP_PATH_SEPARATOR;
419 while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
425 /* Link points outside capture root. */
431 } while (!is_rp_path_separator(*p) && *p != T('\0'));