X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Freparse.c;h=f96fd49b7af592b63b3d6dd8f0b020a722489a3d;hp=446b6f9f3765fc74f1c89c5eca495338be9cc53f;hb=d1dd8bba3c12104e730ea1b2989ecbd6156658c5;hpb=f2f293a1759c81e7bd5deb904c3909368f3feaa5 diff --git a/src/reparse.c b/src/reparse.c index 446b6f9f..f96fd49b 100644 --- a/src/reparse.c +++ b/src/reparse.c @@ -1,583 +1,445 @@ /* - * reparse.c - Handle reparse data. + * reparse.c - Reparse point handling */ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2015 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. 
*/ #ifdef HAVE_CONFIG_H # include "config.h" #endif -#include "wimlib/assert.h" -#include "wimlib/compiler.h" +#include + +#include "wimlib/alloca.h" +#include "wimlib/blob_table.h" #include "wimlib/endianness.h" -#include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" +#include "wimlib/guid.h" +#include "wimlib/inode.h" #include "wimlib/reparse.h" #include "wimlib/resource.h" -#ifdef __WIN32__ -# include "wimlib/win32.h" /* for win32_get_file_and_vol_ids() */ -#endif - -#ifdef HAVE_ALLOCA_H -# include -#endif -#include -#include - -struct reparse_buffer_disk { - le32 rptag; - le16 rpdatalen; - le16 rpreserved; - le16 substitute_name_offset; - le16 substitute_name_nbytes; - le16 print_name_offset; - le16 print_name_nbytes; - union { - struct { - le32 rpflags; - u8 data[REPARSE_POINT_MAX_SIZE - 20]; - } _packed_attribute symlink; - struct { - u8 data[REPARSE_POINT_MAX_SIZE - 16]; - } _packed_attribute junction; - }; -} _packed_attribute; - -static const utf16lechar volume_junction_prefix[11] = { - cpu_to_le16('\\'), - cpu_to_le16('\\'), - cpu_to_le16('?'), - cpu_to_le16('\\'), - cpu_to_le16('V'), - cpu_to_le16('o'), - cpu_to_le16('l'), - cpu_to_le16('u'), - cpu_to_le16('m'), - cpu_to_le16('e'), - cpu_to_le16('{'), -}; - -/* Parse the "substitute name" (link target) from a symbolic link or junction - * reparse point. - * - * Return value is: - * - * Non-negative integer: - * The name is an absolute symbolic link in one of several formats, - * and the return value is the number of UTF-16LE characters that need to - * be advanced to reach a simple "absolute" path starting with a backslash - * (i.e. skip over \??\ and/or drive letter) - * Negative integer: - * SUBST_NAME_IS_VOLUME_JUNCTION: - * The name is a volume junction. - * SUBST_NAME_IS_RELATIVE_LINK: - * The name is a relative symbolic link. - * SUBST_NAME_IS_UNKNOWN: - * The name does not appear to be a valid symbolic link, junction, - * or mount point. 
+/* + * Reconstruct the header of a reparse point buffer. This is necessary because + * only reparse data is stored in WIM files. The reparse tag is instead stored + * in the on-disk WIM dentry, and the reparse data length is equal to the size + * of the blob in which the reparse data was stored, minus the size of a GUID + * (16 bytes) if the reparse tag does not have the "Microsoft" bit set. */ -int -parse_substitute_name(const utf16lechar *substitute_name, - u16 substitute_name_nbytes, u32 rptag) +void +complete_reparse_point(struct reparse_buffer_disk *rpbuf, + const struct wim_inode *inode, u16 blob_size) { - u16 substitute_name_nchars = substitute_name_nbytes / 2; - - if (substitute_name_nchars >= 7 && - substitute_name[0] == cpu_to_le16('\\') && - substitute_name[1] == cpu_to_le16('?') && - substitute_name[2] == cpu_to_le16('?') && - substitute_name[3] == cpu_to_le16('\\') && - substitute_name[4] != cpu_to_le16('\0') && - substitute_name[5] == cpu_to_le16(':') && - substitute_name[6] == cpu_to_le16('\\')) - { - /* "Full" symlink or junction (\??\x:\ prefixed path) */ - return 6; - } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT && - substitute_name_nchars >= 12 && - memcmp(substitute_name, volume_junction_prefix, - sizeof(volume_junction_prefix)) == 0 && - substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\')) - { - /* Volume junction. Can't really do anything with it. 
*/ - return SUBST_NAME_IS_VOLUME_JUNCTION; - } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && - substitute_name_nchars >= 3 && - substitute_name[0] != cpu_to_le16('\0') && - substitute_name[1] == cpu_to_le16(':') && - substitute_name[2] == cpu_to_le16('\\')) - { - /* "Absolute" symlink, with drive letter */ - return 2; - } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && - substitute_name_nchars >= 1) - { - if (substitute_name[0] == cpu_to_le16('\\')) - /* "Absolute" symlink, without drive letter */ - return 0; - else - /* "Relative" symlink, without drive letter */ - return SUBST_NAME_IS_RELATIVE_LINK; - } else { - return SUBST_NAME_IS_UNKNOWN; - } + rpbuf->rptag = cpu_to_le32(inode->i_reparse_tag); + if (blob_size >= GUID_SIZE && !(inode->i_reparse_tag & 0x80000000)) + blob_size -= GUID_SIZE; + rpbuf->rpdatalen = cpu_to_le16(blob_size); + rpbuf->rpreserved = cpu_to_le16(inode->i_rp_reserved); } -/* - * Read the data from a symbolic link, junction, or mount point reparse point - * buffer into a `struct reparse_data'. - * - * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a - * description of the format of the reparse point buffers. - */ +/* Parse the buffer for a symbolic link or junction reparse point and fill in a + * 'struct link_reparse_point'. + * + * Returns 0 on success, or WIMLIB_ERR_INVALID_REPARSE_DATA if the tag is not a + * symlink or junction tag, @rpbuflen is too small for the fixed fields, any name + * offset or length is odd, or a name would extend past @rpbuflen. On success + * the name pointers stored in @link point into @rpbuf; nothing is copied. 
*/ int -parse_reparse_data(const u8 * restrict rpbuf, u16 rpbuflen, - struct reparse_data * restrict rpdata) +parse_link_reparse_point(const struct reparse_buffer_disk *rpbuf, u16 rpbuflen, + struct link_reparse_point *link) { u16 substitute_name_offset; u16 print_name_offset; - const struct reparse_buffer_disk *rpbuf_disk = - (const struct reparse_buffer_disk*)rpbuf; const u8 *data; - memset(rpdata, 0, sizeof(*rpdata)); - if (rpbuflen < 16) - goto out_invalid; - rpdata->rptag = le32_to_cpu(rpbuf_disk->rptag); - wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK || - rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT); - rpdata->rpdatalen = le16_to_cpu(rpbuf_disk->rpdatalen); - rpdata->rpreserved = le16_to_cpu(rpbuf_disk->rpreserved); - substitute_name_offset = le16_to_cpu(rpbuf_disk->substitute_name_offset); - rpdata->substitute_name_nbytes = le16_to_cpu(rpbuf_disk->substitute_name_nbytes); - print_name_offset = le16_to_cpu(rpbuf_disk->print_name_offset); - rpdata->print_name_nbytes = le16_to_cpu(rpbuf_disk->print_name_nbytes); - - if ((substitute_name_offset & 1) | (print_name_offset & 1) | - (rpdata->substitute_name_nbytes & 1) | (rpdata->print_name_nbytes & 1)) - { - /* Names would be unaligned... */ - goto out_invalid; - } + link->rptag = le32_to_cpu(rpbuf->rptag); + + /* Not a symbolic link or junction? */ + if (link->rptag != WIM_IO_REPARSE_TAG_SYMLINK && + link->rptag != WIM_IO_REPARSE_TAG_MOUNT_POINT) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + + /* Is the buffer too small to be a symlink or a junction? 
*/ + if (rpbuflen < offsetof(struct reparse_buffer_disk, link.junction.data)) + return WIMLIB_ERR_INVALID_REPARSE_DATA; - if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) { - if (rpbuflen < 20) - goto out_invalid; - rpdata->rpflags = le16_to_cpu(rpbuf_disk->symlink.rpflags); - data = rpbuf_disk->symlink.data; + link->rpreserved = le16_to_cpu(rpbuf->rpreserved); + link->substitute_name_nbytes = le16_to_cpu(rpbuf->link.substitute_name_nbytes); + substitute_name_offset = le16_to_cpu(rpbuf->link.substitute_name_offset); + link->print_name_nbytes = le16_to_cpu(rpbuf->link.print_name_nbytes); + print_name_offset = le16_to_cpu(rpbuf->link.print_name_offset); + + /* The names must be properly sized and aligned. */ + if ((substitute_name_offset | print_name_offset | + link->substitute_name_nbytes | link->print_name_nbytes) & 1) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + + if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) { + if (rpbuflen < offsetof(struct reparse_buffer_disk, link.symlink.data)) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + link->symlink_flags = le32_to_cpu(rpbuf->link.symlink.flags); + data = rpbuf->link.symlink.data; } else { - data = rpbuf_disk->junction.data; + data = rpbuf->link.junction.data; } - if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes + - (data - rpbuf) > rpbuflen) - goto out_invalid; - if ((size_t)print_name_offset + rpdata->print_name_nbytes + - (data - rpbuf) > rpbuflen) - goto out_invalid; - rpdata->substitute_name = (utf16lechar*)&data[substitute_name_offset]; - rpdata->print_name = (utf16lechar*)&data[print_name_offset]; + + /* Verify that the names don't overflow the buffer. */ + if ((data - (const u8 *)rpbuf) + substitute_name_offset + + link->substitute_name_nbytes > rpbuflen) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + + if ((data - (const u8 *)rpbuf) + print_name_offset + + link->print_name_nbytes > rpbuflen) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + + /* Save the name pointers. 
*/ + link->substitute_name = (utf16lechar *)&data[substitute_name_offset]; + link->print_name = (utf16lechar *)&data[print_name_offset]; return 0; -out_invalid: - ERROR("Invalid reparse data"); - return WIMLIB_ERR_INVALID_REPARSE_DATA; } -/* - * Create a reparse point data buffer. - * - * @rpdata: Structure that contains the data we need. - * - * @rpbuf: Buffer into which to write the reparse point data buffer. Must be - * at least REPARSE_POINT_MAX_SIZE bytes long. - */ +/* Translate a 'struct link_reparse_point' into a reparse point buffer. + * + * The substitute name is written first and the print name second, each followed + * by a UTF-16 null terminator. On success, returns 0 and stores the total + * buffer length (header included) in @rpbuflen_ret. Returns + * WIMLIB_ERR_INVALID_REPARSE_DATA if the tag is neither symlink nor junction, or + * if the names plus terminators would exceed REPARSE_POINT_MAX_SIZE. */ int -make_reparse_buffer(const struct reparse_data * restrict rpdata, - u8 * restrict rpbuf) +make_link_reparse_point(const struct link_reparse_point *link, + struct reparse_buffer_disk *rpbuf, u16 *rpbuflen_ret) { - struct reparse_buffer_disk *rpbuf_disk = - (struct reparse_buffer_disk*)rpbuf; u8 *data; - rpbuf_disk->rptag = cpu_to_le32(rpdata->rptag); - rpbuf_disk->rpreserved = cpu_to_le16(rpdata->rpreserved); - rpbuf_disk->substitute_name_offset = cpu_to_le16(0); - rpbuf_disk->substitute_name_nbytes = cpu_to_le16(rpdata->substitute_name_nbytes); - rpbuf_disk->print_name_offset = cpu_to_le16(rpdata->substitute_name_nbytes + 2); - rpbuf_disk->print_name_nbytes = cpu_to_le16(rpdata->print_name_nbytes); - - if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) { - rpbuf_disk->symlink.rpflags = cpu_to_le32(rpdata->rpflags); - data = rpbuf_disk->symlink.data; - } else { - data = rpbuf_disk->junction.data; - } + if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) + data = rpbuf->link.symlink.data; + else if (link->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT) + data = 
rpbuf->link.junction.data; + else /* Callers should forbid this case, but check anyway. */ + return WIMLIB_ERR_INVALID_REPARSE_DATA; - /* We null-terminate the substitute and print names, although this may - * not be strictly necessary. Note that the byte counts should not - * include the null terminators. 
*/ - if (data + rpdata->substitute_name_nbytes + - rpdata->print_name_nbytes + - 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE) - { - ERROR("Reparse data is too long!"); + /* Check if the names are too long to fit in a reparse point. */ + if ((data - (u8 *)rpbuf) + link->substitute_name_nbytes + + link->print_name_nbytes + + 2 * sizeof(utf16lechar) > REPARSE_POINT_MAX_SIZE) return WIMLIB_ERR_INVALID_REPARSE_DATA; - } - data = mempcpy(data, rpdata->substitute_name, rpdata->substitute_name_nbytes); - *(utf16lechar*)data = cpu_to_le16(0); - data += 2; - data = mempcpy(data, rpdata->print_name, rpdata->print_name_nbytes); - *(utf16lechar*)data = cpu_to_le16(0); - data += 2; - rpbuf_disk->rpdatalen = cpu_to_le16(data - rpbuf - 8); + + rpbuf->rptag = cpu_to_le32(link->rptag); + rpbuf->rpreserved = cpu_to_le16(link->rpreserved); + rpbuf->link.substitute_name_offset = cpu_to_le16(0); + rpbuf->link.substitute_name_nbytes = cpu_to_le16(link->substitute_name_nbytes); + rpbuf->link.print_name_offset = cpu_to_le16(link->substitute_name_nbytes + + sizeof(utf16lechar)); + rpbuf->link.print_name_nbytes = cpu_to_le16(link->print_name_nbytes); + + if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) + rpbuf->link.symlink.flags = cpu_to_le32(link->symlink_flags); + + /* We null-terminate the substitute and print names, although this isn't + * strictly necessary. Note that the nbytes fields do not include the + * null terminators. */ + data = mempcpy(data, link->substitute_name, link->substitute_name_nbytes); + *(utf16lechar *)data = cpu_to_le16(0); + data += sizeof(utf16lechar); + data = mempcpy(data, link->print_name, link->print_name_nbytes); + *(utf16lechar *)data = cpu_to_le16(0); + data += sizeof(utf16lechar); + rpbuf->rpdatalen = cpu_to_le16(data - rpbuf->rpdata); + + *rpbuflen_ret = data - (u8 *)rpbuf; return 0; } -/* - * Read the reparse data from a WIM inode that is a reparse point. 
- * - * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which - * the reparse point data buffer will be reconstructed. - * - * Note: in the WIM format, the first 8 bytes of the reparse point data buffer - * are omitted, presumably because we already know the reparse tag from the - * dentry, and we already know the reparse tag length from the lookup table - * entry resource length. However, we reconstruct the first 8 bytes in the - * buffer returned by this function. - */ -int -wim_inode_get_reparse_data(const struct wim_inode * restrict inode, - u8 * restrict rpbuf, - u16 * restrict rpbuflen_ret) +/* UNIX symlink <=> Windows reparse point translation */ +#ifndef __WIN32__ + +/* Retrieve the inode's reparse point buffer into @rpbuf and @rpbuflen_ret. + * This gets the reparse data from @blob if specified, otherwise from the + * inode's reparse point stream. The inode's streams must be resolved. + * + * Returns 0 on success, WIMLIB_ERR_INVALID_REPARSE_DATA if the blob is larger + * than REPARSE_DATA_MAX_SIZE, or the error code from read_blob_into_buf(). If + * no blob is found, only the header is filled in and the data length is 0. */ +static int +wim_inode_get_reparse_point(const struct wim_inode *inode, + struct reparse_buffer_disk *rpbuf, + u16 *rpbuflen_ret, + const struct blob_descriptor *blob) { - struct wim_lookup_table_entry *lte; int ret; - struct reparse_buffer_disk *rpbuf_disk; - u16 rpdatalen; + u16 blob_size = 0; - wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); + if (!blob) { + const struct wim_inode_stream *strm; - lte = inode_unnamed_lte_resolved(inode); - if (!lte) { - ERROR("Reparse point has no reparse data!"); - return WIMLIB_ERR_INVALID_REPARSE_DATA; + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT); + if (strm) + blob = stream_blob_resolved(strm); } - if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) { - ERROR("Reparse data is too long!"); - return WIMLIB_ERR_INVALID_REPARSE_DATA; + if 
(blob) { + if (blob->size > REPARSE_DATA_MAX_SIZE) + return WIMLIB_ERR_INVALID_REPARSE_DATA; + blob_size = blob->size; + ret = read_blob_into_buf(blob, rpbuf->rpdata); + if (ret) + return ret; } - rpdatalen = wim_resource_size(lte); - 
/* Read the data from the WIM file */ - ret = read_full_resource_into_buf(lte, rpbuf + 8); - if (ret) - return ret; - - /* Reconstruct the first 8 bytes of the reparse point buffer */ - rpbuf_disk = (struct reparse_buffer_disk*)rpbuf; - - /* ReparseTag */ - rpbuf_disk->rptag = cpu_to_le32(inode->i_reparse_tag); + complete_reparse_point(rpbuf, inode, blob_size); - /* ReparseDataLength */ - rpbuf_disk->rpdatalen = cpu_to_le16(rpdatalen); - - /* ReparseReserved - * XXX this could be one of the unknown fields in the WIM dentry. */ - rpbuf_disk->rpreserved = cpu_to_le16(0); - - *rpbuflen_ret = rpdatalen + 8; + *rpbuflen_ret = REPARSE_DATA_OFFSET + blob_size; return 0; } -/* UNIX version of getting and setting the data in reparse points */ -#if !defined(__WIN32__) +static void +copy(char **buf_p, size_t *bufsize_p, const char *src, size_t src_size) +{ /* Bounded append: copy at most *bufsize_p bytes of @src to *buf_p, then + * advance *buf_p and shrink *bufsize_p by the number of bytes copied. */ + size_t n = min(*bufsize_p, src_size); + memcpy(*buf_p, src, n); + *buf_p += n; + *bufsize_p -= n; +} -/* Get the UNIX symlink target from a WIM inode. The inode may be either a - * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a - * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT). +/* + * Get a UNIX-style symlink target from the WIM inode for a reparse point. * - * This has similar semantics to the UNIX readlink() function, except the path - * argument is swapped out with the `struct wim_inode' for a reparse point, and - * on failure a negated error code is returned rather than -1 with errno set. */ -ssize_t -wim_inode_readlink(const struct wim_inode * restrict inode, - char * restrict buf, size_t bufsize) + * @inode + * The inode from which to read the symlink. If not a symbolic link or + * junction reparse point, then -EINVAL will be returned. + * @buf + * Buffer into which to place the link target. 
+ * @bufsize + * Available space in @buf, in bytes. + * @blob + * If not NULL, the blob from which to read the reparse data. 
Otherwise, + * the reparse data will be read from the reparse point stream of @inode. + * @altroot + * If @altroot_len != 0 and the link is an absolute link that was stored as + * "fixed", then prepend this path to the link target. + * @altroot_len + * Length of the @altroot string or 0. + * + * Similar to POSIX readlink(), this function writes as much of the symlink + * target as possible (up to @bufsize bytes) to @buf with no null terminator and + * returns the number of bytes written or a negative errno value on error. Note + * that the target is truncated and @bufsize is returned in the overflow case. + */ +int +wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize, + const struct blob_descriptor *blob, + const char *altroot, size_t altroot_len) { - int ret; - struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); - struct reparse_data rpdata; - char *link_target; - char *translated_target; - size_t link_target_len; + struct reparse_buffer_disk rpbuf; u16 rpbuflen; + struct link_reparse_point link; + char *target_buffer; + char *target; + size_t target_len; + char *buf_ptr; + bool rpfix_ok = false; - wimlib_assert(inode_is_symlink(inode)); + /* Not a symbolic link or junction? */ + if (!inode_is_symlink(inode)) + return -EINVAL; - if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen)) - return -EIO; + /* Retrieve the native Windows "substitute name". */ - if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata)) + if (wim_inode_get_reparse_point(inode, &rpbuf, &rpbuflen, blob)) return -EIO; - ret = utf16le_to_tstr(rpdata.substitute_name, - rpdata.substitute_name_nbytes, - &link_target, &link_target_len); - if (ret) + if (parse_link_reparse_point(&rpbuf, rpbuflen, &link)) + return -EINVAL; + + /* Translate the substitute name to the current multibyte encoding. 
*/ + if (utf16le_to_tstr(link.substitute_name, link.substitute_name_nbytes, + &target_buffer, &target_len)) return -errno; + target = target_buffer; + + /* + * The substitute name is a native Windows NT path. There are two cases: + * + * 1. The reparse point is a symlink (rptag=WIM_IO_REPARSE_TAG_SYMLINK) + * and SYMBOLIC_LINK_RELATIVE is set. Windows resolves the path + * relative to the directory containing the reparse point file. In + * this case, we just translate the path separators. + * 2. Otherwise, Windows resolves the path from the root of the Windows + * NT kernel object namespace. In this case, we attempt to strip the + * device name, in addition to translating the path separators; e.g. + * "\??\C:\Users\Public" is translated to "/Users/Public". + * + * Also in case (2) the link target may have been stored as "fixed", + * meaning that with the device portion stripped off it is effectively + * "relative to the root of the WIM image". If this is the case, and if + * the caller provided an alternate root directory, then rewrite the + * link to be relative to that directory. 
+ */ + if (!link_is_relative_symlink(&link)) { + static const char *const nt_root_dirs[] = { + "\\??\\", "\\DosDevices\\", "\\Device\\", + }; + for (size_t i = 0; i < ARRAY_LEN(nt_root_dirs); i++) { + size_t len = strlen(nt_root_dirs[i]); + if (!strncmp(target, nt_root_dirs[i], len)) { + char *p = target + len; + while (*p == '\\') + p++; + while (*p && *p != '\\') + p++; + target_len -= (p - target); + target = p; + break; + } + } - translated_target = link_target; - ret = parse_substitute_name(rpdata.substitute_name, - rpdata.substitute_name_nbytes, - rpdata.rptag); - switch (ret) { - case SUBST_NAME_IS_RELATIVE_LINK: - goto out_translate_slashes; - case SUBST_NAME_IS_VOLUME_JUNCTION: - goto out_have_link; - case SUBST_NAME_IS_UNKNOWN: - ERROR("Can't understand reparse point " - "substitute name \"%s\"", link_target); - ret = -EIO; - goto out_free_link_target; - default: - translated_target += ret; - link_target_len -= ret; - break; + if (!(inode->i_rp_flags & WIM_RP_FLAG_NOT_FIXED)) + rpfix_ok = true; } -out_translate_slashes: - for (size_t i = 0; i < link_target_len; i++) - if (translated_target[i] == '\\') - translated_target[i] = '/'; -out_have_link: - if (link_target_len > bufsize) { - link_target_len = bufsize; - ret = -ENAMETOOLONG; - } else { - ret = link_target_len; + /* Translate backslashes (Windows NT path separator) to forward slashes + * (UNIX path separator). In addition, translate forward slashes to + * backslashes; this enables lossless handling of UNIX symbolic link + * targets that contain the backslash character. */ + for (char *p = target; *p; p++) { + if (*p == '\\') + *p = '/'; + else if (*p == '/') + *p = '\\'; } - memcpy(buf, translated_target, link_target_len); -out_free_link_target: - FREE(link_target); - return ret; + + /* Copy as much of the link target as possible to the output buffer and + * return the number of bytes copied. 
*/ + buf_ptr = buf; + if (rpfix_ok && altroot_len != 0) { + copy(&buf_ptr, &bufsize, altroot, altroot_len); + } else if (target_len == 0) { + /* An absolute link target that was made relative to the same + * directory pointed to will end up empty if the original target + * did not have a trailing slash. Here, we are reading this + * adjusted link target without prefixing it. This usually + * doesn't happen, but if it does then we need to change it to + * "/" so that it is a valid target. */ + target = "/"; + target_len = 1; + } + copy(&buf_ptr, &bufsize, target, target_len); + FREE(target_buffer); + return buf_ptr - buf; } +/* Given a UNIX-style symbolic link target, create a Windows-style reparse point + * buffer and assign it to the specified inode. + * + * Returns 0 on success. On failure, returns the error from tstr_to_utf16le() + * or make_link_reparse_point(), or WIMLIB_ERR_NOMEM if the reparse point stream + * could not be added to the inode. On success the inode's reparse tag becomes + * WIM_IO_REPARSE_TAG_SYMLINK, FILE_ATTRIBUTE_NORMAL is cleared, and + * FILE_ATTRIBUTE_REPARSE_POINT is set. */ int -wim_inode_set_symlink(struct wim_inode *inode, - const char *target, - struct wim_lookup_table *lookup_table) +wim_inode_set_symlink(struct wim_inode *inode, const char *_target, + struct blob_table *blob_table) { - struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); - struct reparse_data rpdata; - static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0"; - static const char abs_print_name_prefix[4] = "C\0:\0"; - utf16lechar *name_utf16le; - size_t name_utf16le_nbytes; int ret; + utf16lechar *target; + size_t target_nbytes; + struct link_reparse_point link; + struct reparse_buffer_disk rpbuf; + u16 rpbuflen; - DEBUG("Creating reparse point data buffer for UNIX " - "symlink target \"%s\"", target); - memset(&rpdata, 0, sizeof(rpdata)); - ret = tstr_to_utf16le(target, strlen(target), - &name_utf16le, &name_utf16le_nbytes); + /* Translate the link 
target to UTF-16LE. 
*/ + ret = tstr_to_utf16le(_target, strlen(_target), &target, &target_nbytes); if (ret) return ret; - for (size_t i = 0; i < name_utf16le_nbytes / 2; i++) - if (name_utf16le[i] == cpu_to_le16('/')) - name_utf16le[i] = cpu_to_le16('\\'); - - /* Compatability notes: - * - * On UNIX, an absolute symbolic link begins with '/'; everything else - * is a relative symbolic link. (Quite simple compared to the various - * ways to provide Windows paths.) - * - * To change a UNIX relative symbolic link to Windows format, we only - * need to translate it to UTF-16LE and replace backslashes with forward - * slashes. We do not make any attempt to handle filename character - * problems, such as a link target that itself contains backslashes on - * UNIX. Then, for these relative links, we set the reparse header - * @flags field to SYMBOLIC_LINK_RELATIVE. - * - * For UNIX absolute symbolic links, we must set the @flags field to 0. - * Then, there are multiple options as to actually represent the - * absolute link targets: - * - * (1) An absolute path beginning with one backslash character. similar - * to UNIX-style, just with a different path separator. Print name same - * as substitute name. - * - * (2) Absolute path beginning with drive letter followed by a - * backslash. Print name same as substitute name. - * - * (3) Absolute path beginning with drive letter followed by a - * backslash; substitute name prefixed with \??\, otherwise same as - * print name. - * - * We choose option (3) here, and we just assume C: for the drive - * letter. The reasoning for this is: - * - * (1) Microsoft imagex.exe has a bug where it does not attempt to do - * reparse point fixups for these links, even though they are valid - * absolute links. (Note: in this case prefixing the substitute name - * with \??\ does not work; it just makes the data unable to be restored - * at all.) - * (2) Microsoft imagex.exe will fail when doing reparse point fixups - * for these. 
It apparently contains a bug that causes it to create an - * invalid reparse point, which then cannot be restored. - * (3) This is the only option I tested for which reparse point fixups - * worked properly in Microsoft imagex.exe. - * - * So option (3) it is. - */ + /* Translate forward slashes (UNIX path separator) to backslashes + * (Windows NT path separator). In addition, translate backslashes to + * forward slashes; this enables lossless handling of UNIX symbolic link + * targets that contain the backslash character. */ + for (utf16lechar *p = target; *p; p++) { + if (*p == cpu_to_le16('/')) + *p = cpu_to_le16('\\'); + else if (*p == cpu_to_le16('\\')) + *p = cpu_to_le16('/'); + } - rpdata.rptag = inode->i_reparse_tag; - if (target[0] == '/') { - rpdata.substitute_name_nbytes = name_utf16le_nbytes + - sizeof(abs_subst_name_prefix); - rpdata.print_name_nbytes = name_utf16le_nbytes + - sizeof(abs_print_name_prefix); - rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes); - rpdata.print_name = alloca(rpdata.print_name_nbytes); - memcpy(rpdata.substitute_name, abs_subst_name_prefix, - sizeof(abs_subst_name_prefix)); - memcpy(rpdata.print_name, abs_print_name_prefix, - sizeof(abs_print_name_prefix)); - memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix), - name_utf16le, name_utf16le_nbytes); - memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix), - name_utf16le, name_utf16le_nbytes); + link.rptag = WIM_IO_REPARSE_TAG_SYMLINK; + link.rpreserved = 0; + + /* Note: an absolute link that was rewritten to be relative to another + * directory is assumed to either be empty or to have a leading slash. + * See unix_relativize_link_target(). */ + if (*target == cpu_to_le16('\\') || !*target) { + /* + * UNIX link target was absolute. In this case we represent the + * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE + * cleared. 
For this to work we need to assign it a path that + * can be resolved from the root of the Windows NT kernel object + * namespace. We do this by using "\??\C:" as a dummy prefix. + * + * Note that we could instead represent UNIX absolute links by + * setting SYMBOLIC_LINK_RELATIVE and then leaving the path + * backslash-prefixed like "\Users\Public". On Windows this is + * valid and denotes a path relative to the root of the + * filesystem on which the reparse point resides. The problem + * with this is that neither WIMGAPI nor wimlib (on Windows) + * will do "reparse point fixups" when extracting such links + * (modifying the link target to point into the actual + * extraction directory). So for the greatest cross-platform + * consistency, we have to use the fake C: drive approach. + */ + static const utf16lechar prefix[6] = { + cpu_to_le16('\\'), + cpu_to_le16('?'), + cpu_to_le16('?'), + cpu_to_le16('\\'), + cpu_to_le16('C'), + cpu_to_le16(':'), + }; + + /* Do not show \??\ in print name */ + const size_t num_unprintable_chars = 4; + + link.symlink_flags = 0; /* NOTE(review): the nbytes fields look 16-bit (the + * parser fills them from le16 values) -- confirm that + * sizeof(prefix) + target_nbytes cannot exceed that range + * before the alloca() and memcpy() below. */ + link.substitute_name_nbytes = sizeof(prefix) + target_nbytes; + link.substitute_name = alloca(link.substitute_name_nbytes); + memcpy(link.substitute_name, prefix, sizeof(prefix)); + memcpy(link.substitute_name + ARRAY_LEN(prefix), target, target_nbytes); + link.print_name_nbytes = link.substitute_name_nbytes - + (num_unprintable_chars * sizeof(utf16lechar)); + link.print_name = link.substitute_name + num_unprintable_chars; } else { - rpdata.substitute_name_nbytes = name_utf16le_nbytes; - rpdata.print_name_nbytes = name_utf16le_nbytes; - rpdata.substitute_name = name_utf16le; - rpdata.print_name = name_utf16le; - rpdata.rpflags = SYMBOLIC_LINK_RELATIVE; + /* UNIX link target was relative. 
In this case we represent the + * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE + * set. This causes Windows to interpret the link relative to + * the directory containing the reparse point file. 
*/ + link.symlink_flags = SYMBOLIC_LINK_RELATIVE; + link.substitute_name_nbytes = target_nbytes; + link.substitute_name = target; + link.print_name_nbytes = target_nbytes; + link.print_name = target; } - ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk); - if (ret == 0) { - ret = inode_set_unnamed_stream(inode, - (u8*)&rpbuf_disk + 8, - le16_to_cpu(rpbuf_disk.rpdatalen), - lookup_table); - } - FREE(name_utf16le); + /* Generate the reparse buffer. */ + ret = make_link_reparse_point(&link, &rpbuf, &rpbuflen); + if (ret) + goto out_free_target; + + /* Save the reparse data with the inode. */ + ret = WIMLIB_ERR_NOMEM; + if (!inode_add_stream_with_data(inode, + STREAM_TYPE_REPARSE_POINT, + NO_STREAM_NAME, + rpbuf.rpdata, + rpbuflen - REPARSE_DATA_OFFSET, + blob_table)) + goto out_free_target; + + /* The inode is now a reparse point. */ + inode->i_reparse_tag = link.rptag; + inode->i_attributes &= ~FILE_ATTRIBUTE_NORMAL; + inode->i_attributes |= FILE_ATTRIBUTE_REPARSE_POINT; + + ret = 0; +out_free_target: + FREE(target); return ret; } -#include - -static int -unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret) -{ - struct stat stbuf; - if (stat(path, &stbuf)) { - if (errno != ENOENT) - WARNING_WITH_ERRNO("Failed to stat \"%s\"", path); - /* Treat as a link pointing outside the capture root (it - * most likely is). 
*/ - return WIMLIB_ERR_STAT; - } else { - *ino_ret = stbuf.st_ino; - *dev_ret = stbuf.st_dev; - return 0; - } -} - -#endif /* !defined(__WIN32__) */ - -#ifdef __WIN32__ -# define RP_PATH_SEPARATOR L'\\' -# define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/') -# define os_get_ino_and_dev win32_get_file_and_vol_ids -#else -# define RP_PATH_SEPARATOR '/' -# define is_rp_path_separator(c) ((c) == '/') -# define os_get_ino_and_dev unix_get_ino_and_dev -#endif - -/* Fix up absolute symbolic link targets--- mostly shared between UNIX and - * Windows */ -tchar * -capture_fixup_absolute_symlink(tchar *dest, - u64 capture_root_ino, u64 capture_root_dev) -{ - tchar *p = dest; - -#ifdef __WIN32__ - /* Skip drive letter */ - if (!is_rp_path_separator(*dest)) - p += 2; -#endif - - DEBUG("Fixing symlink or junction \"%"TS"\"", dest); - for (;;) { - tchar save; - int ret; - u64 ino; - u64 dev; - - while (is_rp_path_separator(*p)) - p++; - - save = *p; - *p = T('\0'); - ret = os_get_ino_and_dev(dest, &ino, &dev); - *p = save; - - if (ret) /* stat() failed before we got to the capture root--- - assume the link points outside it. */ - return NULL; - - if (ino == capture_root_ino && dev == capture_root_dev) { - /* Link points inside capture root. Return abbreviated - * path. */ - if (*p == T('\0')) - *(p - 1) = RP_PATH_SEPARATOR; - while (p - 1 >= dest && is_rp_path_separator(*(p - 1))) - p--; - #ifdef __WIN32__ - if (!is_rp_path_separator(dest[0])) { - *--p = dest[1]; - *--p = dest[0]; - } - #endif - wimlib_assert(p >= dest); - return p; - } - - if (*p == T('\0')) { - /* Link points outside capture root. */ - return NULL; - } - - do { - p++; - } while (!is_rp_path_separator(*p) && *p != T('\0')); - } -} +#endif /* !__WIN32__ */