wimlib.net Git - wimlib/blobdiff - src/reparse.c
Fix extracting non-Microsoft reparse points
[wimlib] / src / reparse.c
index 4cbe69d9fbda8b7ce7ab43571240b4f2a1a94548..f96fd49b7af592b63b3d6dd8f0b020a722489a3d 100644 (file)
 /*
- * reparse.c
- *
- * Handle reparse data.
+ * reparse.c - Reparse point handling
  */
 
 /*
- * Copyright (C) 2012, 2013 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
+ * Copyright (C) 2012, 2013, 2015 Eric Biggers
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
-#include "dentry.h"
-#include "buffer_io.h"
-#include "lookup_table.h"
-#include "sha1.h"
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
 #include <errno.h>
 
-static const utf16lechar volume_junction_prefix[11] = {
-       cpu_to_le16('\\'),
-       cpu_to_le16('\\'),
-       cpu_to_le16('?'),
-       cpu_to_le16('\\'),
-       cpu_to_le16('V'),
-       cpu_to_le16('o'),
-       cpu_to_le16('l'),
-       cpu_to_le16('u'),
-       cpu_to_le16('m'),
-       cpu_to_le16('e'),
-       cpu_to_le16('{'),
-};
-
-/* Parse the "substitute name" (link target) from a symbolic link or junction
- * reparse point.
- *
- * Return value is:
- *
- * Non-negative integer:
- *     The name is an absolute symbolic link in one of several formats,
- *     and the return value is the number of UTF-16LE characters that need to
- *     be advanced to reach a simple "absolute" path starting with a backslash
- *     (i.e. skip over \??\ and/or drive letter)
- * Negative integer:
- *     SUBST_NAME_IS_VOLUME_JUNCTION:
- *             The name is a volume junction.
- *     SUBST_NAME_IS_RELATIVE_LINK:
- *             The name is a relative symbolic link.
- *     SUBST_NAME_IS_UNKNOWN:
- *             The name does not appear to be a valid symbolic link, junction,
- *             or mount point.
+#include "wimlib/alloca.h"
+#include "wimlib/blob_table.h"
+#include "wimlib/endianness.h"
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/guid.h"
+#include "wimlib/inode.h"
+#include "wimlib/reparse.h"
+#include "wimlib/resource.h"
+
+/*
+ * Reconstruct the header of a reparse point buffer.  This is necessary because
+ * only reparse data is stored in WIM files.  The reparse tag is instead stored
+ * in the on-disk WIM dentry, and the reparse data length is equal to the size
+ * of the blob in which the reparse data was stored, minus the size of a GUID
+ * (16 bytes) if the reparse tag does not have the "Microsoft" bit set.
  */
-int
-parse_substitute_name(const utf16lechar *substitute_name,
-                     u16 substitute_name_nbytes, u32 rptag)
+void
+complete_reparse_point(struct reparse_buffer_disk *rpbuf,
+                      const struct wim_inode *inode, u16 blob_size)
 {
-       u16 substitute_name_nchars = substitute_name_nbytes / 2;
-
-       if (substitute_name_nchars >= 7 &&
-           substitute_name[0] == cpu_to_le16('\\') &&
-           substitute_name[1] == cpu_to_le16('?') &&
-           substitute_name[2] == cpu_to_le16('?') &&
-           substitute_name[3] == cpu_to_le16('\\') &&
-           substitute_name[4] != cpu_to_le16('\0') &&
-           substitute_name[5] == cpu_to_le16(':') &&
-           substitute_name[6] == cpu_to_le16('\\'))
-       {
-               /* "Full" symlink or junction (\??\x:\ prefixed path) */
-               return 6;
-       } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
-                  substitute_name_nchars >= 12 &&
-                  memcmp(substitute_name, volume_junction_prefix,
-                         sizeof(volume_junction_prefix)) == 0 &&
-                  substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
-       {
-               /* Volume junction.  Can't really do anything with it. */
-               return SUBST_NAME_IS_VOLUME_JUNCTION;
-       } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
-                  substitute_name_nchars >= 3 &&
-                  substitute_name[0] != cpu_to_le16('\0') &&
-                  substitute_name[1] == cpu_to_le16(':') &&
-                  substitute_name[2] == cpu_to_le16('\\'))
-       {
-               /* "Absolute" symlink, with drive letter */
-               return 2;
-       } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
-                  substitute_name_nchars >= 1)
-       {
-               if (substitute_name[0] == cpu_to_le16('\\'))
-                       /* "Absolute" symlink, without drive letter */
-                       return 0;
-               else
-                       /* "Relative" symlink, without drive letter */
-                       return SUBST_NAME_IS_RELATIVE_LINK;
-       } else {
-               return SUBST_NAME_IS_UNKNOWN;
-       }
+       rpbuf->rptag = cpu_to_le32(inode->i_reparse_tag); /* the tag is taken from the inode, not the blob */
+       if (blob_size >= GUID_SIZE && !(inode->i_reparse_tag & 0x80000000)) /* 0x80000000: the "Microsoft" tag bit */
+               blob_size -= GUID_SIZE; /* non-Microsoft tag: the GUID is not counted in rpdatalen */
+       rpbuf->rpdatalen = cpu_to_le16(blob_size);
+       rpbuf->rpreserved = cpu_to_le16(inode->i_rp_reserved);
 }
 
-/*
- * Read the data from a symbolic link, junction, or mount point reparse point
- * buffer into a `struct reparse_data'.
- *
- * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
- * description of the format of the reparse point buffers.
- */
+/* Parse the buffer for a symbolic link or junction reparse point and fill in a
+ * 'struct link_reparse_point'.
+ *
+ * @rpbuf
+ *     The reparse point buffer, including its header fields.
+ * @rpbuflen
+ *     Number of valid bytes in @rpbuf.
+ * @link
+ *     Filled in on success.  Its name pointers point into @rpbuf; the names
+ *     are not copied.  On failure it may have been partially written.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_INVALID_REPARSE_DATA if the tag is not a
+ * symlink or mount point tag, the buffer is too small, a name offset or length
+ * is odd, or a name extends past @rpbuflen.  */
 int
-parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata)
+parse_link_reparse_point(const struct reparse_buffer_disk *rpbuf, u16 rpbuflen,
+                        struct link_reparse_point *link)
 {
-       const u8 *p = rpbuf;
        u16 substitute_name_offset;
        u16 print_name_offset;
+       const u8 *data;
 
-       memset(rpdata, 0, sizeof(*rpdata));
-       if (rpbuflen < 16)
-               goto out_invalid;
-       p = get_u32(p, &rpdata->rptag);
-       wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
-                     rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
-       p = get_u16(p, &rpdata->rpdatalen);
-       p = get_u16(p, &rpdata->rpreserved);
-       p = get_u16(p, &substitute_name_offset);
-       p = get_u16(p, &rpdata->substitute_name_nbytes);
-       p = get_u16(p, &print_name_offset);
-       p = get_u16(p, &rpdata->print_name_nbytes);
-       if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
-               if (rpbuflen < 20)
-                       goto out_invalid;
-               p = get_u32(p, &rpdata->rpflags);
-       }
-       if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
-           (p - rpbuf) > rpbuflen)
-               goto out_invalid;
-       if ((size_t)print_name_offset + rpdata->print_name_nbytes +
-           (p - rpbuf) > rpbuflen)
-               goto out_invalid;
-       rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset];
-       rpdata->print_name = (utf16lechar*)&p[print_name_offset];
-       return 0;
-out_invalid:
-       ERROR("Invalid reparse data");
-       return WIMLIB_ERR_INVALID_REPARSE_DATA;
-}
+       link->rptag = le32_to_cpu(rpbuf->rptag);
 
-/*
- * Create a reparse point data buffer.
- *
- * @rpdata:  Structure that contains the data we need.
- *
- * @rpbuf:     Buffer into which to write the reparse point data buffer.  Must be
- *             at least REPARSE_POINT_MAX_SIZE bytes long.
- */
-int
-make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf)
-{
-       u8 *p = rpbuf;
-
-       p = put_u32(p, rpdata->rptag);
-       p += 2; /* We set ReparseDataLength later */
-       p = put_u16(p, rpdata->rpreserved);
-       p = put_u16(p, 0); /* substitute name offset */
-       p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */
-       p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */
-       p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */
-       if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
-               p = put_u32(p, rpdata->rpflags);
-       /* We null-terminate the substitute and print names, although this may
-        * not be strictly necessary.  Note that the byte counts should not
-        * include the null terminators. */
-       if (p + rpdata->substitute_name_nbytes +
-           rpdata->print_name_nbytes +
-           2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
-       {
-               ERROR("Reparse data is too long!");
+       /* Not a symbolic link or junction?  */
+       if (link->rptag != WIM_IO_REPARSE_TAG_SYMLINK &&
+           link->rptag != WIM_IO_REPARSE_TAG_MOUNT_POINT)
                return WIMLIB_ERR_INVALID_REPARSE_DATA;
-       }
-       p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name);
-       p = put_u16(p, 0);
-       p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name);
-       p = put_u16(p, 0);
-       put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */
-       return 0;
-}
 
-/*
- * Read the reparse data from a WIM inode that is a reparse point.
- *
- * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
- * the reparse point data buffer will be reconstructed.
- *
- * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
- * are omitted, presumably because we already know the reparse tag from the
- * dentry, and we already know the reparse tag length from the lookup table
- * entry resource length.  However, we reconstruct the first 8 bytes in the
- * buffer returned by this function.
- */
-int
-wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf)
-{
-       struct wim_lookup_table_entry *lte;
-       int ret;
+       /* Is the buffer too small to be a symlink or a junction?  */
+       if (rpbuflen < offsetof(struct reparse_buffer_disk, link.junction.data))
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
 
-       wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
+       link->rpreserved = le16_to_cpu(rpbuf->rpreserved);
+       link->substitute_name_nbytes = le16_to_cpu(rpbuf->link.substitute_name_nbytes);
+       substitute_name_offset = le16_to_cpu(rpbuf->link.substitute_name_offset);
+       link->print_name_nbytes = le16_to_cpu(rpbuf->link.print_name_nbytes);
+       print_name_offset = le16_to_cpu(rpbuf->link.print_name_offset);
 
-       lte = inode_unnamed_lte_resolved(inode);
-       if (!lte) {
-               ERROR("Reparse point has no reparse data!");
+       /* The names must be properly sized and aligned: every offset and length
+        * must be even, because the names are arrays of 16-bit UTF-16LE code
+        * units.  */
+       if ((substitute_name_offset | print_name_offset |
+            link->substitute_name_nbytes | link->print_name_nbytes) & 1)
                return WIMLIB_ERR_INVALID_REPARSE_DATA;
+
+       if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
+               if (rpbuflen < offsetof(struct reparse_buffer_disk, link.symlink.data))
+                       return WIMLIB_ERR_INVALID_REPARSE_DATA;
+               link->symlink_flags = le32_to_cpu(rpbuf->link.symlink.flags);
+               data = rpbuf->link.symlink.data;
+       } else {
+               data = rpbuf->link.junction.data;
        }
-       if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) {
-               ERROR("Reparse data is too long!");
+
+       /* Verify that the names don't overflow the buffer.  The name offsets
+        * are relative to @data, so the bytes that precede @data in the buffer
+        * are added before comparing against @rpbuflen.  */
+       if ((data - (const u8 *)rpbuf) + substitute_name_offset +
+           link->substitute_name_nbytes > rpbuflen)
                return WIMLIB_ERR_INVALID_REPARSE_DATA;
-       }
 
-       /* Read the data from the WIM file */
-       ret = read_full_resource_into_buf(lte, rpbuf + 8, true);
-       if (ret)
-               return ret;
+       if ((data - (const u8 *)rpbuf) + print_name_offset +
+           link->print_name_nbytes > rpbuflen)
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
 
-       /* Reconstruct the first 8 bytes of the reparse point buffer */
+       /* Save the name pointers.  These point into @rpbuf; nothing is copied.  */
+       link->substitute_name = (utf16lechar *)&data[substitute_name_offset];
+       link->print_name = (utf16lechar *)&data[print_name_offset];
+       return 0;
+}
 
-       /* ReparseTag */
-       put_u32(rpbuf, inode->i_reparse_tag);
+/* Translate a 'struct link_reparse_point' into a reparse point buffer.
+ *
+ * @link
+ *     The link to encode.  Its rptag must be WIM_IO_REPARSE_TAG_SYMLINK or
+ *     WIM_IO_REPARSE_TAG_MOUNT_POINT.
+ * @rpbuf
+ *     Output buffer; the header fields and the name data are written here.
+ * @rpbuflen_ret
+ *     On success, set to the number of bytes from the start of @rpbuf to the
+ *     end of the data written.
+ *
+ * The substitute name is stored first (offset 0), then the print name; each is
+ * followed by a UTF-16LE null terminator.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_INVALID_REPARSE_DATA if the tag is not
+ * one of the two above or if the names plus terminators would make the buffer
+ * exceed REPARSE_POINT_MAX_SIZE bytes.  */
+int
+make_link_reparse_point(const struct link_reparse_point *link,
+                       struct reparse_buffer_disk *rpbuf, u16 *rpbuflen_ret)
+{
+       u8 *data;
 
-       /* ReparseDataLength */
-       put_u16(rpbuf + 4, wim_resource_size(lte));
+       if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
+               data = rpbuf->link.symlink.data;
+       else if (link->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
+               data = rpbuf->link.junction.data;
+       else /* Callers should forbid this case, but check anyway.  */
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
 
-       /* ReparseReserved
-        * XXX this could be one of the unknown fields in the WIM dentry. */
-       put_u16(rpbuf + 6, 0);
+       /* Check if the names are too long to fit in a reparse point.  The total
+        * is the bytes preceding @data, both names, and two null terminators.  */
+       if ((data - (u8 *)rpbuf) + link->substitute_name_nbytes +
+           link->print_name_nbytes +
+           2 * sizeof(utf16lechar) > REPARSE_POINT_MAX_SIZE)
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
+
+       rpbuf->rptag = cpu_to_le32(link->rptag);
+       rpbuf->rpreserved = cpu_to_le16(link->rpreserved);
+       rpbuf->link.substitute_name_offset = cpu_to_le16(0);
+       rpbuf->link.substitute_name_nbytes = cpu_to_le16(link->substitute_name_nbytes);
+       rpbuf->link.print_name_offset = cpu_to_le16(link->substitute_name_nbytes +
+                                                   sizeof(utf16lechar)); /* just past the substitute name's terminator */
+       rpbuf->link.print_name_nbytes = cpu_to_le16(link->print_name_nbytes);
+
+       if (link->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
+               rpbuf->link.symlink.flags = cpu_to_le32(link->symlink_flags);
+
+       /* We null-terminate the substitute and print names, although this isn't
+        * strictly necessary.  Note that the nbytes fields do not include the
+        * null terminators.  */
+       data = mempcpy(data, link->substitute_name, link->substitute_name_nbytes);
+       *(utf16lechar *)data = cpu_to_le16(0);
+       data += sizeof(utf16lechar);
+       data = mempcpy(data, link->print_name, link->print_name_nbytes);
+       *(utf16lechar *)data = cpu_to_le16(0);
+       data += sizeof(utf16lechar);
+       rpbuf->rpdatalen = cpu_to_le16(data - rpbuf->rpdata); /* bytes from rpdata up to the end of the names */
+
+       *rpbuflen_ret = data - (u8 *)rpbuf;
        return 0;
 }
 
-/* UNIX version of getting and setting the data in reparse points */
-#if !defined(__WIN32__)
+/* UNIX symlink <=> Windows reparse point translation  */
+#ifndef __WIN32__
 
-/* Get the UNIX symlink target from a WIM inode.  The inode may be either a
- * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
- * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
- *
- * This has similar semantics to the UNIX readlink() function, except the path
- * argument is swapped out with the `struct wim_inode' for a reparse point, and
- * on failure a negated error code is returned rather than -1 with errno set.  */
-ssize_t
-wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize)
+/* Retrieve the inode's reparse point buffer into @rpbuf and @rpbuflen_ret.
+ * This gets the reparse data from @blob if specified, otherwise from the
+ * inode's reparse point stream.  The inode's streams must be resolved.
+ *
+ * If no blob is available at all, the reparse data is treated as empty: only
+ * the header fields are filled in and *@rpbuflen_ret is REPARSE_DATA_OFFSET.
+ *
+ * Returns 0 on success, WIMLIB_ERR_INVALID_REPARSE_DATA if the blob is larger
+ * than REPARSE_DATA_MAX_SIZE, or the nonzero value returned by
+ * read_blob_into_buf() if reading the blob fails.  */
+static int
+wim_inode_get_reparse_point(const struct wim_inode *inode,
+                           struct reparse_buffer_disk *rpbuf,
+                           u16 *rpbuflen_ret,
+                           const struct blob_descriptor *blob)
 {
        int ret;
-       u8 rpbuf[REPARSE_POINT_MAX_SIZE];
-       u16 rpdatalen;
-       struct reparse_data rpdata;
-       char *link_target;
-       char *translated_target;
-       size_t link_target_len;
-
-       wimlib_assert(inode_is_symlink(inode));
-
-       if (wim_inode_get_reparse_data(inode, rpbuf))
-               return -EIO;
-
-       get_u16(rpbuf + 4, &rpdatalen);
-
-       if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata))
-               return -EIO;
+       u16 blob_size = 0;
 
-       ret = utf16le_to_tstr(rpdata.substitute_name,
-                             rpdata.substitute_name_nbytes,
-                             &link_target, &link_target_len);
-       if (ret)
-               return -errno;
+       if (!blob) {
+               const struct wim_inode_stream *strm;
 
-       translated_target = link_target;
-       ret = parse_substitute_name(rpdata.substitute_name,
-                                   rpdata.substitute_name_nbytes,
-                                   rpdata.rptag);
-       switch (ret) {
-       case SUBST_NAME_IS_RELATIVE_LINK:
-               goto out_translate_slashes;
-       case SUBST_NAME_IS_VOLUME_JUNCTION:
-               goto out_have_link;
-       case SUBST_NAME_IS_UNKNOWN:
-               ERROR("Can't understand reparse point "
-                     "substitute name \"%s\"", link_target);
-               return -EIO;
-       default:
-               translated_target += ret;
-               link_target_len -= ret;
-               break;
+               strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
+               if (strm)
+                       blob = stream_blob_resolved(strm);
        }
 
-out_translate_slashes:
-       for (size_t i = 0; i < link_target_len; i++)
-               if (translated_target[i] == '\\')
-                       translated_target[i] = '/';
-out_have_link:
-       if (link_target_len > bufsize) {
-               link_target_len = bufsize;
-               ret = -ENAMETOOLONG;
-       } else {
-               ret = link_target_len;
+       if (blob) {
+               if (blob->size > REPARSE_DATA_MAX_SIZE)
+                       return WIMLIB_ERR_INVALID_REPARSE_DATA;
+               blob_size = blob->size; /* cannot truncate: bounded by the check above */
+               ret = read_blob_into_buf(blob, rpbuf->rpdata);
+               if (ret)
+                       return ret;
        }
-       memcpy(buf, translated_target, link_target_len);
-       FREE(link_target);
-       return ret;
+
+       complete_reparse_point(rpbuf, inode, blob_size);
+
+       *rpbuflen_ret = REPARSE_DATA_OFFSET + blob_size; /* uses the full blob size, even when rpdatalen excludes a GUID */
+       return 0;
 }
 
-#ifdef HAVE_ALLOCA_H
-#  include <alloca.h>
-#endif
+static void
+copy(char **buf_p, size_t *bufsize_p, const char *src, size_t src_size)
+{ /* Append as much of @src as fits in the remaining output space: copy
+   * min(*bufsize_p, src_size) bytes to *buf_p, then advance *buf_p and
+   * decrease *bufsize_p by the number of bytes copied.  Any source bytes
+   * beyond that are dropped.  */
+       size_t n = min(*bufsize_p, src_size);
+       memcpy(*buf_p, src, n);
+       *buf_p += n;
+       *bufsize_p -= n;
+}
 
+/*
+ * Get a UNIX-style symlink target from the WIM inode for a reparse point.
+ *
+ * @inode
+ *     The inode from which to read the symlink.  If not a symbolic link or
+ *     junction reparse point, then -EINVAL will be returned.
+ * @buf
+ *     Buffer into which to place the link target.
+ * @bufsize
+ *     Available space in @buf, in bytes.
+ * @blob
+ *     If not NULL, the blob from which to read the reparse data.  Otherwise,
+ *     the reparse data will be read from the reparse point stream of @inode.
+ * @altroot
+ *     If @altroot_len != 0 and the link is an absolute link that was stored as
+ *     "fixed", then prepend this path to the link target.
+ * @altroot_len
+ *     Length of the @altroot string or 0.
+ *
+ * Similar to POSIX readlink(), this function writes as much of the symlink
+ * target as possible (up to @bufsize bytes) to @buf with no null terminator and
+ * returns the number of bytes written or a negative errno value on error.  Note
+ * that the target is truncated and @bufsize is returned in the overflow case.
+ *
+ * Error returns: -EINVAL if @inode is not a symlink or junction, or if its
+ * reparse data cannot be parsed as one; -EIO if the reparse data cannot be
+ * retrieved; -errno if converting the substitute name from UTF-16LE fails.
+ */
 int
-wim_inode_set_symlink(struct wim_inode *inode,
-                     const char *target,
-                     struct wim_lookup_table *lookup_table)
-
+wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize,
+                  const struct blob_descriptor *blob,
+                  const char *altroot, size_t altroot_len)
 {
-       u8 rpbuf[REPARSE_POINT_MAX_SIZE];
-       u16 rpdatalen;
-       struct reparse_data rpdata;
-       static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
-       static const char abs_print_name_prefix[4] = "C\0:\0";
-       utf16lechar *name_utf16le;
-       size_t name_utf16le_nbytes;
-       int ret;
+       struct reparse_buffer_disk rpbuf;
+       u16 rpbuflen;
+       struct link_reparse_point link;
+       char *target_buffer;
+       char *target;
+       size_t target_len;
+       char *buf_ptr;
+       bool rpfix_ok = false;
+
+       /* Not a symbolic link or junction?  */
+       if (!inode_is_symlink(inode))
+               return -EINVAL;
+
+       /* Retrieve the native Windows "substitute name".  */
+
+       if (wim_inode_get_reparse_point(inode, &rpbuf, &rpbuflen, blob))
+               return -EIO;
 
-       DEBUG("Creating reparse point data buffer for UNIX "
-             "symlink target \"%s\"", target);
-       memset(&rpdata, 0, sizeof(rpdata));
-       ret = tstr_to_utf16le(target, strlen(target),
-                             &name_utf16le, &name_utf16le_nbytes);
-       if (ret)
-               return ret;
+       if (parse_link_reparse_point(&rpbuf, rpbuflen, &link))
+               return -EINVAL;
 
-       for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
-               if (name_utf16le[i] == cpu_to_le16('/'))
-                       name_utf16le[i] = cpu_to_le16('\\');
+       /* Translate the substitute name to the current multibyte encoding.  */
+       if (utf16le_to_tstr(link.substitute_name, link.substitute_name_nbytes,
+                           &target_buffer, &target_len))
+               return -errno;
+       target = target_buffer; /* target may advance below; target_buffer is kept for FREE() */
 
-       /* Compatability notes:
-        *
-        * On UNIX, an absolute symbolic link begins with '/'; everything else
-        * is a relative symbolic link.  (Quite simple compared to the various
-        * ways to provide Windows paths.)
-        *
-        * To change a UNIX relative symbolic link to Windows format, we only
-        * need to translate it to UTF-16LE and replace backslashes with forward
-        * slashes.  We do not make any attempt to handle filename character
-        * problems, such as a link target that itself contains backslashes on
-        * UNIX.  Then, for these relative links, we set the reparse header
-        * @flags field to SYMBOLIC_LINK_RELATIVE.
-        *
-        * For UNIX absolute symbolic links, we must set the @flags field to 0.
-        * Then, there are multiple options as to actually represent the
-        * absolute link targets:
-        *
-        * (1) An absolute path beginning with one backslash character. similar
-        * to UNIX-style, just with a different path separator.  Print name same
-        * as substitute name.
-        *
-        * (2) Absolute path beginning with drive letter followed by a
-        * backslash.  Print name same as substitute name.
-        *
-        * (3) Absolute path beginning with drive letter followed by a
-        * backslash; substitute name prefixed with \??\, otherwise same as
-        * print name.
+       /*
+        * The substitute name is a native Windows NT path. There are two cases:
         *
-        * We choose option (3) here, and we just assume C: for the drive
-        * letter.  The reasoning for this is:
+        * 1. The reparse point is a symlink (rptag=WIM_IO_REPARSE_TAG_SYMLINK)
+        *    and SYMBOLIC_LINK_RELATIVE is set.  Windows resolves the path
+        *    relative to the directory containing the reparse point file.  In
+        *    this case, we just translate the path separators.
+        * 2. Otherwise, Windows resolves the path from the root of the Windows
+        *    NT kernel object namespace.  In this case, we attempt to strip the
+        *    device name, in addition to translating the path separators; e.g.
+        *    "\??\C:\Users\Public" is translated to "/Users/Public".
         *
-        * (1) Microsoft imagex.exe has a bug where it does not attempt to do
-        * reparse point fixups for these links, even though they are valid
-        * absolute links.  (Note: in this case prefixing the substitute name
-        * with \??\ does not work; it just makes the data unable to be restored
-        * at all.)
-        * (2) Microsoft imagex.exe will fail when doing reparse point fixups
-        * for these.  It apparently contains a bug that causes it to create an
-        * invalid reparse point, which then cannot be restored.
-        * (3) This is the only option I tested for which reparse point fixups
-        * worked properly in Microsoft imagex.exe.
-        *
-        * So option (3) it is.
+        * Also in case (2) the link target may have been stored as "fixed",
+        * meaning that with the device portion stripped off it is effectively
+        * "relative to the root of the WIM image".  If this is the case, and if
+        * the caller provided an alternate root directory, then rewrite the
+        * link to be relative to that directory.
         */
+       if (!link_is_relative_symlink(&link)) {
+               static const char *const nt_root_dirs[] = {
+                       "\\??\\", "\\DosDevices\\", "\\Device\\",
+               };
+               for (size_t i = 0; i < ARRAY_LEN(nt_root_dirs); i++) {
+                       size_t len = strlen(nt_root_dirs[i]);
+                       if (!strncmp(target, nt_root_dirs[i], len)) {
+                               char *p = target + len;
+                               while (*p == '\\') /* skip any extra separators after the prefix */
+                                       p++;
+                               while (*p && *p != '\\') /* skip the device name component, e.g. "C:" */
+                                       p++;
+                               target_len -= (p - target);
+                               target = p;
+                               break;
+                       }
+               }
 
-       rpdata.rptag = inode->i_reparse_tag;
-       if (target[0] == '/') {
-               rpdata.substitute_name_nbytes = name_utf16le_nbytes +
-                                               sizeof(abs_subst_name_prefix);
-               rpdata.print_name_nbytes = name_utf16le_nbytes +
-                                          sizeof(abs_print_name_prefix);
-               rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
-               rpdata.print_name = alloca(rpdata.print_name_nbytes);
-               memcpy(rpdata.substitute_name, abs_subst_name_prefix,
-                      sizeof(abs_subst_name_prefix));
-               memcpy(rpdata.print_name, abs_print_name_prefix,
-                      sizeof(abs_print_name_prefix));
-               memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
-                      name_utf16le, name_utf16le_nbytes);
-               memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
-                      name_utf16le, name_utf16le_nbytes);
-       } else {
-               rpdata.substitute_name_nbytes = name_utf16le_nbytes;
-               rpdata.print_name_nbytes = name_utf16le_nbytes;
-               rpdata.substitute_name = name_utf16le;
-               rpdata.print_name = name_utf16le;
-               rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
+               if (!(inode->i_rp_flags & WIM_RP_FLAG_NOT_FIXED))
+                       rpfix_ok = true; /* stored as "fixed": eligible for the @altroot prefix */
        }
 
-       ret = make_reparse_buffer(&rpdata, rpbuf);
-       if (ret == 0) {
-               get_u16(rpbuf + 4, &rpdatalen);
-               ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen,
-                                              lookup_table);
+       /* Translate backslashes (Windows NT path separator) to forward slashes
+        * (UNIX path separator).  In addition, translate forward slashes to
+        * backslashes; this enables lossless handling of UNIX symbolic link
+        * targets that contain the backslash character.
+        * NOTE(review): this loop and the prefix scan above stop at a null byte
+        * rather than at target_len, so they presumably rely on
+        * utf16le_to_tstr() null-terminating its output -- confirm.  */
+       for (char *p = target; *p; p++) {
+               if (*p == '\\')
+                       *p = '/';
+               else if (*p == '/')
+                       *p = '\\';
        }
-       FREE(name_utf16le);
-       return ret;
-}
-
-#include <sys/stat.h>
 
-static int
-unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
-{
-       struct stat stbuf;
-       if (stat(path, &stbuf)) {
-               if (errno != ENOENT)
-                       WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
-               /* Treat as a link pointing outside the capture root (it
-                * most likely is). */
-               return WIMLIB_ERR_STAT;
-       } else {
-               *ino_ret = stbuf.st_ino;
-               *dev_ret = stbuf.st_dev;
-               return 0;
+       /* Copy as much of the link target as possible to the output buffer and
+        * return the number of bytes copied.  */
+       buf_ptr = buf;
+       if (rpfix_ok && altroot_len != 0) {
+               copy(&buf_ptr, &bufsize, altroot, altroot_len); /* the target itself is appended after this prefix below */
+       } else if (target_len == 0) {
+               /* An absolute link target that was made relative to the same
+                * directory pointed to will end up empty if the original target
+                * did not have a trailing slash.  Here, we are reading this
+                * adjusted link target without prefixing it.  This usually
+                * doesn't happen, but if it does then we need to change it to
+                * "/" so that it is a valid target.  */
+               target = "/";
+               target_len = 1;
        }
+       copy(&buf_ptr, &bufsize, target, target_len);
+       FREE(target_buffer);
+       return buf_ptr - buf;
 }
 
-#endif /* !defined(__WIN32__) */
-
-#ifdef __WIN32__
-#  include "win32.h"
-#  define RP_PATH_SEPARATOR L'\\'
-#  define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
-#  define os_get_ino_and_dev win32_get_file_and_vol_ids
-#else
-#  define RP_PATH_SEPARATOR '/'
-#  define is_rp_path_separator(c) ((c) == '/')
-#  define os_get_ino_and_dev unix_get_ino_and_dev
-#endif
+/* Given a UNIX-style symbolic link target, create a Windows-style reparse point
+ * buffer and assign it to the specified inode.
+ *
+ * @inode:      inode that receives the reparse point stream.  On success its
+ *              reparse tag is set to WIM_IO_REPARSE_TAG_SYMLINK,
+ *              FILE_ATTRIBUTE_NORMAL is cleared and
+ *              FILE_ATTRIBUTE_REPARSE_POINT is set.
+ * @_target:    NUL-terminated link target; its length is taken with strlen().
+ * @blob_table: passed through to inode_add_stream_with_data().
+ *
+ * Returns 0 on success.  On failure, returns the error code from
+ * tstr_to_utf16le() or make_link_reparse_point(), or WIMLIB_ERR_NOMEM if
+ * inode_add_stream_with_data() fails; the inode's tag and attributes are not
+ * modified in that case.  The temporary UTF-16LE copy of the target is freed
+ * on every path taken after the conversion has succeeded.  */
+int
+wim_inode_set_symlink(struct wim_inode *inode, const char *_target,
+                     struct blob_table *blob_table)
 
-/* Fix up absolute symbolic link targets--- mostly shared between UNIX and
- * Windows */
-tchar *
-capture_fixup_absolute_symlink(tchar *dest,
-                              u64 capture_root_ino, u64 capture_root_dev)
 {
-       tchar *p = dest;
-
-#ifdef __WIN32__
-       /* Skip drive letter */
-       if (!is_rp_path_separator(*dest))
-               p += 2;
-#endif
-
-       DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
-       for (;;) {
-               tchar save;
-               int ret;
-               u64 ino;
-               u64 dev;
-
-               while (is_rp_path_separator(*p))
-                       p++;
-
-               save = *p;
-               *p = T('\0');
-               ret = os_get_ino_and_dev(dest, &ino, &dev);
-               *p = save;
-
-               if (ret) /* stat() failed before we got to the capture root---
-                           assume the link points outside it. */
-                       return NULL;
-
-               if (ino == capture_root_ino && dev == capture_root_dev) {
-                       /* Link points inside capture root.  Return abbreviated
-                        * path. */
-                       if (*p == T('\0'))
-                               *(p - 1) = RP_PATH_SEPARATOR;
-                       while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
-                               p--;
-               #ifdef __WIN32__
-                       if (!is_rp_path_separator(dest[0])) {
-                               *--p = dest[1];
-                               *--p = dest[0];
-                       }
-               #endif
-                       wimlib_assert(p >= dest);
-                       return p;
-               }
+       int ret;
+       utf16lechar *target;
+       size_t target_nbytes;
+       struct link_reparse_point link;
+       struct reparse_buffer_disk rpbuf;
+       u16 rpbuflen;
+
+       /* Translate the link target to UTF-16LE.  */
+       ret = tstr_to_utf16le(_target, strlen(_target), &target, &target_nbytes);
+       if (ret)
+               return ret;
 
-               if (*p == T('\0')) {
-                       /* Link points outside capture root. */
-                       return NULL;
-               }
+       /* Translate forward slashes (UNIX path separator) to backslashes
+        * (Windows NT path separator).  In addition, translate backslashes to
+        * forward slashes; this enables lossless handling of UNIX symbolic link
+        * targets that contain the backslash character.  */
+       for (utf16lechar *p = target; *p; p++) {
+               if (*p == cpu_to_le16('/'))
+                       *p = cpu_to_le16('\\');
+               else if (*p == cpu_to_le16('\\'))
+                       *p = cpu_to_le16('/');
+       }
 
-               do {
-                       p++;
-               } while (!is_rp_path_separator(*p) && *p != T('\0'));
+       link.rptag = WIM_IO_REPARSE_TAG_SYMLINK;
+       link.rpreserved = 0;
+
+       /* Note: an absolute link that was rewritten to be relative to another
+        * directory is assumed to either be empty or to have a leading slash.
+        * See unix_relativize_link_target().  */
+       if (*target == cpu_to_le16('\\') || !*target) {
+               /*
+                * UNIX link target was absolute.  In this case we represent the
+                * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
+                * cleared.  For this to work we need to assign it a path that
+                * can be resolved from the root of the Windows NT kernel object
+                * namespace.  We do this by using "\??\C:" as a dummy prefix.
+                *
+                * Note that we could instead represent UNIX absolute links by
+                * setting SYMBOLIC_LINK_RELATIVE and then leaving the path
+                * backslash-prefixed like "\Users\Public".  On Windows this is
+                * valid and denotes a path relative to the root of the
+                * filesystem on which the reparse point resides.  The problem
+                * with this is that neither WIMGAPI nor wimlib (on Windows)
+                * will do "reparse point fixups" when extracting such links
+                * (modifying the link target to point into the actual
+                * extraction directory).  So for the greatest cross-platform
+                * consistency, we have to use the fake C: drive approach.
+                *
+                * The prefixed name is assembled in stack memory obtained from
+                * alloca(), so there is no matching free for it below.
+                */
+               static const utf16lechar prefix[6] = {
+                       cpu_to_le16('\\'),
+                       cpu_to_le16('?'),
+                       cpu_to_le16('?'),
+                       cpu_to_le16('\\'),
+                       cpu_to_le16('C'),
+                       cpu_to_le16(':'),
+               };
+
+               /* Do not show \??\ in print name: the print name aliases the
+                * substitute name buffer, starting just past these 4 leading
+                * characters, and is correspondingly 4 characters shorter.  */
+               const size_t num_unprintable_chars = 4;
+
+               link.symlink_flags = 0;
+               link.substitute_name_nbytes = sizeof(prefix) + target_nbytes;
+               link.substitute_name = alloca(link.substitute_name_nbytes);
+               memcpy(link.substitute_name, prefix, sizeof(prefix));
+               memcpy(link.substitute_name + ARRAY_LEN(prefix), target, target_nbytes);
+               link.print_name_nbytes = link.substitute_name_nbytes -
+                                        (num_unprintable_chars * sizeof(utf16lechar));
+               link.print_name = link.substitute_name + num_unprintable_chars;
+       } else {
+               /* UNIX link target was relative.  In this case we represent the
+                * link as a symlink reparse point with SYMBOLIC_LINK_RELATIVE
+                * set.  This causes Windows to interpret the link relative to
+                * the directory containing the reparse point file.  */
+               link.symlink_flags = SYMBOLIC_LINK_RELATIVE;
+               link.substitute_name_nbytes = target_nbytes;
+               link.substitute_name = target;
+               link.print_name_nbytes = target_nbytes;
+               link.print_name = target;
        }
+
+       /* Generate the reparse buffer.  */
+       ret = make_link_reparse_point(&link, &rpbuf, &rpbuflen);
+       if (ret)
+               goto out_free_target;
+
+       /* Save the reparse data with the inode.  The stream is given
+        * rpbuf.rpdata with a length of rpbuflen less REPARSE_DATA_OFFSET.
+        * The error code is preset because the call below only signals
+        * failure by returning a false value.  */
+       ret = WIMLIB_ERR_NOMEM;
+       if (!inode_add_stream_with_data(inode,
+                                       STREAM_TYPE_REPARSE_POINT,
+                                       NO_STREAM_NAME,
+                                       rpbuf.rpdata,
+                                       rpbuflen - REPARSE_DATA_OFFSET,
+                                       blob_table))
+               goto out_free_target;
+
+       /* The inode is now a reparse point.  */
+       inode->i_reparse_tag = link.rptag;
+       inode->i_attributes &= ~FILE_ATTRIBUTE_NORMAL;
+       inode->i_attributes |= FILE_ATTRIBUTE_REPARSE_POINT;
+
+       ret = 0;
+out_free_target:
+       FREE(target);
+       return ret;
 }
+
+#endif /* !__WIN32__ */