refactor reparse point code; Win32: working extract rpfix
authorEric Biggers <ebiggers3@gmail.com>
Sun, 28 Apr 2013 01:52:48 +0000 (20:52 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 28 Apr 2013 01:52:48 +0000 (20:52 -0500)
13 files changed:
Makefile.am
src/add_image.c
src/extract_image.c
src/mount_image.c
src/reparse.c [new file with mode: 0644]
src/resource.c
src/symlink.c [deleted file]
src/util.c
src/wim.c
src/wimlib.h
src/wimlib_internal.h
src/win32.c
src/xml.c

index 24c0764..1ba1c79 100644 (file)
@@ -44,7 +44,7 @@ libwim_la_SOURCES =           \
        src/sha1.c              \
        src/sha1.h              \
        src/split.c             \
-       src/symlink.c           \
+       src/reparse.c           \
        src/timestamp.h         \
        src/util.c              \
        src/util.h              \
index ff97bc1..d9e4ca2 100644 (file)
@@ -225,8 +225,7 @@ unix_capture_symlink(struct wim_dentry **root_p,
                        }
                        inode->i_not_rpfixed = 0;
                }
-               ret = inode_set_symlink(inode, dest,
-                                       params->lookup_table, NULL);
+               ret = wim_inode_set_symlink(inode, dest, params->lookup_table);
                if (ret == 0) {
                        /* Unfortunately, Windows seems to have the concept of
                         * "file" symbolic links as being different from
index 7effb4f..0d83a18 100644 (file)
@@ -326,11 +326,11 @@ extract_symlink(struct wim_dentry *dentry,
 {
        char target[4096 + args->target_realpath_len];
        char *fixed_target;
+       const struct wim_inode *inode = dentry->d_inode;
 
-       ssize_t ret = inode_readlink(dentry->d_inode,
-                                    target + args->target_realpath_len,
-                                    sizeof(target) - args->target_realpath_len - 1,
-                                    args->w, false);
+       ssize_t ret = wim_inode_readlink(inode,
+                                        target + args->target_realpath_len,
+                                        sizeof(target) - args->target_realpath_len - 1);
        struct wim_lookup_table_entry *lte;
 
        if (ret <= 0) {
@@ -357,11 +357,9 @@ extract_symlink(struct wim_dentry *dentry,
                                 output_path, fixed_target);
                return WIMLIB_ERR_LINK;
        }
-       lte = inode_unnamed_lte_resolved(dentry->d_inode);
-       wimlib_assert(lte != NULL);
        if (args->extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
                struct wimlib_unix_data unix_data;
-               ret = inode_get_unix_data(dentry->d_inode, &unix_data, NULL);
+               ret = inode_get_unix_data(inode, &unix_data, NULL);
                if (ret > 0)
                        ;
                else if (ret < 0)
@@ -371,6 +369,8 @@ extract_symlink(struct wim_dentry *dentry,
                if (ret)
                        return ret;
        }
+       lte = inode_unnamed_lte_resolved(inode);
+       wimlib_assert(lte != NULL);
        args->progress.extract.completed_bytes += wim_resource_size(lte);
        return 0;
 }
index b00a2a6..66e668a 100644 (file)
@@ -2026,7 +2026,7 @@ wimfs_readlink(const char *path, char *buf, size_t buf_len)
                return -EINVAL;
        if (buf_len == 0)
                return -ENAMETOOLONG;
-       ret = inode_readlink(inode, buf, buf_len - 1, ctx->wim, true);
+       ret = wim_inode_readlink(inode, buf, buf_len - 1);
        if (ret >= 0) {
                wimlib_assert(ret <= buf_len - 1);
                buf[ret] = '\0';
@@ -2217,11 +2217,14 @@ wimfs_symlink(const char *to, const char *from)
                            FILE_ATTRIBUTE_REPARSE_POINT, &dentry);
        if (ret == 0) {
                dentry->d_inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK;
-               if (inode_set_symlink(dentry->d_inode, to,
-                                     wimfs_ctx->wim->lookup_table, NULL))
-               {
+               ret = wim_inode_set_symlink(dentry->d_inode, to,
+                                           wimfs_ctx->wim->lookup_table);
+               if (ret) {
                        remove_dentry(dentry, wimfs_ctx->wim->lookup_table);
-                       ret = -ENOMEM;
+                       if (ret == WIMLIB_ERR_NOMEM)
+                               ret = -ENOMEM;
+                       else
+                               ret = -EIO;
                }
        }
        return ret;
diff --git a/src/reparse.c b/src/reparse.c
new file mode 100644 (file)
index 0000000..84890c7
--- /dev/null
@@ -0,0 +1,504 @@
+/*
+ * reparse.c
+ *
+ * Handle reparse data.
+ */
+
+/*
+ * Copyright (C) 2012, 2013 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#include "dentry.h"
+#include "buffer_io.h"
+#include "lookup_table.h"
+#include "sha1.h"
+#include <errno.h>
+
+/* The 11-character prefix "\\?\Volume{" stored as UTF-16LE characters.
+ * parse_substitute_name() compares it against the start of a mount point's
+ * substitute name to recognize volume junctions. */
+static const utf16lechar volume_junction_prefix[11] = {
+       cpu_to_le16('\\'),
+       cpu_to_le16('\\'),
+       cpu_to_le16('?'),
+       cpu_to_le16('\\'),
+       cpu_to_le16('V'),
+       cpu_to_le16('o'),
+       cpu_to_le16('l'),
+       cpu_to_le16('u'),
+       cpu_to_le16('m'),
+       cpu_to_le16('e'),
+       cpu_to_le16('{'),
+};
+
+/* Parse the "substitute name" (link target) from a symbolic link or junction
+ * reparse point.
+ *
+ * @substitute_name:        The substitute name as UTF-16LE characters.  It is
+ *                          only indexed within the length given below, so it
+ *                          need not be null-terminated.
+ * @substitute_name_nbytes: Length of @substitute_name in bytes; halved to get
+ *                          the number of characters.
+ * @rptag:                  Reparse tag.  The volume junction format is only
+ *                          recognized for WIM_IO_REPARSE_TAG_MOUNT_POINT; the
+ *                          drive-letter-only, backslash-only and relative
+ *                          formats only for WIM_IO_REPARSE_TAG_SYMLINK.
+ *
+ * Return value is:
+ *
+ * Non-negative integer:
+ *     The name is an absolute symbolic link in one of several formats,
+ *     and the return value is the number of UTF-16LE characters that need to
+ *     be advanced to reach a simple "absolute" path starting with a backslash
+ *     (i.e. skip over \??\ and/or drive letter).  Concretely: 6 for a
+ *     \??\x:\ prefixed path, 2 for an x:\ prefixed path, and 0 for a path
+ *     that already begins with a backslash.
+ * Negative integer:
+ *     SUBST_NAME_IS_VOLUME_JUNCTION:
+ *             The name is a volume junction.
+ *     SUBST_NAME_IS_RELATIVE_LINK:
+ *             The name is a relative symbolic link.
+ *     SUBST_NAME_IS_UNKNOWN:
+ *             The name does not appear to be a valid symbolic link, junction,
+ *             or mount point.
+ */
+int
+parse_substitute_name(const utf16lechar *substitute_name,
+                     u16 substitute_name_nbytes, u32 rptag)
+{
+       u16 substitute_name_nchars = substitute_name_nbytes / 2;
+
+       if (substitute_name_nchars >= 7 &&
+           substitute_name[0] == cpu_to_le16('\\') &&
+           substitute_name[1] == cpu_to_le16('?') &&
+           substitute_name[2] == cpu_to_le16('?') &&
+           substitute_name[3] == cpu_to_le16('\\') &&
+           substitute_name[4] != cpu_to_le16('\0') &&
+           substitute_name[5] == cpu_to_le16(':') &&
+           substitute_name[6] == cpu_to_le16('\\'))
+       {
+               /* "Full" symlink or junction (\??\x:\ prefixed path) */
+               return 6;
+       } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
+                  substitute_name_nchars >= 12 &&
+                  memcmp(substitute_name, volume_junction_prefix,
+                         sizeof(volume_junction_prefix)) == 0 &&
+                  substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\'))
+       {
+               /* Volume junction.  Can't really do anything with it. */
+               return SUBST_NAME_IS_VOLUME_JUNCTION;
+       } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
+                  substitute_name_nchars >= 3 &&
+                  substitute_name[0] != cpu_to_le16('\0') &&
+                  substitute_name[1] == cpu_to_le16(':') &&
+                  substitute_name[2] == cpu_to_le16('\\'))
+       {
+               /* "Absolute" symlink, with drive letter */
+               return 2;
+       } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK &&
+                  substitute_name_nchars >= 1)
+       {
+               if (substitute_name[0] == cpu_to_le16('\\'))
+                       /* "Absolute" symlink, without drive letter */
+                       return 0;
+               else
+                       /* "Relative" symlink, without drive letter */
+                       return SUBST_NAME_IS_RELATIVE_LINK;
+       } else {
+               return SUBST_NAME_IS_UNKNOWN;
+       }
+}
+
+/*
+ * Read the data from a symbolic link, junction, or mount point reparse point
+ * buffer into a `struct reparse_data'.
+ *
+ * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
+ * description of the format of the reparse point buffers.
+ *
+ * @rpbuf:     Full reparse point buffer, beginning with the 8-byte header
+ *             (reparse tag, data length, reserved field).
+ * @rpbuflen:  Number of valid bytes in @rpbuf.
+ * @rpdata:    Zeroed and then filled in.  On success its substitute_name and
+ *             print_name members point directly into @rpbuf; nothing is
+ *             copied, so they are only valid as long as @rpbuf is.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_INVALID_REPARSE_DATA if the buffer is
+ * too short to hold the fixed fields or if either name extends past
+ * @rpbuflen.
+ */
+int
+parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata)
+{
+       const u8 *p = rpbuf;
+       u16 substitute_name_offset;
+       u16 print_name_offset;
+
+       memset(rpdata, 0, sizeof(*rpdata));
+       /* 8-byte header plus the four 16-bit name offset/length fields */
+       if (rpbuflen < 16)
+               goto out_invalid;
+       p = get_u32(p, &rpdata->rptag);
+       wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK ||
+                     rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
+       p = get_u16(p, &rpdata->rpdatalen);
+       p = get_u16(p, &rpdata->rpreserved);
+       p = get_u16(p, &substitute_name_offset);
+       p = get_u16(p, &rpdata->substitute_name_nbytes);
+       p = get_u16(p, &print_name_offset);
+       p = get_u16(p, &rpdata->print_name_nbytes);
+       /* Symbolic links carry an additional 32-bit flags field before the
+        * names; mount points do not. */
+       if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
+               if (rpbuflen < 20)
+                       goto out_invalid;
+               p = get_u32(p, &rpdata->rpflags);
+       }
+       /* The name offsets are relative to @p, i.e. the end of the fixed
+        * fields just read.  Make sure both names lie within the buffer. */
+       if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes +
+           (p - rpbuf) > rpbuflen)
+               goto out_invalid;
+       if ((size_t)print_name_offset + rpdata->print_name_nbytes +
+           (p - rpbuf) > rpbuflen)
+               goto out_invalid;
+       rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset];
+       rpdata->print_name = (utf16lechar*)&p[print_name_offset];
+       return 0;
+out_invalid:
+       ERROR("Invalid reparse data");
+       return WIMLIB_ERR_INVALID_REPARSE_DATA;
+}
+
+/*
+ * Create a reparse point data buffer.
+ *
+ * The buffer is laid out as: reparse tag, data length, reserved field, the
+ * four name offset/length fields, the flags field (symbolic links only), then
+ * the substitute name at offset 0 followed by the print name, each followed by
+ * a 16-bit null terminator.
+ *
+ * @rpdata:  Structure that contains the data we need.
+ *
+ * @rpbuf:     Buffer into which to write the reparse point data buffer.  Must be
+ *             at least REPARSE_POINT_MAX_SIZE bytes long.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_INVALID_REPARSE_DATA if the header, both
+ * names and their null terminators would not fit in REPARSE_POINT_MAX_SIZE
+ * bytes.
+ */
+int
+make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf)
+{
+       u8 *p = rpbuf;
+
+       p = put_u32(p, rpdata->rptag);
+       p += 2; /* We set ReparseDataLength later */
+       p = put_u16(p, rpdata->rpreserved);
+       p = put_u16(p, 0); /* substitute name offset */
+       p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */
+       p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */
+       p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */
+       if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK)
+               p = put_u32(p, rpdata->rpflags);
+       /* We null-terminate the substitute and print names, although this may
+        * not be strictly necessary.  Note that the byte counts should not
+        * include the null terminators. */
+       if (p + rpdata->substitute_name_nbytes +
+           rpdata->print_name_nbytes +
+           2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE)
+       {
+               ERROR("Reparse data is too long!");
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
+       }
+       p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name);
+       p = put_u16(p, 0);
+       p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name);
+       p = put_u16(p, 0);
+       /* ReparseDataLength counts everything after the 8-byte header */
+       put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */
+       return 0;
+}
+
+/*
+ * Read the reparse data from a WIM inode that is a reparse point.
+ *
+ * @rpbuf points to a buffer at least REPARSE_POINT_MAX_SIZE bytes into which
+ * the reparse point data buffer will be reconstructed.
+ *
+ * Note: in the WIM format, the first 8 bytes of the reparse point data buffer
+ * are omitted, presumably because we already know the reparse tag from the
+ * dentry, and we already know the reparse data length from the lookup table
+ * entry resource length.  However, we reconstruct the first 8 bytes in the
+ * buffer returned by this function.
+ *
+ * Returns 0 on success; WIMLIB_ERR_INVALID_REPARSE_DATA if the inode has no
+ * unnamed stream or the stream is too large to fit in @rpbuf after the 8-byte
+ * header; otherwise the error code from read_full_resource_into_buf().
+ */
+int
+wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf)
+{
+       struct wim_lookup_table_entry *lte;
+       int ret;
+
+       wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
+
+       lte = inode_unnamed_lte_resolved(inode);
+       if (!lte) {
+               ERROR("Reparse point has no reparse data!");
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
+       }
+       /* Leave room for the 8-byte header reconstructed below */
+       if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) {
+               ERROR("Reparse data is too long!");
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
+       }
+
+       /* Read the data from the WIM file */
+       ret = read_full_resource_into_buf(lte, rpbuf + 8, true);
+       if (ret)
+               return ret;
+
+       /* Reconstruct the first 8 bytes of the reparse point buffer */
+
+       /* ReparseTag */
+       put_u32(rpbuf, inode->i_reparse_tag);
+
+       /* ReparseDataLength */
+       put_u16(rpbuf + 4, wim_resource_size(lte));
+
+       /* ReparseReserved
+        * XXX this could be one of the unknown fields in the WIM dentry. */
+       put_u16(rpbuf + 6, 0);
+       return 0;
+}
+
+/* UNIX version of getting and setting the data in reparse points */
+#if !defined(__WIN32__)
+
+/* Get the UNIX symlink target from a WIM inode.  The inode may be either a
+ * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a
+ * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT).
+ *
+ * This has similar semantics to the UNIX readlink() function, except the path
+ * argument is swapped out with the `struct wim_inode' for a reparse point, and
+ * on failure a negated error code is returned rather than -1 with errno set.
+ *
+ * @inode:   Reparse point inode to read the link target from.
+ * @buf:     Receives the translated target.  It is NOT null-terminated.
+ * @bufsize: Capacity of @buf in bytes.
+ *
+ * Returns the number of bytes stored in @buf on success.  If the target is
+ * longer than @bufsize, the first @bufsize bytes are still stored but
+ * -ENAMETOOLONG is returned.  Returns -EIO if the reparse data cannot be read,
+ * parsed, or understood, and -errno if the UTF-16LE conversion fails.  */
+ssize_t
+wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize)
+{
+       int ret;
+       u8 rpbuf[REPARSE_POINT_MAX_SIZE];
+       u16 rpdatalen;
+       struct reparse_data rpdata;
+       char *link_target;
+       char *translated_target;
+       size_t link_target_len;
+
+       wimlib_assert(inode_is_symlink(inode));
+
+       if (wim_inode_get_reparse_data(inode, rpbuf))
+               return -EIO;
+
+       /* ReparseDataLength excludes the 8-byte header, so add it back to get
+        * the full buffer length. */
+       get_u16(rpbuf + 4, &rpdatalen);
+
+       if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata))
+               return -EIO;
+
+       ret = utf16le_to_tstr(rpdata.substitute_name,
+                             rpdata.substitute_name_nbytes,
+                             &link_target, &link_target_len);
+       if (ret)
+               return -errno;
+
+       /* From here on @link_target is allocated and must be freed on every
+        * path; all exits go through out_free. */
+       translated_target = link_target;
+       ret = parse_substitute_name(rpdata.substitute_name,
+                                   rpdata.substitute_name_nbytes,
+                                   rpdata.rptag);
+       switch (ret) {
+       case SUBST_NAME_IS_RELATIVE_LINK:
+               goto out_translate_slashes;
+       case SUBST_NAME_IS_VOLUME_JUNCTION:
+               /* Returned verbatim, without slash translation */
+               goto out_have_link;
+       case SUBST_NAME_IS_UNKNOWN:
+               ERROR("Can't understand reparse point "
+                     "substitute name \"%s\"", link_target);
+               ret = -EIO;
+               goto out_free;
+       default:
+               /* Absolute link: skip the \??\ and/or drive letter prefix */
+               translated_target += ret;
+               link_target_len -= ret;
+               break;
+       }
+
+out_translate_slashes:
+       for (size_t i = 0; i < link_target_len; i++)
+               if (translated_target[i] == '\\')
+                       translated_target[i] = '/';
+out_have_link:
+       if (link_target_len > bufsize) {
+               link_target_len = bufsize;
+               ret = -ENAMETOOLONG;
+       } else {
+               ret = link_target_len;
+       }
+       memcpy(buf, translated_target, link_target_len);
+out_free:
+       FREE(link_target);
+       return ret;
+}
+
+#ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+#endif
+
+/* Set the reparse data of a WIM inode to a symbolic link pointing at a UNIX
+ * path.
+ *
+ * @inode:        Inode to modify.  Its i_reparse_tag is used as the reparse
+ *                tag, and its unnamed stream is set to the new reparse data
+ *                (without the 8-byte reparse point header).
+ * @target:       Null-terminated UNIX symlink target.
+ * @lookup_table: Passed through to inode_set_unnamed_stream().
+ *
+ * Returns 0 on success; WIMLIB_ERR_INVALID_REPARSE_DATA if the target is too
+ * long to fit in a reparse point buffer; otherwise the error code from
+ * tstr_to_utf16le(), make_reparse_buffer() or inode_set_unnamed_stream().  */
+int
+wim_inode_set_symlink(struct wim_inode *inode,
+                     const char *target,
+                     struct wim_lookup_table *lookup_table)
+
+{
+       u8 rpbuf[REPARSE_POINT_MAX_SIZE];
+       u16 rpdatalen;
+       struct reparse_data rpdata;
+       static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
+       static const char abs_print_name_prefix[4] = "C\0:\0";
+       utf16lechar *name_utf16le;
+       size_t name_utf16le_nbytes;
+       int ret;
+
+       DEBUG("Creating reparse point data buffer for UNIX "
+             "symlink target \"%s\"", target);
+       memset(&rpdata, 0, sizeof(rpdata));
+       ret = tstr_to_utf16le(target, strlen(target),
+                             &name_utf16le, &name_utf16le_nbytes);
+       if (ret)
+               return ret;
+
+       /* Reject over-long names before their length is stored in @rpdata.
+        * The name length fields there are filled with get_u16() elsewhere in
+        * this file, so a larger size_t value would presumably be truncated;
+        * the alloca() buffers below would then be sized from the truncated
+        * value while memcpy() still copies the full name.  Such a name could
+        * never fit in the reparse point buffer anyway. */
+       if (name_utf16le_nbytes > REPARSE_POINT_MAX_SIZE) {
+               ERROR("Reparse data is too long!");
+               FREE(name_utf16le);
+               return WIMLIB_ERR_INVALID_REPARSE_DATA;
+       }
+
+       for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
+               if (name_utf16le[i] == cpu_to_le16('/'))
+                       name_utf16le[i] = cpu_to_le16('\\');
+
+       /* Compatibility notes:
+        *
+        * On UNIX, an absolute symbolic link begins with '/'; everything else
+        * is a relative symbolic link.  (Quite simple compared to the various
+        * ways to provide Windows paths.)
+        *
+        * To change a UNIX relative symbolic link to Windows format, we only
+        * need to translate it to UTF-16LE and replace forward slashes with
+        * backslashes.  We do not make any attempt to handle filename character
+        * problems, such as a link target that itself contains backslashes on
+        * UNIX.  Then, for these relative links, we set the reparse header
+        * @flags field to SYMBOLIC_LINK_RELATIVE.
+        *
+        * For UNIX absolute symbolic links, we must set the @flags field to 0.
+        * Then, there are multiple options as to how to actually represent the
+        * absolute link targets:
+        *
+        * (1) An absolute path beginning with one backslash character, similar
+        * to UNIX-style, just with a different path separator.  Print name same
+        * as substitute name.
+        *
+        * (2) Absolute path beginning with drive letter followed by a
+        * backslash.  Print name same as substitute name.
+        *
+        * (3) Absolute path beginning with drive letter followed by a
+        * backslash; substitute name prefixed with \??\, otherwise same as
+        * print name.
+        *
+        * We choose option (3) here, and we just assume C: for the drive
+        * letter.  The reasoning for this is:
+        *
+        * (1) Microsoft imagex.exe has a bug where it does not attempt to do
+        * reparse point fixups for these links, even though they are valid
+        * absolute links.  (Note: in this case prefixing the substitute name
+        * with \??\ does not work; it just makes the data unable to be restored
+        * at all.)
+        * (2) Microsoft imagex.exe will fail when doing reparse point fixups
+        * for these.  It apparently contains a bug that causes it to create an
+        * invalid reparse point, which then cannot be restored.
+        * (3) This is the only option I tested for which reparse point fixups
+        * worked properly in Microsoft imagex.exe.
+        *
+        * So option (3) it is.
+        */
+
+       rpdata.rptag = inode->i_reparse_tag;
+       if (target[0] == '/') {
+               /* Absolute link: build "\??\C:" + name and "C:" + name in
+                * temporary stack buffers.  rpflags stays 0 from the memset. */
+               rpdata.substitute_name_nbytes = name_utf16le_nbytes +
+                                               sizeof(abs_subst_name_prefix);
+               rpdata.print_name_nbytes = name_utf16le_nbytes +
+                                          sizeof(abs_print_name_prefix);
+               rpdata.substitute_name = alloca(rpdata.substitute_name_nbytes);
+               rpdata.print_name = alloca(rpdata.print_name_nbytes);
+               memcpy(rpdata.substitute_name, abs_subst_name_prefix,
+                      sizeof(abs_subst_name_prefix));
+               memcpy(rpdata.print_name, abs_print_name_prefix,
+                      sizeof(abs_print_name_prefix));
+               memcpy((void*)rpdata.substitute_name + sizeof(abs_subst_name_prefix),
+                      name_utf16le, name_utf16le_nbytes);
+               memcpy((void*)rpdata.print_name + sizeof(abs_print_name_prefix),
+                      name_utf16le, name_utf16le_nbytes);
+       } else {
+               /* Relative link: both names are the converted target itself */
+               rpdata.substitute_name_nbytes = name_utf16le_nbytes;
+               rpdata.print_name_nbytes = name_utf16le_nbytes;
+               rpdata.substitute_name = name_utf16le;
+               rpdata.print_name = name_utf16le;
+               rpdata.rpflags = SYMBOLIC_LINK_RELATIVE;
+       }
+
+       ret = make_reparse_buffer(&rpdata, rpbuf);
+       if (ret == 0) {
+               /* Store only the part after the 8-byte header in the inode's
+                * unnamed stream; its length is ReparseDataLength. */
+               get_u16(rpbuf + 4, &rpdatalen);
+               ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen,
+                                              lookup_table);
+       }
+       FREE(name_utf16le);
+       return ret;
+}
+
+#include <sys/stat.h>
+
+/* Look up the inode number and device number of @path using stat(), which
+ * follows symbolic links.
+ *
+ * On success, stores them in @ino_ret and @dev_ret and returns 0.  If stat()
+ * fails, prints a warning, leaves the outputs untouched, and returns
+ * WIMLIB_ERR_STAT.  */
+static int
+unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
+{
+       struct stat stbuf;
+       if (stat(path, &stbuf)) {
+               WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
+               /* Treat as a link pointing outside the capture root (it
+                * most likely is). */
+               return WIMLIB_ERR_STAT;
+       } else {
+               *ino_ret = stbuf.st_ino;
+               *dev_ret = stbuf.st_dev;
+               return 0;
+       }
+}
+
+#endif /* !defined(__WIN32__) */
+
+/* Platform glue for fixup_symlink(): the separator it writes into a fixed-up
+ * path, the test for separator characters (Windows accepts both kinds of
+ * slash), and the function that yields the two 64-bit identifiers compared
+ * against the capture root. */
+#ifdef __WIN32__
+#  include "win32.h"
+#  define RP_PATH_SEPARATOR L'\\'
+#  define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
+#  define os_get_ino_and_dev win32_get_file_and_vol_ids
+#else
+#  define RP_PATH_SEPARATOR '/'
+#  define is_rp_path_separator(c) ((c) == '/')
+#  define os_get_ino_and_dev unix_get_ino_and_dev
+#endif
+
+/* Fix up absolute symbolic link targets--- mostly shared between UNIX and
+ * Windows
+ *
+ * Walks @dest one path component at a time, looking up the identifiers of
+ * each successively longer prefix with os_get_ino_and_dev() until one matches
+ * the capture root.
+ *
+ * @dest:             Absolute link target.  It is modified in place: a null
+ *                    terminator is temporarily written at each prefix boundary
+ *                    and then restored, and if the entire path refers to the
+ *                    capture root its last character is overwritten with a
+ *                    path separator.
+ * @capture_root_ino: First identifier of the capture root (inode number on
+ *                    UNIX).
+ * @capture_root_dev: Second identifier of the capture root (device number on
+ *                    UNIX).
+ *
+ * Returns a pointer into @dest at the separator(s) that follow the prefix
+ * matching the capture root, i.e. the link target relative to the capture
+ * root; or NULL if a lookup fails or no prefix matches, in which case the link
+ * is taken to point outside the capture root.  */
+tchar *
+fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
+{
+       tchar *p = dest;
+
+       DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
+       for (;;) {
+               tchar save;
+               int ret;
+               u64 ino;
+               u64 dev;
+
+               /* Include any run of separators in the prefix being tested */
+               while (is_rp_path_separator(*p))
+                       p++;
+
+               /* Temporarily terminate @dest at @p so that only the prefix is
+                * looked up, then put the original character back. */
+               save = *p;
+               *p = T('\0');
+               ret = os_get_ino_and_dev(dest, &ino, &dev);
+               *p = save;
+
+               if (ret) /* stat() failed before we got to the capture root---
+                           assume the link points outside it. */
+                       return NULL;
+
+               if (ino == capture_root_ino && dev == capture_root_dev) {
+                       /* Link points inside capture root.  Return abbreviated
+                        * path. */
+                       /* If nothing follows the matching prefix, turn its last
+                        * character into a separator so that the result is a
+                        * lone separator. */
+                       if (*p == T('\0'))
+                               *(p - 1) = RP_PATH_SEPARATOR;
+                       /* Back up so the result begins with the separator(s) */
+                       while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
+                               p--;
+                       return p;
+               }
+
+               if (*p == T('\0')) {
+                       /* Link points outside capture root. */
+                       return NULL;
+               }
+
+               /* Advance to the end of the current path component */
+               do {
+                       p++;
+               } while (!is_rp_path_separator(*p) && *p != T('\0'));
+       }
+}
index 8c88129..c7eedab 100644 (file)
@@ -463,9 +463,9 @@ put_resource_entry(void *p, const struct resource_entry *entry)
 static FILE *
 wim_get_fp(WIMStruct *w)
 {
+       FILE *fp;
 #if defined(WITH_FUSE) || defined(ENABLE_MULTITHREADED_COMPRESSION)
        pthread_mutex_lock(&w->fp_tab_mutex);
-       FILE *fp;
 
        wimlib_assert(w->filename != NULL);
 
diff --git a/src/symlink.c b/src/symlink.c
deleted file mode 100644 (file)
index f005cd7..0000000
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * symlink.c
- *
- * Code to read and set symbolic links in WIM files.
- */
-
-/*
- * Copyright (C) 2012, 2013 Eric Biggers
- *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
- */
-
-#include "dentry.h"
-#include "buffer_io.h"
-#include "lookup_table.h"
-#include "sha1.h"
-#include <errno.h>
-
-/* UNIX version of getting and setting the data in reparse points */
-#if !defined(__WIN32__)
-
-#include <sys/stat.h>
-
-#ifdef HAVE_ALLOCA_H
-#  include <alloca.h>
-#endif
-
-/*
- * Find the symlink target of a symbolic link or junction point in the WIM.
- *
- * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a
- * description of the format of the so-called "reparse point data buffers".
- *
- * But, in the WIM format, the first 8 bytes of the reparse point data buffer
- * are omitted, presumably because we already know the reparse tag from the
- * dentry, and we already know the reparse tag length from the lookup table
- * entry resource length.
- */
-static ssize_t
-get_symlink_name(const void *resource, size_t resource_len, char *buf,
-                size_t buf_len, u32 reparse_tag)
-{
-       const void *p = resource;
-       u16 substitute_name_offset;
-       u16 substitute_name_len;
-       u16 print_name_offset;
-       u16 print_name_len;
-       char *link_target;
-       char *translated_target;
-       size_t link_target_len;
-       ssize_t ret;
-       unsigned header_size;
-       bool translate_slashes;
-
-       if (resource_len < 12)
-               return -EIO;
-       p = get_u16(p, &substitute_name_offset);
-       p = get_u16(p, &substitute_name_len);
-       p = get_u16(p, &print_name_offset);
-       p = get_u16(p, &print_name_len);
-
-       wimlib_assert(reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
-                     reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
-
-       if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
-               header_size = 8;
-       else {
-               header_size = 12;
-               p += 4;
-       }
-       if (header_size +
-           substitute_name_offset + substitute_name_len > resource_len)
-               return -EIO;
-
-       ret = utf16le_to_tstr((const utf16lechar*)(p + substitute_name_offset),
-                             substitute_name_len,
-                             &link_target, &link_target_len);
-       if (ret)
-               return -errno;
-
-       DEBUG("Interpeting substitute name \"%s\" (ReparseTag=0x%x)",
-             link_target, reparse_tag);
-       translate_slashes = true;
-       translated_target = link_target;
-       if (link_target_len >= 7 &&
-           translated_target[0] == '\\' &&
-           translated_target[1] == '?' &&
-           translated_target[2] == '?' &&
-           translated_target[3] == '\\' &&
-           translated_target[4] != '\0' &&
-           translated_target[5] == ':' &&
-           translated_target[6] == '\\')
-       {
-               /* "Full" symlink or junction (\??\x:\ prefixed path) */
-               translated_target += 6;
-               link_target_len -= 6;
-       } else if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT &&
-                  link_target_len >= 12 &&
-                  memcmp(translated_target, "\\\\?\\Volume{", 11) == 0 &&
-                  translated_target[link_target_len - 1] == '\\')
-       {
-               /* Volume junction.  Can't really do anything with it. */
-               translate_slashes = false;
-       } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK &&
-                  link_target_len >= 3 &&
-                  translated_target[0] != '\0' &&
-                  translated_target[1] == ':' &&
-                  translated_target[2] == '\\')
-       {
-               /* "Absolute" symlink, with drive letter */
-               translated_target += 2;
-               link_target_len -= 2;
-       } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK &&
-                  link_target_len >= 1)
-       {
-               if (translated_target[0] == '\\')
-                       /* "Absolute" symlink, without drive letter */
-                       ;
-               else
-                       /* "Relative" symlink, without drive letter */
-                       ;
-       } else {
-               ERROR("Invalid reparse point substitute name: \"%s\"", translated_target);
-               ret = -EIO;
-               goto out;
-       }
-
-       if (translate_slashes)
-               for (size_t i = 0; i < link_target_len; i++)
-                       if (translated_target[i] == '\\')
-                               translated_target[i] = '/';
-
-       if (link_target_len > buf_len) {
-               link_target_len = buf_len;
-               ret = -ENAMETOOLONG;
-       } else {
-               ret = link_target_len;
-       }
-       memcpy(buf, translated_target, link_target_len);
-out:
-       FREE(link_target);
-       return ret;
-}
-
-#define SYMBOLIC_LINK_RELATIVE 0x00000001
-
-/* Given a UNIX symlink target, prepare the corresponding symbolic link reparse
- * data buffer. */
-static int
-make_symlink_reparse_data_buf(const char *symlink_target, void *rpdata,
-                             size_t *rplen_ret)
-{
-       int ret;
-       utf16lechar *name_utf16le;
-       size_t name_utf16le_nbytes;
-       size_t substitute_name_nbytes;
-       size_t print_name_nbytes;
-       static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
-       static const char abs_print_name_prefix[4] = "C\0:\0";
-       u32 flags;
-       size_t rplen;
-       void *p;
-
-       ret = tstr_to_utf16le(symlink_target, strlen(symlink_target),
-                             &name_utf16le, &name_utf16le_nbytes);
-       if (ret)
-               return ret;
-
-       for (size_t i = 0; i < name_utf16le_nbytes / 2; i++)
-               if (name_utf16le[i] == cpu_to_le16('/'))
-                       name_utf16le[i] = cpu_to_le16('\\');
-
-       /* Compatability notes:
-        *
-        * On UNIX, an absolute symbolic link begins with '/'; everything else
-        * is a relative symbolic link.  (Quite simple compared to the various
-        * ways to provide Windows paths.)
-        *
-        * To change a UNIX relative symbolic link to Windows format, we only
-        * need to translate it to UTF-16LE and replace backslashes with forward
-        * slashes.  We do not make any attempt to handle filename character
-        * problems, such as a link target that itself contains backslashes on
-        * UNIX.  Then, for these relative links, we set the reparse header
-        * @flags field to SYMBOLIC_LINK_RELATIVE.
-        *
-        * For UNIX absolute symbolic links, we must set the @flags field to 0.
-        * Then, there are multiple options as to actually represent the
-        * absolute link targets:
-        *
-        * (1) An absolute path beginning with one backslash character. similar
-        * to UNIX-style, just with a different path separator.  Print name same
-        * as substitute name.
-        *
-        * (2) Absolute path beginning with drive letter followed by a
-        * backslash.  Print name same as substitute name.
-        *
-        * (3) Absolute path beginning with drive letter followed by a
-        * backslash; substitute name prefixed with \??\, otherwise same as
-        * print name.
-        *
-        * We choose option (3) here, and we just assume C: for the drive
-        * letter.  The reasoning for this is:
-        *
-        * (1) Microsoft imagex.exe has a bug where it does not attempt to do
-        * reparse point fixups for these links, even though they are valid
-        * absolute links.  (Note: in this case prefixing the substitute name
-        * with \??\ does not work; it just makes the data unable to be restored
-        * at all.)
-        * (2) Microsoft imagex.exe will fail when doing reparse point fixups
-        * for these.  It apparently contains a bug that causes it to create an
-        * invalid reparse point, which then cannot be restored.
-        * (3) This is the only option I tested for which reparse point fixups
-        * worked properly in Microsoft imagex.exe.
-        *
-        * So option (3) it is.
-        */
-
-       substitute_name_nbytes = name_utf16le_nbytes;
-       print_name_nbytes = name_utf16le_nbytes;
-       if (symlink_target[0] == '/') {
-               substitute_name_nbytes += sizeof(abs_subst_name_prefix);
-               print_name_nbytes += sizeof(abs_print_name_prefix);
-       }
-
-       rplen = 12 + substitute_name_nbytes + print_name_nbytes +
-                       2 * sizeof(utf16lechar);
-
-       if (rplen > REPARSE_POINT_MAX_SIZE) {
-               ERROR("Symlink \"%s\" is too long!", symlink_target);
-               return WIMLIB_ERR_LINK;
-       }
-
-       p = rpdata;
-
-       /* Substitute name offset */
-       p = put_u16(p, 0);
-
-       /* Substitute name length */
-       p = put_u16(p, substitute_name_nbytes);
-
-       /* Print name offset */
-       p = put_u16(p, substitute_name_nbytes + sizeof(utf16lechar));
-
-       /* Print name length */
-       p = put_u16(p, print_name_nbytes);
-
-       /* Flags */
-       flags = 0;
-       if (symlink_target[0] != '/')
-               flags |= SYMBOLIC_LINK_RELATIVE;
-       p = put_u32(p, flags);
-
-       /* Substitute name */
-       if (symlink_target[0] == '/')
-               p = put_bytes(p, sizeof(abs_subst_name_prefix), abs_subst_name_prefix);
-       p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
-       p = put_u16(p, 0);
-
-       /* Print name */
-       if (symlink_target[0] == '/')
-               p = put_bytes(p, sizeof(abs_print_name_prefix), abs_print_name_prefix);
-       p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
-       p = put_u16(p, 0);
-
-       *rplen_ret = rplen;
-       ret = 0;
-out_free_name_utf16le:
-       FREE(name_utf16le);
-       return ret;
-}
-
-/* Get the symlink target from a WIM inode.
- *
- * The inode may be either a "real" symlink (reparse tag
- * WIM_IO_REPARSE_TAG_SYMLINK), or it may be a junction point (reparse tag
- * WIM_IO_REPARSE_TAG_MOUNT_POINT).
- */
-ssize_t
-inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len,
-              const WIMStruct *w, bool threadsafe)
-{
-       const struct wim_lookup_table_entry *lte;
-       int ret;
-       u8 *res_buf;
-
-       wimlib_assert(inode_is_symlink(inode));
-
-       lte = inode_unnamed_lte(inode, w->lookup_table);
-       if (!lte)
-               return -EIO;
-
-       if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE)
-               return -EIO;
-
-       res_buf = alloca(wim_resource_size(lte));
-       ret = read_full_resource_into_buf(lte, res_buf, threadsafe);
-       if (ret)
-               return -EIO;
-       return get_symlink_name(res_buf, wim_resource_size(lte),
-                               buf, buf_len, inode->i_reparse_tag);
-}
-
-/*
- * Sets @inode to be a symbolic link pointing to @target.
- *
- * A lookup table entry for the symbolic link data buffer is created and
- * inserted into @lookup_table, unless there is an existing lookup table entry
- * for the exact same data, in which its reference count is incremented.
- *
- * The lookup table entry is returned in @lte_ret.
- *
- * On failure @dentry and @lookup_table are not modified.
- */
-int
-inode_set_symlink(struct wim_inode *inode,
-                 const char *target,
-                 struct wim_lookup_table *lookup_table,
-                 struct wim_lookup_table_entry **lte_ret)
-
-{
-       int ret;
-
-       /* Buffer for reparse point data */
-       u8 rpdata[REPARSE_POINT_MAX_SIZE];
-
-       /* Actual length of the reparse point data (to be calculated by
-        * make_symlink_reparse_data_buf()) */
-       size_t rplen;
-
-       DEBUG("Creating reparse point data buffer "
-             "for UNIX symlink target \"%s\"", target);
-
-       ret = make_symlink_reparse_data_buf(target, rpdata, &rplen);
-       if (ret)
-               return ret;
-
-       ret = inode_set_unnamed_stream(inode, rpdata, rplen, lookup_table);
-       if (ret)
-               return ret;
-
-       if (lte_ret)
-               *lte_ret = inode->i_lte;
-       return 0;
-}
-
-static int
-unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
-{
-       struct stat stbuf;
-       if (stat(path, &stbuf)) {
-               WARNING_WITH_ERRNO("Failed to stat \"%s\"", path);
-               /* Treat as a link pointing outside the capture root (it
-                * most likely is). */
-               return WIMLIB_ERR_STAT;
-       } else {
-               *ino_ret = stbuf.st_ino;
-               *dev_ret = stbuf.st_dev;
-               return 0;
-       }
-}
-
-#endif /* !defined(__WIN32__) */
-
-#ifdef __WIN32__
-#  include "win32.h"
-#  define RP_PATH_SEPARATOR L'\\'
-#  define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
-#  define os_get_ino_and_dev win32_get_file_and_vol_ids
-#else
-#  define RP_PATH_SEPARATOR '/'
-#  define is_rp_path_separator(c) ((c) == '/')
-#  define os_get_ino_and_dev unix_get_ino_and_dev
-#endif
-
-/* Fix up absolute symbolic link targets--- mostly shared between UNIX and
- * Windows */
-tchar *
-fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
-{
-       tchar *p = dest;
-
-       DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
-       for (;;) {
-               tchar save;
-               int ret;
-               u64 ino;
-               u64 dev;
-
-               while (is_rp_path_separator(*p))
-                       p++;
-
-               save = *p;
-               *p = T('\0');
-               ret = os_get_ino_and_dev(dest, &ino, &dev);
-               *p = save;
-
-               if (ret) /* stat() failed before we got to the capture root---
-                           assume the link points outside it. */
-                       return NULL;
-
-               if (ino == capture_root_ino && dev == capture_root_dev) {
-                       /* Link points inside capture root.  Return abbreviated
-                        * path. */
-                       if (*p == T('\0'))
-                               *(p - 1) = RP_PATH_SEPARATOR;
-                       while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
-                               p--;
-                       return p;
-               }
-
-               if (*p == T('\0')) {
-                       /* Link points outside capture root. */
-                       return NULL;
-               }
-
-               do {
-                       p++;
-               } while (!is_rp_path_separator(*p) && *p != T('\0'));
-       }
-}
index 23dfff2..f778fba 100644 (file)
@@ -301,6 +301,8 @@ static const tchar *error_strings[] = {
                = T("An invalid parameter was given"),
        [WIMLIB_ERR_INVALID_PART_NUMBER]
                = T("The part number or total parts of the WIM is invalid"),
+       [WIMLIB_ERR_INVALID_REPARSE_DATA]
+               = T("The reparse data of a reparse point was invalid"),
        [WIMLIB_ERR_INVALID_RESOURCE_HASH]
                = T("The SHA1 message digest of a WIM resource did not match the expected value"),
        [WIMLIB_ERR_INVALID_RESOURCE_SIZE]
@@ -347,6 +349,8 @@ static const tchar *error_strings[] = {
                = T("Could not rename a file"),
        [WIMLIB_ERR_REOPEN]
                = T("Could not re-open the WIM after overwriting it"),
+       [WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED]
+               = T("Unable to complete reparse point fixup"),
        [WIMLIB_ERR_RESOURCE_ORDER]
                = T("The components of the WIM were arranged in an unexpected order"),
        [WIMLIB_ERR_SPECIAL_FILE]
index 0900fea..a257581 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -650,15 +650,13 @@ wimlib_free(WIMStruct *w)
        if (w->out_fp)
                fclose(w->out_fp);
 
-#ifdef WITH_FUSE
+#if defined(WITH_FUSE) || defined(ENABLE_MULTITHREADED_COMPRESSION)
        if (w->fp_tab) {
                for (size_t i = 0; i < w->num_allocated_fps; i++)
                        if (w->fp_tab[i])
                                fclose(w->fp_tab[i]);
                FREE(w->fp_tab);
        }
-#endif
-#if defined(WITH_FUSE) || defined(ENABLE_MULTITHREADED_COMPRESSION)
        pthread_mutex_destroy(&w->fp_tab_mutex);
 #endif
 
index 84ee830..89d6456 100644 (file)
@@ -956,6 +956,7 @@ enum wimlib_error_code {
        WIMLIB_ERR_INVALID_OVERLAY,
        WIMLIB_ERR_INVALID_PARAM,
        WIMLIB_ERR_INVALID_PART_NUMBER,
+       WIMLIB_ERR_INVALID_REPARSE_DATA,
        WIMLIB_ERR_INVALID_RESOURCE_HASH,
        WIMLIB_ERR_INVALID_RESOURCE_SIZE,
        WIMLIB_ERR_INVALID_SECURITY_DATA,
@@ -977,6 +978,7 @@ enum wimlib_error_code {
        WIMLIB_ERR_READLINK,
        WIMLIB_ERR_RENAME,
        WIMLIB_ERR_REOPEN,
+       WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED,
        WIMLIB_ERR_RESOURCE_ORDER,
        WIMLIB_ERR_SPECIAL_FILE,
        WIMLIB_ERR_SPLIT_INVALID,
index 1dfc436..4650f4d 100644 (file)
@@ -601,6 +601,73 @@ extern int
 do_ntfs_umount(struct _ntfs_volume *vol);
 #endif
 
+/* reparse.c */
+
+/* Structured format for symbolic link, junction point, or mount point reparse
+ * data. */
+struct reparse_data {
+       /* Reparse point tag (see WIM_IO_REPARSE_TAG_* values) */
+       u32 rptag;
+
+       /* Length of reparse data, not including the 8-byte header (ReparseTag,
+        * ReparseDataLength, ReparseReserved) */
+       u16 rpdatalen;
+
+       /* ReparseReserved */
+       u16 rpreserved;
+
+       /* Flags (only for WIM_IO_REPARSE_TAG_SYMLINK reparse points).
+        * SYMBOLIC_LINK_RELATIVE means this is a relative symbolic link;
+        * otherwise should be set to 0. */
+#define SYMBOLIC_LINK_RELATIVE 0x00000001
+       u32 rpflags;
+
+       /* Pointer to the substitute name of the link (UTF-16LE). */
+       utf16lechar *substitute_name;
+
+       /* Pointer to the print name of the link (UTF-16LE). */
+       utf16lechar *print_name;
+
+       /* Number of bytes of the substitute name, not including null terminator
+        * if present */
+       u16 substitute_name_nbytes;
+
+       /* Number of bytes of the print name, not including null terminator if
+        * present */
+       u16 print_name_nbytes;
+};
+
+enum {
+       SUBST_NAME_IS_RELATIVE_LINK = -1,
+       SUBST_NAME_IS_VOLUME_JUNCTION = -2,
+       SUBST_NAME_IS_UNKNOWN = -3,
+};
+extern int
+parse_substitute_name(const utf16lechar *substitute_name,
+                     u16 substitute_name_nbytes,
+                     u32 rptag);
+
+extern int
+parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata);
+
+extern int
+make_reparse_buffer(const struct reparse_data *rpdata, u8 *buf);
+
+extern int
+wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf);
+
+#ifndef __WIN32__
+ssize_t
+wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len);
+
+extern int
+wim_inode_set_symlink(struct wim_inode *inode, const char *target,
+                     struct wim_lookup_table *lookup_table);
+#endif
+extern tchar *
+fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev);
+
+
 /* resource.c */
 
 #define WIMLIB_RESOURCE_FLAG_RAW               0x1
@@ -663,21 +730,6 @@ write_security_data(const struct wim_security_data *sd, u8 *p);
 extern void
 free_security_data(struct wim_security_data *sd);
 
-/* symlink.c */
-
-#ifndef __WIN32__
-ssize_t
-inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len,
-              const WIMStruct *w, bool threadsafe);
-
-extern int
-inode_set_symlink(struct wim_inode *inode, const char *target,
-                 struct wim_lookup_table *lookup_table,
-                 struct wim_lookup_table_entry **lte_ret);
-#endif
-extern tchar *
-fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev);
-
 /* verify.c */
 
 extern int
index ae2dae8..cbc72a6 100644 (file)
@@ -660,62 +660,31 @@ enum rp_status {
  * be reached via multiple destinations due to other symbolic links).  This may
  * not work properly on FAT, which doesn't seem to supply proper inode numbers
  * or file IDs.  However, FAT doesn't support reparse points so this function
- * wouldn't even be called anyway.  */
+ * wouldn't even be called anyway.
+ */
 static enum rp_status
-win32_maybe_rpfix_target(wchar_t *target, size_t *target_nchars_p,
-                        u64 capture_root_ino, u64 capture_root_dev)
+win32_capture_maybe_rpfix_target(wchar_t *target, u16 *target_nbytes_p,
+                                u64 capture_root_ino, u64 capture_root_dev,
+                                u32 rptag)
 {
-       size_t target_nchars= *target_nchars_p;
+       u16 target_nchars = *target_nbytes_p / 2;
        size_t stripped_chars;
        wchar_t *orig_target;
+       int ret;
 
-       if (target_nchars == 0)
-               /* Invalid reparse point (empty target) */
+       ret = parse_substitute_name(target, *target_nbytes_p, rptag);
+       if (ret < 0)
                return RP_NOT_FIXED;
-
-       if (target[0] == L'\\') {
-               if (target_nchars >= 2 && target[1] == L'\\') {
-                       /* Probably a volume.  Can't do anything with it. */
-                       DEBUG("Not fixing target (probably a volume)");
-                       return RP_NOT_FIXED;
-               } else if (target_nchars >= 7 &&
-                          target[1] == '?' &&
-                          target[2] == '?' &&
-                          target[3] == '\\' &&
-                          target[4] != '\0' &&
-                          target[5] == ':' &&
-                          target[6] == '\\')
-               {
-                       DEBUG("Full style path");
-                       /* Full \??\x:\ style path (may be junction or symlink)
-                        * */
-                       stripped_chars = 6;
-               } else {
-                       DEBUG("Absolute target without drive letter");
-                       /* Absolute target, without drive letter */
-                       stripped_chars = 0;
-               }
-       } else if (target_nchars >= 3 &&
-                  target[0] != L'\0' &&
-                  target[1] == L':' &&
-                  target[2] == L'\\')
-       {
-               DEBUG("Absolute target with drive letter");
-               /* Absolute target, with drive letter */
-               stripped_chars = 2;
-       } else {
-               DEBUG("Relative symlink or other link");
-               /* Relative symlink or other unexpected format */
-               return RP_NOT_FIXED;
-       }
+       stripped_chars = ret;
        target[target_nchars] = L'\0';
        orig_target = target;
-       target = fixup_symlink(target + stripped_chars, capture_root_ino, capture_root_dev);
+       target = fixup_symlink(target + stripped_chars,
+                              capture_root_ino, capture_root_dev);
        if (!target)
                return RP_EXCLUDED;
        target_nchars = wcslen(target);
        wmemmove(orig_target + stripped_chars, target, target_nchars + 1);
-       *target_nchars_p = target_nchars + stripped_chars;
+       *target_nbytes_p = (target_nchars + stripped_chars) * sizeof(wchar_t);
        DEBUG("Fixed reparse point (new target: \"%ls\")", orig_target);
        if (stripped_chars == 6)
                return RP_FIXED_FULLPATH;
@@ -723,92 +692,89 @@ win32_maybe_rpfix_target(wchar_t *target, size_t *target_nchars_p,
                return RP_FIXED_ABSPATH;
 }
 
-static enum rp_status
-win32_try_capture_rpfix(char *rpbuf, DWORD *rpbuflen_p,
+/* Returns: `enum rp_status' value on success; negative WIMLIB_ERR_* value on
+ * failure. */
+static int
+win32_capture_try_rpfix(u8 *rpbuf, u16 *rpbuflen_p,
                        u64 capture_root_ino, u64 capture_root_dev)
 {
-       const char *p_get;
-       char *p_put;
-       u16 substitute_name_offset;
-       u16 substitute_name_len;
-       wchar_t *target;
-       size_t target_nchars;
-       enum rp_status status;
-       u32 rptag;
-       DWORD rpbuflen = *rpbuflen_p;
-
-       if (rpbuflen < 16) /* Invalid reparse point (length too small) */
-               return RP_NOT_FIXED;
-       p_get = get_u32(rpbuf, &rptag);
-       p_get += 4;
-       p_get = get_u16(p_get, &substitute_name_offset);
-       p_get = get_u16(p_get, &substitute_name_len);
-       p_get += 4;
-       if (rptag == WIM_IO_REPARSE_TAG_SYMLINK) {
-               if (rpbuflen < 20) /* Invalid reparse point (length too small) */
-                       return RP_NOT_FIXED;
-               p_get += 4;
-       }
-       if ((DWORD)substitute_name_offset +
-           substitute_name_len + (p_get - rpbuf) > rpbuflen)
-               /* Invalid reparse point (length too small) */
-               return RP_NOT_FIXED;
-
-       target = (wchar_t*)&p_get[substitute_name_offset];
-       target_nchars = substitute_name_len / 2;
-       /* Note: target is not necessarily null-terminated */
-
-       status = win32_maybe_rpfix_target(target, &target_nchars,
-                                         capture_root_ino, capture_root_dev);
-       if (status & RP_FIXED) {
-               size_t target_nbytes = target_nchars * 2;
-               size_t print_nbytes = target_nbytes;
-               wchar_t target_copy[target_nchars];
-               wchar_t *print_name = target_copy;
+       struct reparse_data rpdata;
+       DWORD rpbuflen;
+       int ret;
+       enum rp_status rp_status;
 
-               if (status == RP_FIXED_FULLPATH) {
+       rpbuflen = *rpbuflen_p;
+       ret = parse_reparse_data(rpbuf, rpbuflen, &rpdata);
+       if (ret)
+               return -ret;
+
+       rp_status = win32_capture_maybe_rpfix_target(rpdata.substitute_name,
+                                                    &rpdata.substitute_name_nbytes,
+                                                    capture_root_ino,
+                                                    capture_root_dev,
+                                                    le32_to_cpu(*(u32*)rpbuf));
+       if (rp_status & RP_FIXED) {
+               wimlib_assert(rpdata.substitute_name_nbytes % 2 == 0);
+               utf16lechar substitute_name_copy[rpdata.substitute_name_nbytes / 2];
+               wmemcpy(substitute_name_copy, rpdata.substitute_name,
+                       rpdata.substitute_name_nbytes / 2);
+               rpdata.substitute_name = substitute_name_copy;
+               rpdata.print_name = substitute_name_copy;
+               rpdata.print_name_nbytes = rpdata.substitute_name_nbytes;
+               if (rp_status == RP_FIXED_FULLPATH) {
                        /* "full path", meaning \??\ prefixed.  We should not
                         * include this prefix in the print name, as it is
                         * apparently meant for the filesystem driver only. */
-                       print_nbytes -= 8;
-                       print_name += 4;
+                       rpdata.print_name += 4;
+                       rpdata.print_name_nbytes -= 8;
                }
-               wmemcpy(target_copy, target, target_nchars);
-               p_put = rpbuf + 8;
-               p_put = put_u16(p_put, 0); /* Substitute name offset */
-               p_put = put_u16(p_put, target_nbytes); /* Substitute name length */
-               p_put = put_u16(p_put, target_nbytes + 2); /* Print name offset */
-               p_put = put_u16(p_put, print_nbytes); /* Print name length */
-               if (rptag == WIM_IO_REPARSE_TAG_SYMLINK)
-                       p_put = put_u32(p_put, 1);
-               p_put = put_bytes(p_put, target_nbytes, target_copy);
-               p_put = put_u16(p_put, 0);
-               p_put = put_bytes(p_put, print_nbytes, print_name);
-               p_put = put_u16(p_put, 0);
-
-               /* Wrote the end of the reparse data.  Recalculate the length,
-                * set the length field correctly, and return it. */
-               rpbuflen = p_put - rpbuf;
-               put_u16(rpbuf + 4, rpbuflen - 8);
-               *rpbuflen_p = rpbuflen;
-       }
-       return status;
+               ret = make_reparse_buffer(&rpdata, rpbuf);
+               if (ret == 0)
+                       ret = rp_status;
+               else
+                       ret = -ret;
+       } else {
+               ret = rp_status;
+       }
+       return ret;
 }
 
+/*
+ * Loads the reparse point data from a reparse point into memory, optionally
+ * fixing the targets of absolute symbolic links and junction points to be
+ * relative to the root of capture.
+ *
+ * @hFile:  Open handle to the reparse point.
+ * @path:   Path to the reparse point.  Used for error messages only.
+ * @params: Additional parameters, including whether to do reparse point fixups
+ *          or not.
+ * @rpbuf:  Buffer of length at least REPARSE_POINT_MAX_SIZE bytes into which
+ *          the reparse point buffer will be loaded.
+ * @rpbuflen_ret:  On success, the length of the reparse point buffer in bytes
+ *                 is written to this location.
+ *
+ * Returns:
+ *     On success, returns an `enum rp_status' value that indicates if and/or
+ *     how the reparse point fixup was done.
+ *
+ *     On failure, returns a negative value that is a negated WIMLIB_ERR_*
+ *     code.
+ */
 static int
 win32_get_reparse_data(HANDLE hFile, const wchar_t *path,
                       struct add_image_params *params,
-                      void *reparse_data, size_t *reparse_data_len_ret)
+                      u8 *rpbuf, u16 *rpbuflen_ret)
 {
        DWORD bytesReturned;
        u32 reparse_tag;
-       enum rp_status status;
+       int ret;
+       u16 rpbuflen;
 
        DEBUG("Loading reparse data from \"%ls\"", path);
        if (!DeviceIoControl(hFile, FSCTL_GET_REPARSE_POINT,
                             NULL, /* "Not used with this operation; set to NULL" */
                             0, /* "Not used with this operation; set to 0" */
-                            reparse_data, /* "A pointer to a buffer that
+                            rpbuf, /* "A pointer to a buffer that
                                                   receives the reparse point data */
                             REPARSE_POINT_MAX_SIZE, /* "The size of the output
                                                        buffer, in bytes */
@@ -820,26 +786,27 @@ win32_get_reparse_data(HANDLE hFile, const wchar_t *path,
                win32_error(err);
                return -WIMLIB_ERR_READ;
        }
-       if (bytesReturned < 8) {
+       if (bytesReturned < 8 || bytesReturned > REPARSE_POINT_MAX_SIZE) {
                ERROR("Reparse data on \"%ls\" is invalid", path);
-               return -WIMLIB_ERR_READ;
+               return -WIMLIB_ERR_INVALID_REPARSE_DATA;
        }
 
-       reparse_tag = le32_to_cpu(*(u32*)reparse_data);
+       rpbuflen = bytesReturned;
+       reparse_tag = le32_to_cpu(*(u32*)rpbuf);
        if (params->add_image_flags & WIMLIB_ADD_IMAGE_FLAG_RPFIX &&
            (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
             reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT))
        {
                /* Try doing reparse point fixup */
-               status = win32_try_capture_rpfix(reparse_data,
-                                                &bytesReturned,
-                                                params->capture_root_ino,
-                                                params->capture_root_dev);
+               ret = win32_capture_try_rpfix(rpbuf,
+                                             &rpbuflen,
+                                             params->capture_root_ino,
+                                             params->capture_root_dev);
        } else {
-               status = RP_NOT_FIXED;
+               ret = RP_NOT_FIXED;
        }
-       *reparse_data_len_ret = bytesReturned;
-       return status;
+       *rpbuflen_ret = rpbuflen;
+       return ret;
 }
 
 static DWORD WINAPI
@@ -1140,8 +1107,8 @@ win32_build_dentry_tree_recursive(struct wim_dentry **root_ret,
        DWORD err;
        u64 file_size;
        int ret;
-       void *reparse_data;
-       size_t reparse_data_len;
+       u8 *rpbuf;
+       u16 rpbuflen;
        u16 not_rpfixed;
 
        if (exclude_path(path, path_num_chars, params->config, true)) {
@@ -1192,9 +1159,9 @@ win32_build_dentry_tree_recursive(struct wim_dentry **root_ret,
        }
 
        if (file_info.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
-               reparse_data = alloca(REPARSE_POINT_MAX_SIZE);
+               rpbuf = alloca(REPARSE_POINT_MAX_SIZE);
                ret = win32_get_reparse_data(hFile, path, params,
-                                            reparse_data, &reparse_data_len);
+                                            rpbuf, &rpbuflen);
                if (ret < 0) {
                        /* WIMLIB_ERR_* (inverted) */
                        ret = -ret;
@@ -1273,9 +1240,8 @@ win32_build_dentry_tree_recursive(struct wim_dentry **root_ret,
                /* Reparse point: set the reparse data (which we read already)
                 * */
                inode->i_not_rpfixed = not_rpfixed;
-               inode->i_reparse_tag = le32_to_cpu(*(u32*)reparse_data);
-               ret = inode_set_unnamed_stream(inode, reparse_data + 8,
-                                              reparse_data_len - 8,
+               inode->i_reparse_tag = le32_to_cpu(*(u32*)rpbuf);
+               ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpbuflen - 8,
                                               params->lookup_table);
        } else if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) {
                /* Directory (not a reparse point) --- recurse to children */
@@ -1374,39 +1340,122 @@ win32_build_dentry_tree(struct wim_dentry **root_ret,
        return ret;
 }
 
+/* Extraction-time reparse point fixup: rewrites the absolute symbolic link or
+ * junction in @rpbuf (full reparse buffer, 8-byte header included) so that its
+ * target lies under @extract_root_realpath, a full "X:\..." path that is
+ * @extract_root_realpath_nchars characters long.  Returns 0 if the buffer was
+ * rewritten, or if parse_substitute_name() gave a negative result and the
+ * buffer was left untouched; otherwise returns a nonzero error code. */
+static int
+win32_extract_try_rpfix(u8 *rpbuf,
+                       const wchar_t *extract_root_realpath,
+                       unsigned extract_root_realpath_nchars)
+{
+       struct reparse_data rpdata;
+       size_t target_nchars;
+       size_t stripped_nchars;
+       wchar_t *stripped_target;
+       size_t stripped_target_nchars;
+       utf16lechar *new_target;
+       utf16lechar *new_print_name;
+       size_t new_target_nchars;
+       size_t new_print_name_nchars;
+       utf16lechar *p;
+       int ret;
+
+       ret = parse_reparse_data(rpbuf, 8 + le16_to_cpu(*(u16*)(rpbuf + 4)),
+                                &rpdata);
+       if (ret)
+               return ret;
+
+       if (extract_root_realpath[0] == L'\0' ||
+           extract_root_realpath[1] != L':' ||
+           extract_root_realpath[2] != L'\\')
+       {
+               ERROR("Can't understand full path format \"%ls\".  "
+                     "Try turning reparse point fixups off...",
+                     extract_root_realpath);
+               return WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED;
+       }
+
+       ret = parse_substitute_name(rpdata.substitute_name,
+                                   rpdata.substitute_name_nbytes,
+                                   rpdata.rptag);
+       if (ret < 0)
+               return 0;
+       /* Skip only the prefix parse_substitute_name() counted; it need not be 6 */
+       stripped_nchars = ret;
+       target_nchars = rpdata.substitute_name_nbytes / sizeof(utf16lechar);
+       stripped_target = rpdata.substitute_name + stripped_nchars;
+       stripped_target_nchars = target_nchars - stripped_nchars;
+
+       new_target = alloca((6 + extract_root_realpath_nchars +
+                            stripped_target_nchars) * sizeof(utf16lechar));
+       p = new_target;
+       if (stripped_nchars == 6) {
+               /* Include \??\ prefix if it was present before */
+               wmemcpy(p, L"\\??\\", 4);
+               p += 4;
+       }
+       /* Print name excludes the \??\ if present. */
+       new_print_name = p;
+       if (stripped_nchars != 0) {
+               /* Had a drive letter: use the extract root's instead */
+               *p++ = extract_root_realpath[0];
+               *p++ = extract_root_realpath[1];
+       }
+       /* Copy the rest of the extract root */
+       wmemcpy(p, extract_root_realpath + 2, extract_root_realpath_nchars - 2);
+       p += extract_root_realpath_nchars - 2;
+       /* Append the stripped target */
+       wmemcpy(p, stripped_target, stripped_target_nchars);
+       p += stripped_target_nchars;
+       new_target_nchars = p - new_target;
+       new_print_name_nchars = p - new_print_name;
+
+       if (new_target_nchars * sizeof(utf16lechar) >= REPARSE_POINT_MAX_SIZE ||
+           new_print_name_nchars * sizeof(utf16lechar) >= REPARSE_POINT_MAX_SIZE)
+       {
+               ERROR("Path names too long to do reparse point fixup!");
+               return WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED;
+       }
+       rpdata.substitute_name = new_target;
+       rpdata.substitute_name_nbytes = new_target_nchars * sizeof(utf16lechar);
+       rpdata.print_name = new_print_name;
+       rpdata.print_name_nbytes = new_print_name_nchars * sizeof(utf16lechar);
+       return make_reparse_buffer(&rpdata, rpbuf);
+}
+
 /* Wrapper around the FSCTL_SET_REPARSE_POINT ioctl to set the reparse data on
  * an extracted reparse point. */
 static int
 win32_set_reparse_data(HANDLE h,
-                      u32 reparse_tag,
+                      const struct wim_inode *inode,
                       const struct wim_lookup_table_entry *lte,
-                      const wchar_t *path)
+                      const wchar_t *path,
+                      const struct apply_args *args)
 {
        int ret;
-       u8 *buf;
-       size_t len;
+       u8 rpbuf[REPARSE_POINT_MAX_SIZE];
+       DWORD bytesReturned;
 
-       if (!lte) {
-               WARNING("\"%ls\" is marked as a reparse point but had no reparse data",
-                       path);
-               return 0;
-       }
-       len = wim_resource_size(lte);
-       if (len > 16 * 1024 - 8) {
-               WARNING("\"%ls\": reparse data too long!", path);
-               return 0;
-       }
+       DEBUG("Setting reparse data on \"%ls\"", path);
 
-       /* The WIM stream omits the ReparseTag and ReparseDataLength fields, so
-        * leave 8 bytes of space for them at the beginning of the buffer, then
-        * set them manually. */
-       buf = alloca(len + 8);
-       ret = read_full_resource_into_buf(lte, buf + 8, false);
+       ret = wim_inode_get_reparse_data(inode, rpbuf);
        if (ret)
                return ret;
-       *(u32*)(buf + 0) = cpu_to_le32(reparse_tag);
-       *(u16*)(buf + 4) = cpu_to_le16(len);
-       *(u16*)(buf + 6) = 0;
+
+       if (args->extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX &&
+           (inode->i_reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
+            inode->i_reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT) &&
+           !inode->i_not_rpfixed)
+       {
+               ret = win32_extract_try_rpfix(rpbuf,
+                                             args->target_realpath,
+                                             args->target_realpath_len);
+               if (ret)
+                       return WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED;
+       }
 
        /* Set the reparse data on the open file using the
         * FSCTL_SET_REPARSE_POINT ioctl.
@@ -1429,8 +1478,8 @@ win32_set_reparse_data(HANDLE h,
         *
         *  "Not used with this operation; set to NULL."
         */
-       DWORD bytesReturned;
-       if (!DeviceIoControl(h, FSCTL_SET_REPARSE_POINT, buf, len + 8,
+       if (!DeviceIoControl(h, FSCTL_SET_REPARSE_POINT, rpbuf,
+                            8 + le16_to_cpu(*(u16*)(rpbuf + 4)),
                             NULL, 0,
                             &bytesReturned /* lpBytesReturned */,
                             NULL /* lpOverlapped */))
@@ -1440,8 +1489,8 @@ win32_set_reparse_data(HANDLE h,
                win32_error(err);
                if (err == ERROR_ACCESS_DENIED || err == ERROR_PRIVILEGE_NOT_HELD)
                        return WIMLIB_ERR_INSUFFICIENT_PRIVILEGES_TO_EXTRACT;
-               else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
-                        reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
+               else if (inode->i_reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK ||
+                        inode->i_reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT)
                        return WIMLIB_ERR_LINK;
                else
                        return WIMLIB_ERR_WRITE;
@@ -2028,11 +2077,9 @@ win32_finish_extract_stream(HANDLE h, const struct wim_inode *inode,
                 * entirely on such volumes.) */
                if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
                        if (args->vol_flags & FILE_SUPPORTS_REPARSE_POINTS) {
-                               DEBUG("Setting reparse data on \"%ls\"",
-                                     stream_path);
-                               ret = win32_set_reparse_data(h,
-                                                            inode->i_reparse_tag,
-                                                            lte, stream_path);
+                               ret = win32_set_reparse_data(h, inode,
+                                                            lte, stream_path,
+                                                            args);
                                if (ret)
                                        return ret;
                        } else {
index f189183..72eca4b 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1486,7 +1486,6 @@ wimlib_image_name_in_use(const WIMStruct *w, const tchar *name)
 WIMLIBAPI int
 wimlib_extract_xml_data(WIMStruct *w, FILE *fp)
 {
-       size_t bytes_written;
        size_t size;
        void *buf;
        int ret;