]> wimlib.net Git - wimlib/blobdiff - src/symlink.c
Win32: Fix, comment capture reparse-point fixups
[wimlib] / src / symlink.c
index 366e54ed36af853517d0d43b5eac0e53619c82c3..f005cd715f025ef2730472001669f1e68007afab 100644 (file)
 
 #include <sys/stat.h>
 
+#ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+#endif
+
 /*
  * Find the symlink target of a symbolic link or junction point in the WIM.
  *
@@ -77,7 +81,8 @@ get_symlink_name(const void *resource, size_t resource_len, char *buf,
                header_size = 12;
                p += 4;
        }
-       if (header_size + substitute_name_offset + substitute_name_len > resource_len)
+       if (header_size +
+           substitute_name_offset + substitute_name_len > resource_len)
                return -EIO;
 
        ret = utf16le_to_tstr((const utf16lechar*)(p + substitute_name_offset),
@@ -86,11 +91,6 @@ get_symlink_name(const void *resource, size_t resource_len, char *buf,
        if (ret)
                return -errno;
 
-       if (link_target_len + 1 > buf_len) {
-               ret = -ENAMETOOLONG;
-               goto out;
-       }
-
        DEBUG("Interpeting substitute name \"%s\" (ReparseTag=0x%x)",
              link_target, reparse_tag);
        translate_slashes = true;
@@ -118,7 +118,7 @@ get_symlink_name(const void *resource, size_t resource_len, char *buf,
                   link_target_len >= 3 &&
                   translated_target[0] != '\0' &&
                   translated_target[1] == ':' &&
-                  translated_target[2] == '/')
+                  translated_target[2] == '\\')
        {
                /* "Absolute" symlink, with drive letter */
                translated_target += 2;
@@ -126,14 +126,14 @@ get_symlink_name(const void *resource, size_t resource_len, char *buf,
        } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK &&
                   link_target_len >= 1)
        {
-               if (translated_target[0] == '/')
+               if (translated_target[0] == '\\')
                        /* "Absolute" symlink, without drive letter */
                        ;
                else
                        /* "Relative" symlink, without drive letter */
                        ;
        } else {
-               ERROR("Invalid reparse point: \"%s\"", translated_target);
+               ERROR("Invalid reparse point substitute name: \"%s\"", translated_target);
                ret = -EIO;
                goto out;
        }
@@ -142,20 +142,37 @@ get_symlink_name(const void *resource, size_t resource_len, char *buf,
                for (size_t i = 0; i < link_target_len; i++)
                        if (translated_target[i] == '\\')
                                translated_target[i] = '/';
-       memcpy(buf, translated_target, link_target_len + 1);
-       ret = link_target_len;
+
+       if (link_target_len > buf_len) {
+               link_target_len = buf_len;
+               ret = -ENAMETOOLONG;
+       } else {
+               ret = link_target_len;
+       }
+       memcpy(buf, translated_target, link_target_len);
 out:
        FREE(link_target);
        return ret;
 }
 
+#define SYMBOLIC_LINK_RELATIVE 0x00000001
+
+/* Given a UNIX symlink target, prepare the corresponding symbolic link reparse
+ * data buffer. */
 static int
-make_symlink_reparse_data_buf(const char *symlink_target,
-                             size_t *len_ret, void **buf_ret)
+make_symlink_reparse_data_buf(const char *symlink_target, void *rpdata,
+                             size_t *rplen_ret)
 {
+       int ret;
        utf16lechar *name_utf16le;
        size_t name_utf16le_nbytes;
-       int ret;
+       size_t substitute_name_nbytes;
+       size_t print_name_nbytes;
+       static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0";
+       static const char abs_print_name_prefix[4] = "C\0:\0";
+       u32 flags;
+       size_t rplen;
+       void *p;
 
        ret = tstr_to_utf16le(symlink_target, strlen(symlink_target),
                              &name_utf16le, &name_utf16le_nbytes);
@@ -166,25 +183,101 @@ make_symlink_reparse_data_buf(const char *symlink_target,
                if (name_utf16le[i] == cpu_to_le16('/'))
                        name_utf16le[i] = cpu_to_le16('\\');
 
-       size_t len = 12 + (name_utf16le_nbytes + 2) * 2;
-       void *buf = MALLOC(len);
-       if (buf) {
-               void *p = buf;
-               p = put_u16(p, 0); /* Substitute name offset */
-               p = put_u16(p, name_utf16le_nbytes); /* Substitute name length */
-               p = put_u16(p, name_utf16le_nbytes + 2); /* Print name offset */
-               p = put_u16(p, name_utf16le_nbytes); /* Print name length */
-               p = put_u32(p, 1); /* flags: 0 if relative link, otherwise 1 */
-               p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
-               p = put_u16(p, 0);
-               p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
-               p = put_u16(p, 0);
-               *len_ret = len;
-               *buf_ret = buf;
-               ret = 0;
-       } else {
-               ret = WIMLIB_ERR_NOMEM;
+       /* Compatibility notes:
+        *
+        * On UNIX, an absolute symbolic link begins with '/'; everything else
+        * is a relative symbolic link.  (Quite simple compared to the various
+        * ways to provide Windows paths.)
+        *
+        * To change a UNIX relative symbolic link to Windows format, we only
+        * need to translate it to UTF-16LE and replace forward slashes with
+        * backslashes.  We do not make any attempt to handle filename
+        * character problems, such as a link target that itself contains
+        * backslashes on UNIX.  Then, for these relative links, we set the
+        * reparse header @flags field to SYMBOLIC_LINK_RELATIVE.
+        *
+        * For UNIX absolute symbolic links, we must set the @flags field to 0.
+        * Then, there are multiple options for how to actually represent the
+        * absolute link targets:
+        *
+        * (1) An absolute path beginning with one backslash character, similar
+        * to UNIX-style, just with a different path separator.  Print name same
+        * as substitute name.
+        *
+        * (2) Absolute path beginning with drive letter followed by a
+        * backslash.  Print name same as substitute name.
+        *
+        * (3) Absolute path beginning with drive letter followed by a
+        * backslash; substitute name prefixed with \??\, otherwise same as
+        * print name.
+        *
+        * We choose option (3) here, and we just assume C: for the drive
+        * letter.  The reasoning for this is:
+        *
+        * (1) Microsoft imagex.exe has a bug where it does not attempt to do
+        * reparse point fixups for these links, even though they are valid
+        * absolute links.  (Note: in this case prefixing the substitute name
+        * with \??\ does not work; it just makes the data unable to be restored
+        * at all.)
+        * (2) Microsoft imagex.exe will fail when doing reparse point fixups
+        * for these.  It apparently contains a bug that causes it to create an
+        * invalid reparse point, which then cannot be restored.
+        * (3) This is the only option I tested for which reparse point fixups
+        * worked properly in Microsoft imagex.exe.
+        *
+        * So option (3) it is.
+        */
+
+       substitute_name_nbytes = name_utf16le_nbytes;
+       print_name_nbytes = name_utf16le_nbytes;
+       if (symlink_target[0] == '/') {
+               substitute_name_nbytes += sizeof(abs_subst_name_prefix);
+               print_name_nbytes += sizeof(abs_print_name_prefix);
+       }
+
+       rplen = 12 + substitute_name_nbytes + print_name_nbytes +
+                       2 * sizeof(utf16lechar);
+
+       if (rplen > REPARSE_POINT_MAX_SIZE) {
+               ERROR("Symlink \"%s\" is too long!", symlink_target);
+               return WIMLIB_ERR_LINK;
        }
+
+       p = rpdata;
+
+       /* Substitute name offset */
+       p = put_u16(p, 0);
+
+       /* Substitute name length */
+       p = put_u16(p, substitute_name_nbytes);
+
+       /* Print name offset */
+       p = put_u16(p, substitute_name_nbytes + sizeof(utf16lechar));
+
+       /* Print name length */
+       p = put_u16(p, print_name_nbytes);
+
+       /* Flags */
+       flags = 0;
+       if (symlink_target[0] != '/')
+               flags |= SYMBOLIC_LINK_RELATIVE;
+       p = put_u32(p, flags);
+
+       /* Substitute name */
+       if (symlink_target[0] == '/')
+               p = put_bytes(p, sizeof(abs_subst_name_prefix), abs_subst_name_prefix);
+       p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
+       p = put_u16(p, 0);
+
+       /* Print name */
+       if (symlink_target[0] == '/')
+               p = put_bytes(p, sizeof(abs_print_name_prefix), abs_print_name_prefix);
+       p = put_bytes(p, name_utf16le_nbytes, name_utf16le);
+       p = put_u16(p, 0);
+
+       *rplen_ret = rplen;
+       ret = 0;
+out_free_name_utf16le:
        FREE(name_utf16le);
        return ret;
 }
@@ -201,6 +294,7 @@ inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len,
 {
        const struct wim_lookup_table_entry *lte;
        int ret;
+       u8 *res_buf;
 
        wimlib_assert(inode_is_symlink(inode));
 
@@ -211,12 +305,12 @@ inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len,
        if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE)
                return -EIO;
 
-       u8 res_buf[wim_resource_size(lte)];
+       res_buf = alloca(wim_resource_size(lte));
        ret = read_full_resource_into_buf(lte, res_buf, threadsafe);
        if (ret)
                return -EIO;
-       return get_symlink_name(res_buf, wim_resource_size(lte), buf,
-                               buf_len, inode->i_reparse_tag);
+       return get_symlink_name(res_buf, wim_resource_size(lte),
+                               buf, buf_len, inode->i_reparse_tag);
 }
 
 /*
@@ -238,55 +332,28 @@ inode_set_symlink(struct wim_inode *inode,
 
 {
        int ret;
-       size_t symlink_buf_len;
-       struct wim_lookup_table_entry *lte = NULL, *existing_lte;
-       u8 symlink_buf_hash[SHA1_HASH_SIZE];
-       void *symlink_buf;
 
-       ret = make_symlink_reparse_data_buf(target, &symlink_buf_len,
-                                           &symlink_buf);
-       if (ret)
-               return ret;
+       /* Buffer for reparse point data */
+       u8 rpdata[REPARSE_POINT_MAX_SIZE];
 
-       DEBUG("Made symlink reparse data buf (len = %zu, name len = %zu)",
-                       symlink_buf_len, symlink_buf_len);
+       /* Actual length of the reparse point data (to be calculated by
+        * make_symlink_reparse_data_buf()) */
+       size_t rplen;
 
-       sha1_buffer(symlink_buf, symlink_buf_len, symlink_buf_hash);
-
-       existing_lte = __lookup_resource(lookup_table, symlink_buf_hash);
-
-       if (existing_lte) {
-               lte = existing_lte;
-               FREE(symlink_buf);
-               symlink_buf = NULL;
-       } else {
-               DEBUG("Creating new lookup table entry for symlink buf");
-               lte = new_lookup_table_entry();
-               if (!lte) {
-                       ret = WIMLIB_ERR_NOMEM;
-                       goto out_free_symlink_buf;
-               }
-               lte->resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
-               lte->attached_buffer              = symlink_buf;
-               lte->resource_entry.original_size = symlink_buf_len;
-               copy_hash(lte->hash, symlink_buf_hash);
-       }
+       DEBUG("Creating reparse point data buffer "
+             "for UNIX symlink target \"%s\"", target);
 
-       inode->i_lte = lte;
-       inode->i_resolved = 1;
+       ret = make_symlink_reparse_data_buf(target, rpdata, &rplen);
+       if (ret)
+               return ret;
 
-       DEBUG("Loaded symlink buf");
+       ret = inode_set_unnamed_stream(inode, rpdata, rplen, lookup_table);
+       if (ret)
+               return ret;
 
-       if (existing_lte)
-               lte->refcnt++;
-       else
-               lookup_table_insert(lookup_table, lte);
        if (lte_ret)
-               *lte_ret = lte;
+               *lte_ret = inode->i_lte;
        return 0;
-out_free_symlink_buf:
-       FREE(symlink_buf);
-       return ret;
 }
 
 static int
@@ -310,24 +377,21 @@ unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret)
 #ifdef __WIN32__
 #  include "win32.h"
 #  define RP_PATH_SEPARATOR L'\\'
+#  define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/')
 #  define os_get_ino_and_dev win32_get_file_and_vol_ids
 #else
 #  define RP_PATH_SEPARATOR '/'
+#  define is_rp_path_separator(c) ((c) == '/')
 #  define os_get_ino_and_dev unix_get_ino_and_dev
 #endif
 
-/* Fix up reparse points--- mostly shared between UNIX and Windows */
+/* Fix up absolute symbolic link targets--- mostly shared between UNIX and
+ * Windows */
 tchar *
 fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
 {
        tchar *p = dest;
 
-#ifdef __WIN32__
-       /* Skip over drive letter */
-       if (*p != RP_PATH_SEPARATOR)
-               p += 2;
-#endif
-
        DEBUG("Fixing symlink or junction \"%"TS"\"", dest);
        for (;;) {
                tchar save;
@@ -335,7 +399,7 @@ fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
                u64 ino;
                u64 dev;
 
-               while (*p == RP_PATH_SEPARATOR)
+               while (is_rp_path_separator(*p))
                        p++;
 
                save = *p;
@@ -343,21 +407,17 @@ fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
                ret = os_get_ino_and_dev(dest, &ino, &dev);
                *p = save;
 
+               if (ret) /* stat() failed before we got to the capture root---
+                           assume the link points outside it. */
+                       return NULL;
+
                if (ino == capture_root_ino && dev == capture_root_dev) {
                        /* Link points inside capture root.  Return abbreviated
                         * path. */
                        if (*p == T('\0'))
                                *(p - 1) = RP_PATH_SEPARATOR;
-                       while (p - 1 >= dest && *(p - 1) == RP_PATH_SEPARATOR)
+                       while (p - 1 >= dest && is_rp_path_separator(*(p - 1)))
                                p--;
-               #ifdef __WIN32__
-                       /* Add back drive letter */
-                       if (*dest != RP_PATH_SEPARATOR) {
-                               *--p = *(dest + 1);
-                               *--p = *dest;
-                       }
-               #endif
-                       wimlib_assert(p >= dest);
                        return p;
                }
 
@@ -368,7 +428,6 @@ fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev)
 
                do {
                        p++;
-               } while (*p != RP_PATH_SEPARATOR && *p != T('\0'));
+               } while (!is_rp_path_separator(*p) && *p != T('\0'));
        }
 }
-