X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fsymlink.c;h=f005cd715f025ef2730472001669f1e68007afab;hp=3e89a88eead6cfed31980c86f9f4fd51a2e5ca4e;hb=ca8727cf22a445adc20eb1f8b7a12b8eededc32d;hpb=a2c56fd332b6c37026b24c8a216e0ff0bf140d80 diff --git a/src/symlink.c b/src/symlink.c index 3e89a88e..f005cd71 100644 --- a/src/symlink.c +++ b/src/symlink.c @@ -5,7 +5,7 @@ */ /* - * Copyright (C) 2012 Eric Biggers + * Copyright (C) 2012, 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -24,35 +24,46 @@ */ #include "dentry.h" -#include "io.h" +#include "buffer_io.h" #include "lookup_table.h" #include "sha1.h" #include +/* UNIX version of getting and setting the data in reparse points */ +#if !defined(__WIN32__) + +#include + +#ifdef HAVE_ALLOCA_H +# include +#endif + /* * Find the symlink target of a symbolic link or junction point in the WIM. * - * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx - * Except the first 8 bytes aren't included in the resource (presumably because - * we already know the reparse tag from the dentry, and we already know the - * reparse tag len from the lookup table entry resource length). + * See http://msdn.microsoft.com/en-us/library/cc232006(v=prot.10).aspx for a + * description of the format of the so-called "reparse point data buffers". + * + * But, in the WIM format, the first 8 bytes of the reparse point data buffer + * are omitted, presumably because we already know the reparse tag from the + * dentry, and we already know the reparse tag length from the lookup table + * entry resource length. 
*/ -static ssize_t get_symlink_name(const u8 *resource, size_t resource_len, - char *buf, size_t buf_len, - u32 reparse_tag) +static ssize_t +get_symlink_name(const void *resource, size_t resource_len, char *buf, + size_t buf_len, u32 reparse_tag) { - const u8 *p = resource; + const void *p = resource; u16 substitute_name_offset; u16 substitute_name_len; u16 print_name_offset; u16 print_name_len; char *link_target; + char *translated_target; size_t link_target_len; ssize_t ret; unsigned header_size; - char *translated_target; - bool is_absolute; - u32 flags; + bool translate_slashes; if (resource_len < 12) return -EIO; @@ -60,113 +71,230 @@ static ssize_t get_symlink_name(const u8 *resource, size_t resource_len, p = get_u16(p, &substitute_name_len); p = get_u16(p, &print_name_offset); p = get_u16(p, &print_name_len); - get_u32(p, &flags); wimlib_assert(reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK || reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT); - /* I think that some junction points incorrectly get marked as symbolic - * links. So, parse the link buffer as a symlink if the flags seem - * plausible. */ - if (flags <= 1) - reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK; - - if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT) { + if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT) header_size = 8; - } else { - is_absolute = (flags & 1) ? 
false : true; + else { header_size = 12; p += 4; /* skip the 4-byte flags field that follows the four u16 fields in a symlink buffer */ } - if (header_size + substitute_name_offset + substitute_name_len > resource_len) + if (header_size + + substitute_name_offset + substitute_name_len > resource_len) return -EIO; - link_target = utf16_to_utf8((const char *)p + substitute_name_offset, - substitute_name_len, - &link_target_len); - if (!link_target) - return -EIO; + ret = utf16le_to_tstr((const utf16lechar*)(p + substitute_name_offset), + substitute_name_len, + &link_target, &link_target_len); + if (ret) + return -errno; /* NOTE(review): assumes utf16le_to_tstr() leaves a meaningful errno on failure -- confirm */ - if (link_target_len + 1 > buf_len) { - ret = -ENAMETOOLONG; + DEBUG("Interpeting substitute name \"%s\" (ReparseTag=0x%x)", + link_target, reparse_tag); + translate_slashes = true; + translated_target = link_target; + if (link_target_len >= 7 && + translated_target[0] == '\\' && + translated_target[1] == '?' && + translated_target[2] == '?' && + translated_target[3] == '\\' && + translated_target[4] != '\0' && + translated_target[5] == ':' && + translated_target[6] == '\\') + { + /* "Full" symlink or junction (\??\x:\ prefixed path): the 6-character \??\x: prefix is dropped and the backslash after it is kept */ + translated_target += 6; + link_target_len -= 6; + } else if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT && + link_target_len >= 12 && + memcmp(translated_target, "\\\\?\\Volume{", 11) == 0 && + translated_target[link_target_len - 1] == '\\') + { + /* Volume junction. Can't really do anything with it. 
*/ + translate_slashes = false; + } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK && + link_target_len >= 3 && + translated_target[0] != '\0' && + translated_target[1] == ':' && + translated_target[2] == '\\') + { + /* "Absolute" symlink, with drive letter */ + translated_target += 2; + link_target_len -= 2; + } else if (reparse_tag == WIM_IO_REPARSE_TAG_SYMLINK && + link_target_len >= 1) + { + if (translated_target[0] == '\\') + /* "Absolute" symlink, without drive letter */ + ; + else + /* "Relative" symlink, without drive letter */ + ; + } else { + ERROR("Invalid reparse point substitute name: \"%s\"", translated_target); + ret = -EIO; goto out; } - translated_target = link_target; - if (reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT || is_absolute) { - if (link_target_len < 7 - || memcmp(translated_target, "\\??\\", 4) != 0 - || translated_target[4] == '\0' - || translated_target[5] != ':' - || translated_target[6] != '\\') { - ret = -EIO; - goto out; - } - translated_target += 4; - link_target_len -= 4; - /* There's a drive letter, so just leave the backslashes since - * it won't go anyhwere on UNIX anyway... - * - * XXX - * NTFS-3g tries to re-map these links to actually point to - * something, so maybe we could do something like that here - * XXX*/ - } else { + if (translate_slashes) for (size_t i = 0; i < link_target_len; i++) if (translated_target[i] == '\\') translated_target[i] = '/'; - } - memcpy(buf, translated_target, link_target_len + 1); - ret = link_target_len; + if (link_target_len > buf_len) { + link_target_len = buf_len; + ret = -ENAMETOOLONG; + } else { + ret = link_target_len; + } + memcpy(buf, translated_target, link_target_len); /* Copies exactly link_target_len bytes (already clamped to buf_len above), i.e. 
without the extra "+ 1" byte that the removed code copied; on truncation the clamped prefix is still copied and -ENAMETOOLONG is returned. */ out: FREE(link_target); return ret; } -void *make_symlink_reparse_data_buf(const char *symlink_target, size_t *len_ret) +#define SYMBOLIC_LINK_RELATIVE 0x00000001 + +/* Given a UNIX symlink target, prepare the corresponding symbolic link reparse + * data buffer. 
*/ +static int +make_symlink_reparse_data_buf(const char *symlink_target, void *rpdata, + size_t *rplen_ret) { - size_t utf8_len = strlen(symlink_target); - size_t utf16_len; - char *name_utf16 = utf8_to_utf16(symlink_target, utf8_len, &utf16_len); - if (!name_utf16) - return NULL; - /*DEBUG("utf16_len = %zu", utf16_len);*/ - for (size_t i = 0; i < utf16_len / 2; i++) - if (((u16*)name_utf16)[i] == cpu_to_le16('/')) - ((u16*)name_utf16)[i] = cpu_to_le16('\\'); - size_t len = 12 + utf16_len * 2 + 4; - void *buf = MALLOC(len); - if (!buf) - goto out; - /* XXX Fix absolute paths */ - - u8 *p = buf; - p = put_u16(p, utf16_len + 2); /* Substitute name offset */ - p = put_u16(p, utf16_len); /* Substitute name length */ - p = put_u16(p, 0); /* Print name offset */ - p = put_u16(p, utf16_len); /* Print name length */ - p = put_u32(p, 1); - p = put_bytes(p, utf16_len, (const u8*)name_utf16); + int ret; + utf16lechar *name_utf16le; + size_t name_utf16le_nbytes; + size_t substitute_name_nbytes; + size_t print_name_nbytes; + static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0"; /* the six characters \??\C: as 2-byte little-endian units */ + static const char abs_print_name_prefix[4] = "C\0:\0"; /* the two characters C: as 2-byte little-endian units */ + u32 flags; + size_t rplen; + void *p; + + ret = tstr_to_utf16le(symlink_target, strlen(symlink_target), + &name_utf16le, &name_utf16le_nbytes); + if (ret) + return ret; + + for (size_t i = 0; i < name_utf16le_nbytes / 2; i++) + if (name_utf16le[i] == cpu_to_le16('/')) + name_utf16le[i] = cpu_to_le16('\\'); + + /* Compatibility notes: + * + * On UNIX, an absolute symbolic link begins with '/'; everything else + * is a relative symbolic link. (Quite simple compared to the various + * ways to provide Windows paths.) + * + * To change a UNIX relative symbolic link to Windows format, we only + * need to translate it to UTF-16LE and replace forward slashes with + * backslashes. We do not make any attempt to handle filename character + * problems, such as a link target that itself contains backslashes on + * UNIX. 
Then, for these relative links, we set the reparse header + * @flags field to SYMBOLIC_LINK_RELATIVE. + * + * For UNIX absolute symbolic links, we must set the @flags field to 0. + * Then, there are multiple options as to how to actually represent the + * absolute link targets: + * + * (1) An absolute path beginning with one backslash character, similar + * to UNIX-style, just with a different path separator. Print name same + * as substitute name. + * + * (2) Absolute path beginning with drive letter followed by a + * backslash. Print name same as substitute name. + * + * (3) Absolute path beginning with drive letter followed by a + * backslash; substitute name prefixed with \??\, otherwise same as + * print name. + * + * We choose option (3) here, and we just assume C: for the drive + * letter. The reasoning for this is: + * + * (1) Microsoft imagex.exe has a bug where it does not attempt to do + * reparse point fixups for these links, even though they are valid + * absolute links. (Note: in this case prefixing the substitute name + * with \??\ does not work; it just makes the data unable to be restored + * at all.) + * (2) Microsoft imagex.exe will fail when doing reparse point fixups + * for these. It apparently contains a bug that causes it to create an + * invalid reparse point, which then cannot be restored. + * (3) This is the only option I tested for which reparse point fixups + * worked properly in Microsoft imagex.exe. + * + * So option (3) it is. 
+ */ + + substitute_name_nbytes = name_utf16le_nbytes; + print_name_nbytes = name_utf16le_nbytes; + if (symlink_target[0] == '/') { + substitute_name_nbytes += sizeof(abs_subst_name_prefix); + print_name_nbytes += sizeof(abs_print_name_prefix); + } + + rplen = 12 + substitute_name_nbytes + print_name_nbytes + + 2 * sizeof(utf16lechar); /* 12 = four u16 fields plus the u32 flags written below; the last term covers the zero u16 written after each of the two names */ + + if (rplen > REPARSE_POINT_MAX_SIZE) { + ERROR("Symlink \"%s\" is too long!", symlink_target); /* NOTE(review): the return below does not pass through out_free_name_utf16le, so FREE(name_utf16le) is skipped on this path. */ + return WIMLIB_ERR_LINK; + } + + p = rpdata; + + /* Substitute name offset */ p = put_u16(p, 0); - p = put_bytes(p, utf16_len, (const u8*)name_utf16); + + /* Substitute name length */ + p = put_u16(p, substitute_name_nbytes); + + /* Print name offset */ + p = put_u16(p, substitute_name_nbytes + sizeof(utf16lechar)); + + /* Print name length */ + p = put_u16(p, print_name_nbytes); + + /* Flags */ + flags = 0; + if (symlink_target[0] != '/') + flags |= SYMBOLIC_LINK_RELATIVE; + p = put_u32(p, flags); + + /* Substitute name */ + if (symlink_target[0] == '/') + p = put_bytes(p, sizeof(abs_subst_name_prefix), abs_subst_name_prefix); + p = put_bytes(p, name_utf16le_nbytes, name_utf16le); p = put_u16(p, 0); - /*DEBUG("utf16_len = %zu, len = %zu", utf16_len, len);*/ - *len_ret = len; -out: - FREE(name_utf16); - return buf; + + /* Print name */ + if (symlink_target[0] == '/') + p = put_bytes(p, sizeof(abs_print_name_prefix), abs_print_name_prefix); + p = put_bytes(p, name_utf16le_nbytes, name_utf16le); + p = put_u16(p, 0); + + *rplen_ret = rplen; + ret = 0; +out_free_name_utf16le: + FREE(name_utf16le); + return ret; } -/* Get the symlink target from a dentry. +/* Get the symlink target from a WIM inode. * - * The dentry may be either "real" symlink or a junction point. + * The inode may be either a "real" symlink (reparse tag + * WIM_IO_REPARSE_TAG_SYMLINK), or it may be a junction point (reparse tag + * WIM_IO_REPARSE_TAG_MOUNT_POINT). 
*/ -ssize_t inode_readlink(const struct inode *inode, char *buf, size_t buf_len, - const WIMStruct *w) +ssize_t +inode_readlink(const struct wim_inode *inode, char *buf, size_t buf_len, + const WIMStruct *w, bool threadsafe) { - const struct lookup_table_entry *lte; + const struct wim_lookup_table_entry *lte; int ret; + u8 *res_buf; wimlib_assert(inode_is_symlink(inode)); @@ -174,15 +302,15 @@ ssize_t inode_readlink(const struct inode *inode, char *buf, size_t buf_len, if (!lte) return -EIO; - if (wim_resource_size(lte) > 10000) + if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE) return -EIO; - u8 res_buf[wim_resource_size(lte)]; - ret = read_full_wim_resource(lte, res_buf); - if (ret != 0) + res_buf = alloca(wim_resource_size(lte)); /* The size check above bounds this stack allocation to at most REPARSE_POINT_MAX_SIZE bytes. */ + ret = read_full_resource_into_buf(lte, res_buf, threadsafe); + if (ret) return -EIO; /* Any read failure is reported as -EIO; the specific value of ret is not propagated. */ - return get_symlink_name(res_buf, wim_resource_size(lte), buf, - buf_len, inode->reparse_tag); + return get_symlink_name(res_buf, wim_resource_size(lte), + buf, buf_len, inode->i_reparse_tag); } /* @@ -196,59 +324,110 @@ ssize_t inode_readlink(const struct inode *inode, char *buf, size_t buf_len, * * On failure @dentry and @lookup_table are not modified. 
*/ -int inode_set_symlink(struct inode *inode, const char *target, - struct lookup_table *lookup_table, - struct lookup_table_entry **lte_ret) +int +inode_set_symlink(struct wim_inode *inode, + const char *target, + struct wim_lookup_table *lookup_table, + struct wim_lookup_table_entry **lte_ret) { int ret; - size_t symlink_buf_len; - struct lookup_table_entry *lte = NULL, *existing_lte; - u8 symlink_buf_hash[SHA1_HASH_SIZE]; - void *symlink_buf; - symlink_buf = make_symlink_reparse_data_buf(target, &symlink_buf_len); - if (!symlink_buf) - return WIMLIB_ERR_NOMEM; + /* Buffer for reparse point data */ + u8 rpdata[REPARSE_POINT_MAX_SIZE]; + + /* Actual length of the reparse point data (to be calculated by + * make_symlink_reparse_data_buf()) */ + size_t rplen; + + DEBUG("Creating reparse point data buffer " + "for UNIX symlink target \"%s\"", target); - DEBUG("Made symlink reparse data buf (len = %zu, name len = %zu)", - symlink_buf_len, symlink_buf_len); + ret = make_symlink_reparse_data_buf(target, rpdata, &rplen); + if (ret) + return ret; - sha1_buffer(symlink_buf, symlink_buf_len, symlink_buf_hash); + ret = inode_set_unnamed_stream(inode, rpdata, rplen, lookup_table); + if (ret) + return ret; - existing_lte = __lookup_resource(lookup_table, symlink_buf_hash); + if (lte_ret) + *lte_ret = inode->i_lte; /* lte_ret may be NULL; it is only written when provided, and then receives inode->i_lte. */ + return 0; +} - if (existing_lte) { - lte = existing_lte; - FREE(symlink_buf); - symlink_buf = NULL; +static int +unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret) +{ + struct stat stbuf; + if (stat(path, &stbuf)) { + WARNING_WITH_ERRNO("Failed to stat \"%s\"", path); + /* Treat as a link pointing outside the capture root (it + * most likely is). 
*/ + return WIMLIB_ERR_STAT; } else { - DEBUG("Creating new lookup table entry for symlink buf"); - lte = new_lookup_table_entry(); - if (!lte) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_symlink_buf; - } - lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte->attached_buffer = symlink_buf; - lte->resource_entry.original_size = symlink_buf_len; - lte->resource_entry.size = symlink_buf_len; - copy_hash(lte->hash, symlink_buf_hash); + *ino_ret = stbuf.st_ino; + *dev_ret = stbuf.st_dev; + return 0; } +} - inode->lte = lte; - inode->resolved = true; +#endif /* !defined(__WIN32__) */ - DEBUG("Loaded symlink buf"); +#ifdef __WIN32__ +# include "win32.h" +# define RP_PATH_SEPARATOR L'\\' +# define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/') +# define os_get_ino_and_dev win32_get_file_and_vol_ids +#else +# define RP_PATH_SEPARATOR '/' +# define is_rp_path_separator(c) ((c) == '/') +# define os_get_ino_and_dev unix_get_ino_and_dev +#endif - if (existing_lte) - lte->refcnt++; - else - lookup_table_insert(lookup_table, lte); - if (lte_ret) - *lte_ret = lte; - return 0; -out_free_symlink_buf: - FREE(symlink_buf); - return ret; +/* Fix up absolute symbolic link targets--- mostly shared between UNIX and + * Windows. + * + * Walks @dest one path component at a time and looks up each successive + * prefix with os_get_ino_and_dev(). Returns a pointer into @dest where the + * part of the path at and below the capture root begins, or NULL if a lookup + * fails or no prefix matches (@capture_root_ino, @capture_root_dev). @dest + * is modified in place while it is being examined. */ +tchar * +fixup_symlink(tchar *dest, u64 capture_root_ino, u64 capture_root_dev) +{ + tchar *p = dest; + + DEBUG("Fixing symlink or junction \"%"TS"\"", dest); + for (;;) { + tchar save; + int ret; + u64 ino; + u64 dev; + + while (is_rp_path_separator(*p)) + p++; /* Next: temporarily terminate @dest at p so that only the prefix seen so far is looked up; the saved character is restored right after the call. */ + + save = *p; + *p = T('\0'); + ret = os_get_ino_and_dev(dest, &ino, &dev); + *p = save; + + if (ret) /* stat() failed before we got to the capture root--- + assume the link points outside it. */ + return NULL; + + if (ino == capture_root_ino && dev == capture_root_dev) { + /* Link points inside capture root. Return abbreviated + * path. When the whole target has been consumed (*p is the + * terminator), the character before it is overwritten with a + * separator; p is then moved back over any separators + * immediately preceding it before being returned. 
*/ + if (*p == T('\0')) + *(p - 1) = RP_PATH_SEPARATOR; + while (p - 1 >= dest && is_rp_path_separator(*(p - 1))) + p--; + return p; + } + + if (*p == T('\0')) { + /* Link points outside capture root. */ + return NULL; + } + + do { + p++; + } while (!is_rp_path_separator(*p) && *p != T('\0')); /* p now rests on the next separator or on the terminator, i.e. at the end of the current path component */ + } }