X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Freparse.c;h=4df2a4b428831fd02cee77329a5ba198ba0a2514;hp=a245a7e16a455e0575d8caffab6ae002d20f3503;hb=af141a23c4d1540b8a64759bb68c7cd7ff054e72;hpb=1153f04c39e01b96f983132f1367f777f852ac99 diff --git a/src/reparse.c b/src/reparse.c index a245a7e1..4df2a4b4 100644 --- a/src/reparse.c +++ b/src/reparse.c @@ -1,7 +1,5 @@ /* - * reparse.c - * - * Handle reparse data. + * reparse.c - Handle reparse data. */ /* @@ -23,91 +21,25 @@ * along with wimlib; if not, see http://www.gnu.org/licenses/. */ -#include "dentry.h" -#include "buffer_io.h" -#include "lookup_table.h" -#include "sha1.h" -#include +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif -static const utf16lechar volume_junction_prefix[11] = { - cpu_to_le16('\\'), - cpu_to_le16('\\'), - cpu_to_le16('?'), - cpu_to_le16('\\'), - cpu_to_le16('V'), - cpu_to_le16('o'), - cpu_to_le16('l'), - cpu_to_le16('u'), - cpu_to_le16('m'), - cpu_to_le16('e'), - cpu_to_le16('{'), -}; +#include "wimlib/assert.h" +#include "wimlib/compiler.h" +#include "wimlib/endianness.h" +#include "wimlib/encoding.h" +#include "wimlib/error.h" +#include "wimlib/inode.h" +#include "wimlib/lookup_table.h" +#include "wimlib/reparse.h" +#include "wimlib/resource.h" -/* Parse the "substitute name" (link target) from a symbolic link or junction - * reparse point. - * - * Return value is: - * - * Non-negative integer: - * The name is an absolute symbolic link in one of several formats, - * and the return value is the number of UTF-16LE characters that need to - * be advanced to reach a simple "absolute" path starting with a backslash - * (i.e. skip over \??\ and/or drive letter) - * Negative integer: - * SUBST_NAME_IS_VOLUME_JUNCTION: - * The name is a volume junction. - * SUBST_NAME_IS_RELATIVE_LINK: - * The name is a relative symbolic link. - * SUBST_NAME_IS_UNKNOWN: - * The name does not appear to be a valid symbolic link, junction, - * or mount point. - */ -int -parse_substitute_name(const utf16lechar *substitute_name, - u16 substitute_name_nbytes, u32 rptag) -{ - u16 substitute_name_nchars = substitute_name_nbytes / 2; - - if (substitute_name_nchars >= 7 && - substitute_name[0] == cpu_to_le16('\\') && - substitute_name[1] == cpu_to_le16('?') && - substitute_name[2] == cpu_to_le16('?') && - substitute_name[3] == cpu_to_le16('\\') && - substitute_name[4] != cpu_to_le16('\0') && - substitute_name[5] == cpu_to_le16(':') && - substitute_name[6] == cpu_to_le16('\\')) - { - /* "Full" symlink or junction (\??\x:\ prefixed path) */ - return 6; - } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT && - substitute_name_nchars >= 12 && - memcmp(substitute_name, volume_junction_prefix, - sizeof(volume_junction_prefix)) == 0 && - substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\')) - { - /* Volume junction. Can't really do anything with it. */ - return SUBST_NAME_IS_VOLUME_JUNCTION; - } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && - substitute_name_nchars >= 3 && - substitute_name[0] != cpu_to_le16('\0') && - substitute_name[1] == cpu_to_le16(':') && - substitute_name[2] == cpu_to_le16('\\')) - { - /* "Absolute" symlink, with drive letter */ - return 2; - } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && - substitute_name_nchars >= 1) - { - if (substitute_name[0] == cpu_to_le16('\\')) - /* "Absolute" symlink, without drive letter */ - return 0; - else - /* "Relative" symlink, without drive letter */ - return SUBST_NAME_IS_RELATIVE_LINK; - } else { - return SUBST_NAME_IS_UNKNOWN; - } -} +#ifdef HAVE_ALLOCA_H +# include +#endif +#include +#include /* * Read the data from a symbolic link, junction, or mount point reparse point @@ -117,37 +49,51 @@ parse_substitute_name(const utf16lechar *substitute_name, * description of the format of the reparse point buffers. */ int -parse_reparse_data(const u8 *rpbuf, u16 rpbuflen, struct reparse_data *rpdata) +parse_reparse_data(const u8 * restrict rpbuf, u16 rpbuflen, + struct reparse_data * restrict rpdata) { - const u8 *p = rpbuf; u16 substitute_name_offset; u16 print_name_offset; + const struct reparse_buffer_disk *rpbuf_disk = + (const struct reparse_buffer_disk*)rpbuf; + const u8 *data; memset(rpdata, 0, sizeof(*rpdata)); if (rpbuflen < 16) goto out_invalid; - p = get_u32(p, &rpdata->rptag); + rpdata->rptag = le32_to_cpu(rpbuf_disk->rptag); wimlib_assert(rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK || rpdata->rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT); - p = get_u16(p, &rpdata->rpdatalen); - p = get_u16(p, &rpdata->rpreserved); - p = get_u16(p, &substitute_name_offset); - p = get_u16(p, &rpdata->substitute_name_nbytes); - p = get_u16(p, &print_name_offset); - p = get_u16(p, &rpdata->print_name_nbytes); + rpdata->rpdatalen = le16_to_cpu(rpbuf_disk->rpdatalen); + rpdata->rpreserved = le16_to_cpu(rpbuf_disk->rpreserved); + substitute_name_offset = le16_to_cpu(rpbuf_disk->symlink.substitute_name_offset); + rpdata->substitute_name_nbytes = le16_to_cpu(rpbuf_disk->symlink.substitute_name_nbytes); + print_name_offset = le16_to_cpu(rpbuf_disk->symlink.print_name_offset); + rpdata->print_name_nbytes = le16_to_cpu(rpbuf_disk->symlink.print_name_nbytes); + + if ((substitute_name_offset & 1) | (print_name_offset & 1) | + (rpdata->substitute_name_nbytes & 1) | (rpdata->print_name_nbytes & 1)) + { + /* Names would be unaligned... */ + goto out_invalid; + } + if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) { if (rpbuflen < 20) goto out_invalid; - p = get_u32(p, &rpdata->rpflags); + rpdata->rpflags = le32_to_cpu(rpbuf_disk->symlink.rpflags); + data = rpbuf_disk->symlink.data; + } else { + data = rpbuf_disk->junction.data; } if ((size_t)substitute_name_offset + rpdata->substitute_name_nbytes + - (p - rpbuf) > rpbuflen) + (data - rpbuf) > rpbuflen) goto out_invalid; if ((size_t)print_name_offset + rpdata->print_name_nbytes + - (p - rpbuf) > rpbuflen) + (data - rpbuf) > rpbuflen) goto out_invalid; - rpdata->substitute_name = (utf16lechar*)&p[substitute_name_offset]; - rpdata->print_name = (utf16lechar*)&p[print_name_offset]; + rpdata->substitute_name = (utf16lechar*)&data[substitute_name_offset]; + rpdata->print_name = (utf16lechar*)&data[print_name_offset]; return 0; out_invalid: ERROR("Invalid reparse data"); @@ -163,34 +109,46 @@ out_invalid: * at least REPARSE_POINT_MAX_SIZE bytes long. */ int -make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf) +make_reparse_buffer(const struct reparse_data * restrict rpdata, + u8 * restrict rpbuf, + u16 * restrict rpbuflen_ret) { - u8 *p = rpbuf; - - p = put_u32(p, rpdata->rptag); - p += 2; /* We set ReparseDataLength later */ - p = put_u16(p, rpdata->rpreserved); - p = put_u16(p, 0); /* substitute name offset */ - p = put_u16(p, rpdata->substitute_name_nbytes); /* substitute name nbytes */ - p = put_u16(p, rpdata->substitute_name_nbytes + 2); /* print name offset */ - p = put_u16(p, rpdata->print_name_nbytes); /* print name nbytes */ - if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) - p = put_u32(p, rpdata->rpflags); + struct reparse_buffer_disk *rpbuf_disk = + (struct reparse_buffer_disk*)rpbuf; + u8 *data; + + rpbuf_disk->rptag = cpu_to_le32(rpdata->rptag); + rpbuf_disk->rpreserved = cpu_to_le16(rpdata->rpreserved); + rpbuf_disk->symlink.substitute_name_offset = cpu_to_le16(0); + rpbuf_disk->symlink.substitute_name_nbytes = cpu_to_le16(rpdata->substitute_name_nbytes); + rpbuf_disk->symlink.print_name_offset = cpu_to_le16(rpdata->substitute_name_nbytes + 2); + rpbuf_disk->symlink.print_name_nbytes = cpu_to_le16(rpdata->print_name_nbytes); + + if (rpdata->rptag == WIM_IO_REPARSE_TAG_SYMLINK) { + rpbuf_disk->symlink.rpflags = cpu_to_le32(rpdata->rpflags); + data = rpbuf_disk->symlink.data; + } else { + data = rpbuf_disk->junction.data; + } + /* We null-terminate the substitute and print names, although this may * not be strictly necessary. Note that the byte counts should not * include the null terminators. */ - if (p + rpdata->substitute_name_nbytes + + if (data + rpdata->substitute_name_nbytes + rpdata->print_name_nbytes + 2 * sizeof(utf16lechar) - rpbuf > REPARSE_POINT_MAX_SIZE) { ERROR("Reparse data is too long!"); return WIMLIB_ERR_INVALID_REPARSE_DATA; } - p = put_bytes(p, rpdata->substitute_name_nbytes, rpdata->substitute_name); - p = put_u16(p, 0); - p = put_bytes(p, rpdata->print_name_nbytes, rpdata->print_name); - p = put_u16(p, 0); - put_u16(rpbuf + 4, p - rpbuf - 8); /* Set ReparseDataLength */ + data = mempcpy(data, rpdata->substitute_name, rpdata->substitute_name_nbytes); + *(utf16lechar*)data = cpu_to_le16(0); + data += 2; + data = mempcpy(data, rpdata->print_name, rpdata->print_name_nbytes); + *(utf16lechar*)data = cpu_to_le16(0); + data += 2; + rpbuf_disk->rpdatalen = cpu_to_le16(data - rpbuf - 8); + *rpbuflen_ret = data - rpbuf; return 0; } @@ -207,72 +165,194 @@ make_reparse_buffer(const struct reparse_data *rpdata, u8 *rpbuf) * buffer returned by this function. */ int -wim_inode_get_reparse_data(const struct wim_inode *inode, u8 *rpbuf) +wim_inode_get_reparse_data(const struct wim_inode * restrict inode, + u8 * restrict rpbuf, + u16 * restrict rpbuflen_ret, + struct wim_lookup_table_entry *lte_override) { struct wim_lookup_table_entry *lte; int ret; + struct reparse_buffer_disk *rpbuf_disk; + u16 rpdatalen; wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); - lte = inode_unnamed_lte_resolved(inode); - if (!lte) { - ERROR("Reparse point has no reparse data!"); - return WIMLIB_ERR_INVALID_REPARSE_DATA; + if (!lte_override) { + lte = inode_unnamed_lte_resolved(inode); + if (!lte) { + ERROR("Reparse point has no reparse data!"); + return WIMLIB_ERR_INVALID_REPARSE_DATA; + } + } else { + lte = lte_override; } - if (wim_resource_size(lte) > REPARSE_POINT_MAX_SIZE - 8) { + + if (lte->size > REPARSE_POINT_MAX_SIZE - 8) { ERROR("Reparse data is too long!"); return WIMLIB_ERR_INVALID_REPARSE_DATA; } + rpdatalen = lte->size; /* Read the data from the WIM file */ - ret = read_full_resource_into_buf(lte, rpbuf + 8, true); + ret = read_full_stream_into_buf(lte, rpbuf + 8); if (ret) return ret; /* Reconstruct the first 8 bytes of the reparse point buffer */ + rpbuf_disk = (struct reparse_buffer_disk*)rpbuf; /* ReparseTag */ - put_u32(rpbuf, inode->i_reparse_tag); + rpbuf_disk->rptag = cpu_to_le32(inode->i_reparse_tag); /* ReparseDataLength */ - put_u16(rpbuf + 4, wim_resource_size(lte)); + rpbuf_disk->rpdatalen = cpu_to_le16(rpdatalen); /* ReparseReserved * XXX this could be one of the unknown fields in the WIM dentry. */ - put_u16(rpbuf + 6, 0); + rpbuf_disk->rpreserved = cpu_to_le16(0); + + *rpbuflen_ret = rpdatalen + 8; return 0; } /* UNIX version of getting and setting the data in reparse points */ -#if !defined(__WIN32__) +#ifndef __WIN32__ -/* Get the UNIX symlink target from a WIM inode. The inode may be either a - * "real" symlink (reparse tag WIM_IO_REPARSE_TAG_SYMLINK), or it may be a - * junction point (reparse tag WIM_IO_REPARSE_TAG_MOUNT_POINT). +static const utf16lechar volume_junction_prefix[11] = { + cpu_to_le16('\\'), + cpu_to_le16('?'), + cpu_to_le16('?'), + cpu_to_le16('\\'), + cpu_to_le16('V'), + cpu_to_le16('o'), + cpu_to_le16('l'), + cpu_to_le16('u'), + cpu_to_le16('m'), + cpu_to_le16('e'), + cpu_to_le16('{'), +}; + +enum { + SUBST_NAME_IS_RELATIVE_LINK = -1, + SUBST_NAME_IS_VOLUME_JUNCTION = -2, + SUBST_NAME_IS_UNKNOWN = -3, +}; + +/* Parse the "substitute name" (link target) from a symbolic link or junction + * reparse point. + * + * Return value is: * - * This has similar semantics to the UNIX readlink() function, except the path - * argument is swapped out with the `struct wim_inode' for a reparse point, and - * on failure a negated error code is returned rather than -1 with errno set. */ + * Non-negative integer: + * The name is an absolute symbolic link in one of several formats, + * and the return value is the number of UTF-16LE characters that need to + * be advanced to reach a simple "absolute" path starting with a backslash + * (i.e. skip over \??\ and/or drive letter) + * Negative integer: + * SUBST_NAME_IS_VOLUME_JUNCTION: + * The name is a volume junction. + * SUBST_NAME_IS_RELATIVE_LINK: + * The name is a relative symbolic link. + * SUBST_NAME_IS_UNKNOWN: + * The name does not appear to be a valid symbolic link, junction, + * or mount point. + */ +static int +parse_substitute_name(const utf16lechar *substitute_name, + u16 substitute_name_nbytes, u32 rptag) +{ + u16 substitute_name_nchars = substitute_name_nbytes / 2; + + if (substitute_name_nchars >= 7 && + substitute_name[0] == cpu_to_le16('\\') && + substitute_name[1] == cpu_to_le16('?') && + substitute_name[2] == cpu_to_le16('?') && + substitute_name[3] == cpu_to_le16('\\') && + substitute_name[4] != cpu_to_le16('\0') && + substitute_name[5] == cpu_to_le16(':') && + substitute_name[6] == cpu_to_le16('\\')) + { + /* "Full" symlink or junction (\??\x:\ prefixed path) */ + return 6; + } else if (rptag == WIM_IO_REPARSE_TAG_MOUNT_POINT && + substitute_name_nchars >= 12 && + memcmp(substitute_name, volume_junction_prefix, + sizeof(volume_junction_prefix)) == 0 && + substitute_name[substitute_name_nchars - 1] == cpu_to_le16('\\')) + { + /* Volume junction. Can't really do anything with it. */ + return SUBST_NAME_IS_VOLUME_JUNCTION; + } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && + substitute_name_nchars >= 3 && + substitute_name[0] != cpu_to_le16('\0') && + substitute_name[1] == cpu_to_le16(':') && + substitute_name[2] == cpu_to_le16('\\')) + { + /* "Absolute" symlink, with drive letter */ + return 2; + } else if (rptag == WIM_IO_REPARSE_TAG_SYMLINK && + substitute_name_nchars >= 1) + { + if (substitute_name[0] == cpu_to_le16('\\')) + /* "Absolute" symlink, without drive letter */ + return 0; + else + /* "Relative" symlink, without drive letter */ + return SUBST_NAME_IS_RELATIVE_LINK; + } else { + return SUBST_NAME_IS_UNKNOWN; + } +} + +/* + * Get the UNIX-style symlink target from the WIM inode for a reparse point. + * Specifically, this translates the target from UTF-16 to the current multibyte + * encoding, strips the drive prefix if present, and replaces backslashes with + * forward slashes. + * + * @inode + * The inode to read the symlink from. It must be a reparse point with + * tag WIM_IO_REPARSE_TAG_SYMLINK (a real symlink) or + * WIM_IO_REPARSE_TAG_MOUNT_POINT (a mount point or junction point). + * + * @buf + * Buffer into which to place the link target. + * + * @bufsize + * Available space in @buf, in bytes. + * + * @lte_override + * If not NULL, the stream from which to read the reparse data. Otherwise, + * the reparse data will be read from the unnamed stream of @inode. + * + * If the entire symbolic link target was placed in the buffer, returns the + * number of bytes written. The resulting string is not null-terminated. If + * the symbolic link target was too large to be placed in the buffer, the first + * @bufsize bytes of it are placed in the buffer and + * -ENAMETOOLONG is returned. Otherwise, a negative errno value indicating + * another error is returned. + */ ssize_t -wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize) +wim_inode_readlink(const struct wim_inode * restrict inode, + char * restrict buf, size_t bufsize, + struct wim_lookup_table_entry *lte_override) { int ret; - u8 rpbuf[REPARSE_POINT_MAX_SIZE]; - u16 rpdatalen; + struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); struct reparse_data rpdata; char *link_target; char *translated_target; size_t link_target_len; + u16 rpbuflen; wimlib_assert(inode_is_symlink(inode)); - if (wim_inode_get_reparse_data(inode, rpbuf)) + if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen, + lte_override)) return -EIO; - get_u16(rpbuf + 4, &rpdatalen); - - if (parse_reparse_data(rpbuf, rpdatalen + 8, &rpdata)) - return -EIO; + if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata)) + return -EINVAL; ret = utf16le_to_tstr(rpdata.substitute_name, rpdata.substitute_name_nbytes, @@ -292,7 +372,8 @@ wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize) case SUBST_NAME_IS_UNKNOWN: ERROR("Can't understand reparse point " "substitute name \"%s\"", link_target); - return -EIO; + ret = -EIO; + goto out_free_link_target; default: translated_target += ret; link_target_len -= ret; @@ -311,28 +392,25 @@ out_have_link: ret = link_target_len; } memcpy(buf, translated_target, link_target_len); +out_free_link_target: FREE(link_target); return ret; } -#ifdef HAVE_ALLOCA_H -# include -#endif - int wim_inode_set_symlink(struct wim_inode *inode, const char *target, struct wim_lookup_table *lookup_table) { - u8 rpbuf[REPARSE_POINT_MAX_SIZE]; - u16 rpdatalen; + struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); struct reparse_data rpdata; static const char abs_subst_name_prefix[12] = "\\\0?\0?\0\\\0C\0:\0"; static const char abs_print_name_prefix[4] = "C\0:\0"; utf16lechar *name_utf16le; size_t name_utf16le_nbytes; int ret; + u16 rpbuflen; DEBUG("Creating reparse point data buffer for UNIX " "symlink target \"%s\"", target); @@ -353,8 +431,8 @@ wim_inode_set_symlink(struct wim_inode *inode, * ways to provide Windows paths.) * * To change a UNIX relative symbolic link to Windows format, we only - * need to translate it to UTF-16LE and replace backslashes with forward - * slashes. We do not make any attempt to handle filename character + * need to translate it to UTF-16LE and replace forward slashes with + * backslashes. We do not make any attempt to handle filename character * problems, such as a link target that itself contains backslashes on * UNIX. Then, for these relative links, we set the reparse header * @flags field to SYMBOLIC_LINK_RELATIVE. @@ -415,92 +493,15 @@ wim_inode_set_symlink(struct wim_inode *inode, rpdata.rpflags = SYMBOLIC_LINK_RELATIVE; } - ret = make_reparse_buffer(&rpdata, rpbuf); + ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk, &rpbuflen); if (ret == 0) { - get_u16(rpbuf + 4, &rpdatalen); - ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpdatalen, + ret = inode_set_unnamed_stream(inode, + (u8*)&rpbuf_disk + 8, + rpbuflen - 8, lookup_table); } FREE(name_utf16le); return ret; } -#include - -static int -unix_get_ino_and_dev(const char *path, u64 *ino_ret, u64 *dev_ret) -{ - struct stat stbuf; - if (stat(path, &stbuf)) { - if (errno != ENOENT) - WARNING_WITH_ERRNO("Failed to stat \"%s\"", path); - /* Treat as a link pointing outside the capture root (it - * most likely is). */ - return WIMLIB_ERR_STAT; - } else { - *ino_ret = stbuf.st_ino; - *dev_ret = stbuf.st_dev; - return 0; - } -} - -#endif /* !defined(__WIN32__) */ - -#ifdef __WIN32__ -# include "win32.h" -# define RP_PATH_SEPARATOR L'\\' -# define is_rp_path_separator(c) ((c) == L'\\' || (c) == L'/') -# define os_get_ino_and_dev win32_get_file_and_vol_ids -#else -# define RP_PATH_SEPARATOR '/' -# define is_rp_path_separator(c) ((c) == '/') -# define os_get_ino_and_dev unix_get_ino_and_dev -#endif - -/* Fix up absolute symbolic link targets--- mostly shared between UNIX and - * Windows */ -tchar * -capture_fixup_absolute_symlink(tchar *dest, - u64 capture_root_ino, u64 capture_root_dev) -{ - tchar *p = dest; - - DEBUG("Fixing symlink or junction \"%"TS"\"", dest); - for (;;) { - tchar save; - int ret; - u64 ino; - u64 dev; - - while (is_rp_path_separator(*p)) - p++; - - save = *p; - *p = T('\0'); - ret = os_get_ino_and_dev(dest, &ino, &dev); - *p = save; - - if (ret) /* stat() failed before we got to the capture root--- - assume the link points outside it. */ - return NULL; - - if (ino == capture_root_ino && dev == capture_root_dev) { - /* Link points inside capture root. Return abbreviated - * path. */ - if (*p == T('\0')) - *(p - 1) = RP_PATH_SEPARATOR; - while (p - 1 >= dest && is_rp_path_separator(*(p - 1))) - p--; - return p; - } - - if (*p == T('\0')) { - /* Link points outside capture root. */ - return NULL; - } - - do { - p++; - } while (!is_rp_path_separator(*p) && *p != T('\0')); - } -} +#endif /* !__WIN32__ */