]> wimlib.net Git - wimlib/commitdiff
utf8_to_utf16(), utf16_to_utf8() changes
authorEric Biggers <ebiggers3@gmail.com>
Wed, 19 Dec 2012 02:09:59 +0000 (20:09 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Wed, 19 Dec 2012 02:09:59 +0000 (20:09 -0600)
These two functions have been moved out of 'util.c' to a separate file
'encoding.c'.  They also now return an integer error code because they can fail
for at least two reasons (no memory, and invalid UTF-8 or UTF-16LE character).

13 files changed:
Makefile.am
src/add_image.c
src/dentry.c
src/dentry.h
src/encoding.c [new file with mode: 0644]
src/mount_image.c
src/ntfs-apply.c
src/ntfs-capture.c
src/symlink.c
src/util.c
src/util.h
src/wimlib.h
src/xml.c

index 96a9d3901b47faa20ed62a455f84065bf0c30e57..35b2ad1a8eb23b31cc93c52b91b06bf6173a1d6a 100644 (file)
@@ -19,6 +19,7 @@ libwim_la_SOURCES =           \
        src/delete_image.c      \
        src/dentry.c            \
        src/dentry.h            \
+       src/encoding.c          \
        src/endianness.h        \
        src/export_image.c      \
        src/extract_image.c     \
index d09dd197deb450d0ffac39dec492c7df7b8e6cb8..d658dc923fd58395e1e0062e715b9034a1337850 100644 (file)
@@ -214,8 +214,12 @@ static int build_dentry_tree(struct dentry **root_ret,
                filename = path_basename(root_disk_path);
 
        root = new_dentry_with_timeless_inode(filename);
-       if (!root)
-               return WIMLIB_ERR_NOMEM;
+       if (!root) {
+               if (errno == EILSEQ)
+                       return WIMLIB_ERR_INVALID_UTF8_STRING;
+               else
+                       return WIMLIB_ERR_NOMEM;
+       }
 
        inode = root->d_inode;
 
index fb57c93717631ad64bbb0964bcb8c67338716153..e7d34f5d08ed9c968225baa3e3f03f395c505df6 100644 (file)
@@ -83,13 +83,12 @@ int get_names(char **name_utf16_ret, char **name_utf8_ret,
        size_t utf8_len;
        size_t utf16_len;
        char *name_utf16, *name_utf8;
+       int ret;
 
        utf8_len = strlen(name);
-
-       name_utf16 = utf8_to_utf16(name, utf8_len, &utf16_len);
-
-       if (!name_utf16)
-               return WIMLIB_ERR_NOMEM;
+       ret = utf8_to_utf16(name, utf8_len, &name_utf16, &utf16_len);
+       if (ret != 0)
+               return ret;
 
        name_utf8 = MALLOC(utf8_len + 1);
        if (!name_utf8) {
@@ -115,11 +114,14 @@ static int change_dentry_name(struct dentry *dentry, const char *new_name)
 
        ret = get_names(&dentry->file_name, &dentry->file_name_utf8,
                        &dentry->file_name_len, &dentry->file_name_utf8_len,
-                        new_name);
-       FREE(dentry->short_name);
-       dentry->short_name_len = 0;
-       if (ret == 0)
+                       new_name);
+       if (ret == 0) {
+               if (dentry->short_name_len) {
+                       FREE(dentry->short_name);
+                       dentry->short_name_len = 0;
+               }
                dentry->length = dentry_correct_length(dentry);
+       }
        return ret;
 }
 
@@ -809,6 +811,9 @@ static struct inode *new_inode()
  *
  * Returns a pointer to the new dentry, or NULL if out of memory.
  */
+#ifndef WITH_FUSE
+static
+#endif
 struct dentry *new_dentry(const char *name)
 {
        struct dentry *dentry;
@@ -826,7 +831,7 @@ struct dentry *new_dentry(const char *name)
        return dentry;
 err:
        FREE(dentry);
-       ERROR("Failed to allocate new dentry");
+       ERROR_WITH_ERRNO("Failed to create new dentry with name \"%s\"", name);
        return NULL;
 }
 
@@ -1251,15 +1256,14 @@ static int read_ads_entries(const u8 *p, struct inode *inode,
                        }
                        get_bytes(p, cur_entry->stream_name_len,
                                  (u8*)cur_entry->stream_name);
-                       cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name,
-                                                                   cur_entry->stream_name_len,
-                                                                   &utf8_len);
-                       cur_entry->stream_name_utf8_len = utf8_len;
 
-                       if (!cur_entry->stream_name_utf8) {
-                               ret = WIMLIB_ERR_NOMEM;
+                       ret = utf16_to_utf8(cur_entry->stream_name,
+                                           cur_entry->stream_name_len,
+                                           &cur_entry->stream_name_utf8,
+                                           &utf8_len);
+                       if (ret != 0)
                                goto out_free_ads_entries;
-                       }
+                       cur_entry->stream_name_utf8_len = utf8_len;
                }
                /* It's expected that the size of every ADS entry is a multiple
                 * of 8.  However, to be safe, I'm allowing the possibility of
@@ -1434,15 +1438,10 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len,
                p = get_bytes(p, file_name_len, file_name);
 
                /* Convert filename to UTF-8. */
-               file_name_utf8 = utf16_to_utf8(file_name, file_name_len,
-                                              &file_name_utf8_len);
-
-               if (!file_name_utf8) {
-                       ERROR("Failed to allocate memory to convert UTF-16 "
-                             "filename (%hu bytes) to UTF-8", file_name_len);
-                       ret = WIMLIB_ERR_NOMEM;
+               ret = utf16_to_utf8(file_name, file_name_len, &file_name_utf8,
+                                   &file_name_utf8_len);
+               if (ret != 0)
                        goto out_free_file_name;
-               }
                if (*(u16*)p)
                        WARNING("Expected two zero bytes following the file name "
                                "`%s', but found non-zero bytes", file_name_utf8);
index afdb3619190e102ebf6b478b6954471af02973ef..0d3f89e3b0de54b4386ad7c18dfe62216d6cfc09 100644 (file)
@@ -358,7 +358,10 @@ extern struct dentry *get_parent_dentry(struct WIMStruct *w, const char *path);
 extern int print_dentry(struct dentry *dentry, void *lookup_table);
 extern int print_dentry_full_path(struct dentry *entry, void *ignore);
 
+#ifdef WITH_FUSE
 extern struct dentry *new_dentry(const char *name);
+#endif
+
 extern struct dentry *new_dentry_with_inode(const char *name);
 extern struct dentry *new_dentry_with_timeless_inode(const char *name);
 
diff --git a/src/encoding.c b/src/encoding.c
new file mode 100644 (file)
index 0000000..edb24c7
--- /dev/null
@@ -0,0 +1,332 @@
+/*
+ * encoding.c:  Convert UTF-8 to UTF-16LE strings and vice versa
+ */
+
+/*
+ * Copyright (C) 2012 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#include "wimlib.h"
+#include "util.h"
+#include "endianness.h"
+
+#include <errno.h>
+
+#ifdef WITH_NTFS_3G
+#include <ntfs-3g/volume.h>
+#include <ntfs-3g/unistr.h>
+#else
+#include <iconv.h>
+#endif
+
+/*
+ * NOTE:
+ *
+ * utf16_to_utf8_size() and utf8_to_utf16_size() were taken from
+ * libntfs-3g/unistr.c in the NTFS-3g sources.  (Modified slightly to remove
+ * unneeded functionality.)
+ */
+#ifndef WITH_NTFS_3G
+/*
+ * Return the number of 8-bit elements (bytes) in UTF-8 needed (without the
+ * terminating null) to store a given UTF-16LE string.
+ *
+ * @ins:      The UTF-16LE code units.  Each one is passed through
+ *            le16_to_cpu() before it is classified.
+ * @ins_len:  Maximum number of 16-bit code units to examine.  Scanning also
+ *            stops early at the first code unit equal to zero.
+ *
+ * Per code unit the count grows by 1 (below 0x80), 2 (below 0x800) or 3
+ * (other non-surrogate values); a high surrogate (0xd800-0xdbff) followed by
+ * a low surrogate (0xdc00-0xdfff) contributes 4 bytes for the pair.
+ *
+ * Returns the byte count on success.  Returns -1 with errno set to EILSEQ if
+ * a high surrogate is not immediately followed by a low surrogate (including
+ * a high surrogate as the last examined unit), or if a low surrogate appears
+ * without a preceding high surrogate.  When NOREVBOM is nonzero, 0xfffe and
+ * 0xffff are rejected as well.
+ *
+ * NOTE(review): earlier wording said this also fails when the string is "too
+ * long", but no length limit is checked here; the running total is a plain
+ * int.  NOREVBOM is not defined anywhere in the visible code, so presumably
+ * the #else branch is the one that is compiled -- confirm.
+ */
+static int utf16_to_utf8_size(const u16 *ins, const int ins_len)
+{
+       int i, ret = -1;
+       int count = 0;
+       bool surrog; /* true while a high surrogate awaits its low surrogate */
+
+       surrog = false;
+       for (i = 0; i < ins_len && ins[i]; i++) {
+               unsigned short c = le16_to_cpu(ins[i]);
+               if (surrog) {
+                       if ((c >= 0xdc00) && (c < 0xe000)) {
+                               surrog = false;
+                               count += 4; /* the whole surrogate pair is counted here */
+                       } else
+                               goto fail;
+               } else
+                       if (c < 0x80)
+                               count++;
+                       else if (c < 0x800)
+                               count += 2;
+                       else if (c < 0xd800)
+                               count += 3;
+                       else if (c < 0xdc00)
+                               surrog = true; /* high surrogate: counted once the low half is seen */
+#if NOREVBOM
+                       else if ((c >= 0xe000) && (c < 0xfffe))
+#else
+                       else if (c >= 0xe000)
+#endif
+                               count += 3;
+                       else
+                               goto fail; /* only 0xdc00-0xdfff (and, with NOREVBOM, 0xfffe/0xffff) reach here */
+       }
+       if (surrog) /* input ended in the middle of a surrogate pair */
+               goto fail;
+
+       ret = count;
+out:
+       return ret;
+fail:
+       errno = EILSEQ;
+       goto out;
+}
+
+/*
+ * Return the number of 16-bit elements in UTF-16LE needed
+ * (without the terminating null) to store the given null-terminated UTF-8
+ * string.
+ *
+ * @s:  The UTF-8 string; it is scanned up to its terminating null byte.
+ *
+ * Each lead byte adds one 16-bit element; a lead byte of 0xF0 or above (a
+ * 4-byte sequence) adds a second one for the surrogate pair.  The trailing
+ * bytes of a multi-byte sequence are skipped without being inspected, except
+ * that a null byte met while skipping ends the scan.
+ *
+ * Returns the element count, or -1 with errno set to EILSEQ if a lead byte
+ * of 0xF5 or above is found.
+ *
+ * Note: This does not check whether the input sequence is a valid utf8 string,
+ *      and should be used only in context where such check is made!
+ *
+ * NOTE(review): earlier wording said this fails when the result is longer
+ * than PATH_MAX, but no such limit is checked here.  The running total is a
+ * size_t that is returned as an int, so a very large count would be narrowed
+ * on return.
+ */
+static int utf8_to_utf16_size(const char *s)
+{
+       unsigned int byte;
+       size_t count = 0;
+       while ((byte = *((const unsigned char *)s++))) {
+               count++; /* every lead byte yields at least one UTF-16 element */
+               if (byte >= 0xc0) {
+                       if (byte >= 0xF5) {
+                               errno = EILSEQ;
+                               return -1;
+                       }
+                       if (!*s) /* string ends inside a multi-byte sequence */
+                               break;
+                       if (byte >= 0xC0) /* always true here: same test as the enclosing if */
+                               s++;
+                       if (!*s)
+                               break;
+                       if (byte >= 0xE0) /* 3- and 4-byte sequences: skip second trailing byte */
+                               s++;
+                       if (!*s)
+                               break;
+                       if (byte >= 0xF0) { /* 4-byte sequence: skip third trailing byte */
+                               s++;
+                               count++; /* second element of the surrogate pair */
+                       }
+               }
+       }
+       return count;
+}
+#endif /* !WITH_NTFS_3G */
+
+/* Converts a string in the UTF-16LE encoding to a newly allocated string in the
+ * UTF-8 encoding.
+ *
+ * If available, do so by calling a similar function from libntfs-3g.
+ * Otherwise, use iconv() along with the helper function utf16_to_utf8_size().
+ *
+ * @utf16_str:        The UTF-16LE input.
+ * @utf16_nbytes:     Length of the input in bytes (not 16-bit units).
+ * @utf8_str_ret:     On success, receives the converted string.  It is set to
+ *                    NULL when @utf16_nbytes is 0.  In the iconv() build the
+ *                    buffer comes from MALLOC() and is null-terminated; in
+ *                    the libntfs-3g build it is whatever ntfs_ucstombs()
+ *                    allocated.  The caller owns it (presumably released
+ *                    with FREE() -- confirm for the libntfs-3g build).
+ * @utf8_nbytes_ret:  On success, receives the length of the UTF-8 string in
+ *                    bytes, not counting the terminating null.
+ *
+ * Returns 0 on success, otherwise one of:
+ *   WIMLIB_ERR_INVALID_UTF16_STRING  odd @utf16_nbytes, or the conversion
+ *                                    failed for a reason other than ENOMEM
+ *   WIMLIB_ERR_NOMEM                 allocation failed
+ *   WIMLIB_ERR_ICONV_NOT_AVAILABLE   iconv_open() failed with an errno other
+ *                                    than ENOMEM (iconv() build only)
+ * The output parameters are written only on the success paths.
+ *
+ * NOTE(review): in the iconv() build the conversion descriptor is a function-
+ * level static that is opened on first use; nothing in this function closes
+ * it and no locking is visible, so concurrent callers would share it --
+ * confirm whether that matters for callers.
+ * NOTE(review): utf16_to_utf8_size() stops at the first zero code unit, while
+ * iconv() is handed all @utf16_nbytes bytes; an embedded zero therefore
+ * presumably ends up rejected by the "bytes left" checks below.
+ */
+int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes,
+                 char **utf8_str_ret, size_t *utf8_nbytes_ret)
+{
+       int ret;
+
+       if (utf16_nbytes == 0) { /* empty input: succeed with a NULL string */
+               *utf8_str_ret = NULL;
+               *utf8_nbytes_ret = 0;
+               return 0;
+       }
+
+       if (utf16_nbytes & 1) { /* UTF-16 is made of 2-byte code units */
+               ERROR("UTF-16LE string is invalid (odd number of bytes)!");
+               return WIMLIB_ERR_INVALID_UTF16_STRING;
+       }
+#ifdef WITH_NTFS_3G
+       char *outs = NULL;
+       int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str,
+                                    utf16_nbytes / 2, &outs, 0);
+       if (outs_len >= 0) {
+               *utf8_str_ret = outs;
+               *utf8_nbytes_ret = outs_len;
+               ret = 0;
+       } else {
+               if (errno == ENOMEM)
+                       ret = WIMLIB_ERR_NOMEM;
+               else
+                       ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+       }
+#else /* WITH_NTFS_3G */
+       static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1);
+       if (cd_utf16_to_utf8 == (iconv_t)(-1)) { /* first call, or an earlier open failed */
+               cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE");
+               if (cd_utf16_to_utf8 == (iconv_t)-1) {
+                       ERROR_WITH_ERRNO("Failed to get conversion descriptor "
+                                        "for converting UTF-16LE to UTF-8");
+                       if (errno == ENOMEM)
+                               return WIMLIB_ERR_NOMEM;
+                       else
+                               return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
+               }
+       }
+       ret = utf16_to_utf8_size((const u16*)utf16_str, utf16_nbytes / 2);
+       if (ret >= 0) {
+               size_t utf8_expected_nbytes;
+               char  *utf8_str;
+               size_t utf8_bytes_left;
+               size_t utf16_bytes_left;
+               size_t num_chars_converted;
+               char  *utf8_str_save;
+               const char *utf16_str_save;
+
+               utf8_expected_nbytes = ret;
+               utf8_str = MALLOC(utf8_expected_nbytes + 1); /* +1 for the terminating null */
+               if (utf8_str) {
+                       utf8_bytes_left = utf8_expected_nbytes;
+                       utf16_bytes_left = utf16_nbytes;
+                       utf8_str_save = utf8_str; /* iconv() advances both buffer pointers */
+                       utf16_str_save = utf16_str;
+                       num_chars_converted = iconv(cd_utf16_to_utf8,
+                                                   (char**)&utf16_str,
+                                                   &utf16_bytes_left,
+                                                   &utf8_str,
+                                                   &utf8_bytes_left);
+                       utf8_str = utf8_str_save; /* restore the start-of-buffer pointers */
+                       utf16_str = utf16_str_save;
+                       if (utf16_bytes_left == 0 &&
+                           utf8_bytes_left == 0 &&
+                           num_chars_converted != (size_t)(-1))
+                       {
+                               utf8_str[utf8_expected_nbytes] = '\0';
+                               *utf8_str_ret = utf8_str;
+                               *utf8_nbytes_ret = utf8_expected_nbytes;
+                               ret = 0;
+                       } else {
+                               FREE(utf8_str); /* iconv() failed, or input/output was not fully consumed/filled */
+                               ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+                       }
+               } else
+                       ret = WIMLIB_ERR_NOMEM;
+       } else
+               ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+#endif /* WITH_NTFS_3G */
+
+#ifdef ENABLE_ERROR_MESSAGES
+       if (ret != 0) {
+               ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8");
+               ERROR("The failing string was:");
+               print_string(utf16_str, utf16_nbytes);
+               putchar('\n');
+       }
+#endif /* ENABLE_ERROR_MESSAGES */
+       return ret;
+}
+
+
+/* Converts a string in the UTF-8 encoding to a newly allocated string in the
+ * UTF-16LE encoding.
+ *
+ * If available, do so by calling a similar function from libntfs-3g.
+ * Otherwise, use iconv() along with the helper function utf8_to_utf16_size().
+ *
+ * @utf8_str:          The UTF-8 input.  It must be null-terminated: the
+ *                     libntfs-3g call and utf8_to_utf16_size() are given only
+ *                     the pointer, not a length.
+ * @utf8_nbytes:       Length of the input in bytes.  It is the zero-length
+ *                     check in both builds and the input size handed to
+ *                     iconv() in the iconv() build.
+ * @utf16_str_ret:     On success, receives the converted string.  It is set
+ *                     to NULL when @utf8_nbytes is 0.  In the iconv() build
+ *                     the buffer comes from MALLOC() and ends with two zero
+ *                     bytes; in the libntfs-3g build it is whatever
+ *                     ntfs_mbstoucs() allocated.  The caller owns it
+ *                     (presumably released with FREE() -- confirm for the
+ *                     libntfs-3g build).
+ * @utf16_nbytes_ret:  On success, receives the length of the UTF-16LE string
+ *                     in bytes, not counting the terminator.
+ *
+ * Returns 0 on success, otherwise one of:
+ *   WIMLIB_ERR_INVALID_UTF8_STRING   the conversion failed for a reason other
+ *                                    than ENOMEM
+ *   WIMLIB_ERR_NOMEM                 allocation failed
+ *   WIMLIB_ERR_ICONV_NOT_AVAILABLE   iconv_open() failed with an errno other
+ *                                    than ENOMEM (iconv() build only)
+ * The output parameters are written only on the success paths.
+ *
+ * NOTE(review): in the iconv() build the conversion descriptor is a function-
+ * level static that is opened on first use; nothing in this function closes
+ * it and no locking is visible, so concurrent callers would share it --
+ * confirm whether that matters for callers.
+ * NOTE(review): if @utf8_nbytes disagrees with the position of the null
+ * terminator, the size estimate and the iconv() input length differ; that is
+ * presumably caught by the "bytes left" checks below.
+ */
+int utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes,
+                 char **utf16_str_ret, size_t *utf16_nbytes_ret)
+{
+       int ret;
+       if (utf8_nbytes == 0) { /* empty input: succeed with a NULL string */
+               *utf16_str_ret = NULL;
+               *utf16_nbytes_ret = 0;
+               return 0;
+       }
+#ifdef WITH_NTFS_3G
+       char *outs = NULL;
+       int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs);
+       if (outs_nchars >= 0) {
+               *utf16_str_ret = outs;
+               *utf16_nbytes_ret = (size_t)outs_nchars * 2; /* 16-bit units -> bytes */
+               ret = 0;
+       } else {
+               if (errno == ENOMEM)
+                       ret = WIMLIB_ERR_NOMEM;
+               else
+                       ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+       }
+#else /* WITH_NTFS_3G */
+       static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1);
+       if (cd_utf8_to_utf16 == (iconv_t)(-1)) { /* first call, or an earlier open failed */
+               cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8");
+               if (cd_utf8_to_utf16 == (iconv_t)-1) {
+                       ERROR_WITH_ERRNO("Failed to get conversion descriptor "
+                                        "for converting UTF-8 to UTF-16LE");
+                       if (errno == ENOMEM)
+                               return WIMLIB_ERR_NOMEM;
+                       else
+                               return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
+               }
+       }
+
+       ret = utf8_to_utf16_size(utf8_str);
+       if (ret >= 0) {
+               size_t utf16_expected_nbytes;
+               char  *utf16_str;
+               size_t utf16_bytes_left;
+               size_t utf8_bytes_left;
+               size_t num_chars_converted;
+               const char *utf8_str_save;
+               char  *utf16_str_save;
+
+               utf16_expected_nbytes = (size_t)ret * 2; /* 16-bit units -> bytes */
+               utf16_str = MALLOC(utf16_expected_nbytes + 2); /* +2 for the 16-bit terminator */
+               if (utf16_str) {
+                       utf16_bytes_left = utf16_expected_nbytes;
+                       utf8_bytes_left = utf8_nbytes;
+                       utf8_str_save = utf8_str; /* iconv() advances both buffer pointers */
+                       utf16_str_save = utf16_str;
+                       num_chars_converted = iconv(cd_utf8_to_utf16,
+                                                   (char**)&utf8_str,
+                                                   &utf8_bytes_left,
+                                                   &utf16_str,
+                                                   &utf16_bytes_left);
+                       utf8_str = utf8_str_save; /* restore the start-of-buffer pointers */
+                       utf16_str = utf16_str_save;
+                       if (utf16_bytes_left == 0 &&
+                           utf8_bytes_left == 0 &&
+                           num_chars_converted != (size_t)(-1))
+                       {
+                               utf16_str[utf16_expected_nbytes] = '\0';
+                               utf16_str[utf16_expected_nbytes + 1] = '\0';
+                               *utf16_str_ret = utf16_str;
+                               *utf16_nbytes_ret = utf16_expected_nbytes;
+                               ret = 0;
+                       } else {
+                               FREE(utf16_str); /* iconv() failed, or input/output was not fully consumed/filled */
+                               ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+                       }
+               } else
+                       ret = WIMLIB_ERR_NOMEM;
+       } else
+               ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+#endif /* WITH_NTFS_3G */
+
+#ifdef ENABLE_ERROR_MESSAGES
+       if (ret != 0) {
+               ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE");
+               ERROR("The failing string was:");
+               print_string(utf8_str, utf8_nbytes);
+               putchar('\n');
+               ERROR("Length: %zu bytes", utf8_nbytes);
+       }
+#endif /* ENABLE_ERROR_MESSAGES */
+       return ret;
+}
index 7f0a2aa01eb64b1c2475cd8b19a649b907414feb..69fad24468332471d7049e8707b6f4ef1349b320 100644 (file)
@@ -291,7 +291,7 @@ static int create_dentry(struct wimfs_context *ctx, const char *path,
 
        new = new_dentry_with_inode(basename);
        if (!new)
-               return -ENOMEM;
+               return -errno;
 
        new->d_inode->resolved = 1;
        new->d_inode->ino = ctx->next_ino++;
@@ -1609,7 +1609,7 @@ static int wimfs_link(const char *to, const char *from)
                return -EEXIST;
        from_dentry = new_dentry(link_name);
        if (!from_dentry)
-               return -ENOMEM;
+               return -errno;
 
        inode_add_dentry(from_dentry, inode);
        from_dentry->d_inode = inode;
index f1ff0dd1adc6f28713b496ba35b826e9902c03fb..0bfc060e9a55d0430e2dadbf67e786c96399f34c 100644 (file)
@@ -461,14 +461,12 @@ out_set_dos_name:
 
                char *short_name_utf8;
                size_t short_name_utf8_len;
-               short_name_utf8 = utf16_to_utf8(dentry->short_name,
-                                               dentry->short_name_len,
-                                               &short_name_utf8_len);
-               if (!short_name_utf8) {
-                       ERROR("Out of memory");
-                       ret = WIMLIB_ERR_NOMEM;
+               ret = utf16_to_utf8(dentry->short_name,
+                                   dentry->short_name_len,
+                                   &short_name_utf8,
+                                   &short_name_utf8_len);
+               if (ret != 0)
                        goto out_close_dir_ni;
-               }
 
                if (is_hardlink) {
                        char *p;
index ae5502132480f8c2c7ec4968b36256a799127c36..0abbebac94608ab53bc4a2b5808b419f289030be 100644 (file)
@@ -362,10 +362,12 @@ static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni,
                         * alternate data stream entries */
                        struct ads_entry *new_ads_entry;
                        size_t stream_name_utf8_len;
-                       stream_name_utf8 = utf16_to_utf8((const char*)attr_record_name(actx->attr),
-                                                        name_length * 2,
-                                                        &stream_name_utf8_len);
-                       if (!stream_name_utf8)
+
+                       ret = utf16_to_utf8((const char*)attr_record_name(actx->attr),
+                                           name_length * 2,
+                                           &stream_name_utf8,
+                                           &stream_name_utf8_len);
+                       if (ret != 0)
                                goto out_free_lte;
                        new_ads_entry = inode_add_ads(dentry->d_inode, stream_name_utf8);
                        FREE(stream_name_utf8);
@@ -435,12 +437,10 @@ static int wim_ntfs_capture_filldir(void *dirent, const ntfschar *name,
        if (name_type == FILE_NAME_DOS)
                return 0;
 
-       ret = -1;
-
-       utf8_name = utf16_to_utf8((const char*)name, name_len * 2,
-                                 &utf8_name_len);
-       if (!utf8_name)
-               goto out;
+       ret = utf16_to_utf8((const char*)name, name_len * 2,
+                           &utf8_name, &utf8_name_len);
+       if (ret != 0)
+               return -1;
 
        if (utf8_name[0] == '.' &&
             (utf8_name[1] == '\0' ||
@@ -484,15 +484,15 @@ static int change_dentry_short_name(struct dentry *dentry,
 {
        size_t short_name_utf16_len;
        char *short_name_utf16;
-       short_name_utf16 = utf8_to_utf16(short_name_utf8, short_name_utf8_len,
-                                        &short_name_utf16_len);
-       if (!short_name_utf16) {
-               ERROR_WITH_ERRNO("Failed to convert short name to UTF-16");
-               return WIMLIB_ERR_NOMEM;
+       int ret;
+
+       ret = utf8_to_utf16(short_name_utf8, short_name_utf8_len,
+                           &short_name_utf16, &short_name_utf16_len);
+       if (ret == 0) {
+               dentry->short_name = short_name_utf16;
+               dentry->short_name_len = short_name_utf16_len;
        }
-       dentry->short_name = short_name_utf16;
-       dentry->short_name_len = short_name_utf16_len;
-       return 0;
+       return ret;
 }
 
 /* Recursively build a WIM dentry tree corresponding to a NTFS volume.
@@ -553,8 +553,12 @@ static int build_dentry_tree_ntfs_recursive(struct dentry **root_p,
        }
 
        root = new_dentry_with_timeless_inode(path_basename(path));
-       if (!root)
-               return WIMLIB_ERR_NOMEM;
+       if (!root) {
+               if (errno == EILSEQ)
+                       return WIMLIB_ERR_INVALID_UTF8_STRING;
+               else
+                       return WIMLIB_ERR_NOMEM;
+       }
        *root_p = root;
 
        if (dir_ni && (name_type == FILE_NAME_WIN32_AND_DOS
index b90ebe5e41c067edb1c76eb14ac311cce39a4699..bb829651098cfdd73a8d610cbf0ee4ce7a01bf6b 100644 (file)
@@ -80,9 +80,16 @@ static ssize_t get_symlink_name(const u8 *resource, size_t resource_len,
        }
        if (header_size + substitute_name_offset + substitute_name_len > resource_len)
                return -EIO;
-       link_target = utf16_to_utf8((const char *)p + substitute_name_offset,
-                                   substitute_name_len,
-                                   &link_target_len);
+
+       ret = utf16_to_utf8((const char *)p + substitute_name_offset,
+                           substitute_name_len,
+                           &link_target, &link_target_len);
+       if (ret == WIMLIB_ERR_INVALID_UTF16_STRING)
+               return -EILSEQ;
+       else if (ret == WIMLIB_ERR_NOMEM)
+               return -ENOMEM;
+
+       wimlib_assert(ret == 0);
 
        if (!link_target)
                return -EIO;
@@ -124,22 +131,28 @@ out:
        return ret;
 }
 
-static void *make_symlink_reparse_data_buf(const char *symlink_target,
-                                          size_t *len_ret)
+static int make_symlink_reparse_data_buf(const char *symlink_target,
+                                        size_t *len_ret, void **buf_ret)
 {
        size_t utf8_len = strlen(symlink_target);
+       char *name_utf16;
        size_t utf16_len;
-       char *name_utf16 = utf8_to_utf16(symlink_target, utf8_len, &utf16_len);
-       if (!name_utf16)
-               return NULL;
+       int ret;
+
+       ret = utf8_to_utf16(symlink_target, utf8_len,
+                           &name_utf16, &utf16_len);
+       if (ret != 0)
+               return ret;
 
        for (size_t i = 0; i < utf16_len / 2; i++)
                if (((u16*)name_utf16)[i] == cpu_to_le16('/'))
                        ((u16*)name_utf16)[i] = cpu_to_le16('\\');
        size_t len = 12 + utf16_len * 2;
        void *buf = MALLOC(len);
-       if (!buf)
-               goto out;
+       if (!buf) {
+               FREE(name_utf16);
+               return WIMLIB_ERR_NOMEM;
+       }
 
        u8 *p = buf;
        p = put_u16(p, utf16_len); /* Substitute name offset */
@@ -150,9 +163,10 @@ static void *make_symlink_reparse_data_buf(const char *symlink_target,
        p = put_bytes(p, utf16_len, (const u8*)name_utf16);
        p = put_bytes(p, utf16_len, (const u8*)name_utf16);
        *len_ret = len;
+       *buf_ret = buf;
 out:
        FREE(name_utf16);
-       return buf;
+       return 0;
 }
 
 /* Get the symlink target from a dentry.
@@ -204,9 +218,10 @@ int inode_set_symlink(struct inode *inode, const char *target,
        u8 symlink_buf_hash[SHA1_HASH_SIZE];
        void *symlink_buf;
 
-       symlink_buf = make_symlink_reparse_data_buf(target, &symlink_buf_len);
-       if (!symlink_buf)
-               return WIMLIB_ERR_NOMEM;
+       ret = make_symlink_reparse_data_buf(target, &symlink_buf_len,
+                                           &symlink_buf);
+       if (ret != 0)
+               return ret;
 
        DEBUG("Made symlink reparse data buf (len = %zu, name len = %zu)",
                        symlink_buf_len, symlink_buf_len);
index bcaefacddebff7cd7d4c6c180b265db7ccfdbe18..c4f6e5ec01bf7a961a52bc2fbcada00999c77d70 100644 (file)
 
 #include "wimlib_internal.h"
 #include "endianness.h"
-#include "sha1.h"
 #include "timestamp.h"
-#include <sys/time.h>
 
-
-#include <iconv.h>
-#include <string.h>
 #include <ctype.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
 #include <errno.h>
-
-#ifdef WITH_NTFS_3G
-#include <ntfs-3g/volume.h>
-#include <ntfs-3g/unistr.h>
-#endif
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> /* for getpid() */
 
 /* True if wimlib is to print an informational message when an error occurs.
  * This can be turned off by calling wimlib_set_print_errors(false). */
@@ -117,8 +107,6 @@ static const char *error_strings[] = {
                = "Success",
        [WIMLIB_ERR_ALREADY_LOCKED]
                = "The WIM is already locked for writing",
-       [WIMLIB_ERR_CHAR_CONVERSION]
-               = "Failed to perform a conversion between UTF-8 and UTF-16LE",
        [WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE]
                = "Lookup table is compressed",
        [WIMLIB_ERR_DECOMPRESSION]
@@ -165,11 +153,21 @@ static const char *error_strings[] = {
                = "The part number or total parts of the WIM is invalid",
        [WIMLIB_ERR_INVALID_RESOURCE_HASH]
                = "The SHA1 message digest of a WIM resource did not match the expected value",
+       [WIMLIB_ERR_ICONV_NOT_AVAILABLE]
+               = "The iconv() function does not seem to work. "
+                 "Maybe check to make sure the directory /usr/lib/gconv exists",
        [WIMLIB_ERR_INVALID_RESOURCE_SIZE]
                = "A resource entry in the WIM has an invalid size",
        [WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE]
                = "The version of wimlib that has mounted a WIM image is incompatible with the "
                  "version being used to unmount it",
+       [WIMLIB_ERR_INVALID_UTF8_STRING]
+               = "A string provided as input by the user was not a valid UTF-8 string",
+       [WIMLIB_ERR_INVALID_UTF16_STRING]
+               = "A string in a WIM dentry is not a valid UTF-16LE string",
+       [WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE]
+               = "libxml2 was unable to find a character encoding conversion handler "
+                 "for UTF-16LE",
        [WIMLIB_ERR_LINK]
                = "Failed to create a hard or symbolic link when extracting "
                        "a file from the WIM",
@@ -281,122 +279,6 @@ WIMLIBAPI int wimlib_set_memory_allocator(void *(*malloc_func)(size_t),
 #endif
 }
 
-
-
-static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1);
-
-/* Converts a string in the UTF-16 encoding to a newly allocated string in the
- * UTF-8 encoding.  */
-char *utf16_to_utf8(const char *utf16_str, size_t utf16_len,
-                   size_t *utf8_len_ret)
-{
-#ifdef WITH_NTFS_3G
-       if (utf16_len & 1) {
-               errno = -EILSEQ;
-               return NULL;
-       }
-       char *outs = NULL;
-       int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str,
-                                    utf16_len >> 1, &outs, 0);
-       if (outs_len >= 0) {
-               *utf8_len_ret = outs_len;
-       } else {
-               ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8");
-               outs = NULL;
-       }
-       return outs;
-#else
-       if (cd_utf16_to_utf8 == (iconv_t)(-1)) {
-               cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE");
-               if (cd_utf16_to_utf8 == (iconv_t)-1) {
-                       ERROR_WITH_ERRNO("Failed to get conversion descriptor "
-                                        "for converting UTF-16LE to UTF-8");
-                       return NULL;
-               }
-       }
-       size_t utf16_bytes_left  = utf16_len;
-       size_t utf8_bytes_left   = utf16_len;
-
-       char *utf8_str = MALLOC(utf8_bytes_left);
-       if (!utf8_str)
-               return NULL;
-
-       char *orig_utf8_str = utf8_str;
-
-       size_t num_chars_converted = iconv(cd_utf16_to_utf8, (char**)&utf16_str,
-                       &utf16_bytes_left, &utf8_str, &utf8_bytes_left);
-
-       if (num_chars_converted == (size_t)(-1)) {
-               ERROR_WITH_ERRNO("Failed to convert UTF-16LE string to UTF-8 "
-                                "string");
-               FREE(orig_utf8_str);
-               return NULL;
-       }
-
-       size_t utf8_len = utf16_len - utf8_bytes_left;
-
-       *utf8_len_ret = utf8_len;
-       orig_utf8_str[utf8_len] = '\0';
-       return orig_utf8_str;
-#endif
-}
-
-static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1);
-
-/* Converts a string in the UTF-8 encoding to a newly allocated string in the
- * UTF-16 encoding.  */
-char *utf8_to_utf16(const char *utf8_str, size_t utf8_len,
-                   size_t *utf16_len_ret)
-{
-#ifdef WITH_NTFS_3G
-       char *outs = NULL;
-       int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs);
-       if (outs_nchars >= 0) {
-               *utf16_len_ret = (size_t)outs_nchars * 2;
-       } else {
-               ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE");
-               outs = NULL;
-       }
-       return outs;
-#else
-       if (cd_utf8_to_utf16 == (iconv_t)(-1)) {
-               cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8");
-               if (cd_utf8_to_utf16 == (iconv_t)-1) {
-                       ERROR_WITH_ERRNO("Failed to get conversion descriptor "
-                                        "for converting UTF-8 to UTF-16LE");
-                       return NULL;
-               }
-       }
-
-       size_t utf8_bytes_left   = utf8_len;
-       size_t utf16_capacity    = utf8_len * 4;
-       size_t utf16_bytes_left  = utf16_capacity;
-
-       char *utf16_str = MALLOC(utf16_capacity + 2);
-       if (!utf16_str)
-               return NULL;
-
-       char *orig_utf16_str = utf16_str;
-
-       size_t num_chars_converted = iconv(cd_utf8_to_utf16, (char**)&utf8_str,
-                       &utf8_bytes_left, &utf16_str, &utf16_bytes_left);
-
-       if (num_chars_converted == (size_t)(-1)) {
-               ERROR_WITH_ERRNO("Failed to convert UTF-8 string to UTF-16LE "
-                                "string");
-               FREE(orig_utf16_str);
-               return NULL;
-       }
-
-       size_t utf16_len = utf16_capacity - utf16_bytes_left;
-
-       *utf16_len_ret = utf16_len;
-       orig_utf16_str[utf16_len] = '\0';
-       orig_utf16_str[utf16_len + 1] = '\0';
-       return orig_utf16_str;
-#endif
-}
-
 static bool seeded = false;
 
 static void seed_random()
index 8a6dc4da2174a1c747db67dfed9347b8afa01cea..08b15636acdcc66ae0c440755bfc45bf276ec13e 100644 (file)
@@ -155,12 +155,14 @@ extern char *wimlib_strdup(const char *str);
 #endif /* ENABLE_CUSTOM_MEMORY_ALLOCATOR */
 
 
-extern char *utf16_to_utf8(const char *utf16_str, size_t utf16_len,
-                          size_t *utf8_len_ret);
+/* encoding.c */
+extern int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes,
+                        char **utf8_str_ret, size_t *utf8_nbytes_ret);
 
-extern char *utf8_to_utf16(const char *utf8_str, size_t utf8_len,
-                          size_t *utf16_len_ret);
+extern int utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes,
+                        char **utf16_str_ret, size_t *utf16_nbytes_ret);
 
+/* util.c */
 extern void randomize_byte_array(u8 *p, size_t n);
 
 extern void randomize_char_array_with_alnum(char p[], size_t n);
index 57f9aa9722a3e83741571d04bf92c46a7c0cc88a..b2dd303423e16c96c79b50a961b2353f2eaa8a2c 100644 (file)
@@ -681,7 +681,6 @@ typedef int (*wimlib_progress_func_t)(enum wimlib_progress_msg msg_type,
 enum wimlib_error_code {
        WIMLIB_ERR_SUCCESS = 0,
        WIMLIB_ERR_ALREADY_LOCKED,
-       WIMLIB_ERR_CHAR_CONVERSION,
        WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE,
        WIMLIB_ERR_DECOMPRESSION,
        WIMLIB_ERR_DELETE_STAGING_DIR,
@@ -689,6 +688,7 @@ enum wimlib_error_code {
        WIMLIB_ERR_FORK,
        WIMLIB_ERR_FUSE,
        WIMLIB_ERR_FUSERMOUNT,
+       WIMLIB_ERR_ICONV_NOT_AVAILABLE,
        WIMLIB_ERR_IMAGE_COUNT,
        WIMLIB_ERR_IMAGE_NAME_COLLISION,
        WIMLIB_ERR_INTEGRITY,
@@ -706,6 +706,9 @@ enum wimlib_error_code {
        WIMLIB_ERR_INVALID_RESOURCE_SIZE,
        WIMLIB_ERR_INVALID_SECURITY_DATA,
        WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE,
+       WIMLIB_ERR_INVALID_UTF8_STRING,
+       WIMLIB_ERR_INVALID_UTF16_STRING,
+       WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE,
        WIMLIB_ERR_LINK,
        WIMLIB_ERR_MKDIR,
        WIMLIB_ERR_MQUEUE,
index 65e34a2114db281b4f1af4f97fffb73177ec5f3d..9fbe259f76e46ca686e6a1c571c63856637c964c 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1322,7 +1322,7 @@ int write_xml_data(const struct wim_info *wim_info, int image, FILE *out,
        encoding_handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
        if (!encoding_handler) {
                ERROR("Failed to get XML character encoding handler for UTF-16LE");
-               ret = WIMLIB_ERR_CHAR_CONVERSION;
+               ret = WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE;
                goto out_cleanup_char_encoding_handlers;
        }