From ded522fd0a15d740354329c5066ebd3473563e57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 18 Dec 2012 20:09:59 -0600 Subject: [PATCH] utf8_to_utf16(), utf16_to_utf8() changes These two functions have been moved out of 'util.c' to a separate file 'encoding.c'. They also now return an integer error code because they can fail for at least two reasons (no memory, and invalid UTF-8 or UTF-16LE character). --- Makefile.am | 1 + src/add_image.c | 8 +- src/dentry.c | 49 ++++--- src/dentry.h | 3 + src/encoding.c | 332 +++++++++++++++++++++++++++++++++++++++++++++ src/mount_image.c | 4 +- src/ntfs-apply.c | 12 +- src/ntfs-capture.c | 44 +++--- src/symlink.c | 43 ++++-- src/util.c | 144 ++------------------ src/util.h | 10 +- src/wimlib.h | 5 +- src/xml.c | 2 +- 13 files changed, 450 insertions(+), 207 deletions(-) create mode 100644 src/encoding.c diff --git a/Makefile.am b/Makefile.am index 96a9d390..35b2ad1a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,6 +19,7 @@ libwim_la_SOURCES = \ src/delete_image.c \ src/dentry.c \ src/dentry.h \ + src/encoding.c \ src/endianness.h \ src/export_image.c \ src/extract_image.c \ diff --git a/src/add_image.c b/src/add_image.c index d09dd197..d658dc92 100644 --- a/src/add_image.c +++ b/src/add_image.c @@ -214,8 +214,12 @@ static int build_dentry_tree(struct dentry **root_ret, filename = path_basename(root_disk_path); root = new_dentry_with_timeless_inode(filename); - if (!root) - return WIMLIB_ERR_NOMEM; + if (!root) { + if (errno == EILSEQ) + return WIMLIB_ERR_INVALID_UTF8_STRING; + else + return WIMLIB_ERR_NOMEM; + } inode = root->d_inode; diff --git a/src/dentry.c b/src/dentry.c index fb57c937..e7d34f5d 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -83,13 +83,12 @@ int get_names(char **name_utf16_ret, char **name_utf8_ret, size_t utf8_len; size_t utf16_len; char *name_utf16, *name_utf8; + int ret; utf8_len = strlen(name); - - name_utf16 = utf8_to_utf16(name, utf8_len, &utf16_len); - - if (!name_utf16) - return 
WIMLIB_ERR_NOMEM; + ret = utf8_to_utf16(name, utf8_len, &name_utf16, &utf16_len); + if (ret != 0) + return ret; name_utf8 = MALLOC(utf8_len + 1); if (!name_utf8) { @@ -115,11 +114,14 @@ static int change_dentry_name(struct dentry *dentry, const char *new_name) ret = get_names(&dentry->file_name, &dentry->file_name_utf8, &dentry->file_name_len, &dentry->file_name_utf8_len, - new_name); - FREE(dentry->short_name); - dentry->short_name_len = 0; - if (ret == 0) + new_name); + if (ret == 0) { + if (dentry->short_name_len) { + FREE(dentry->short_name); + dentry->short_name_len = 0; + } dentry->length = dentry_correct_length(dentry); + } return ret; } @@ -809,6 +811,9 @@ static struct inode *new_inode() * * Returns a pointer to the new dentry, or NULL if out of memory. */ +#ifndef WITH_FUSE +static +#endif struct dentry *new_dentry(const char *name) { struct dentry *dentry; @@ -826,7 +831,7 @@ struct dentry *new_dentry(const char *name) return dentry; err: FREE(dentry); - ERROR("Failed to allocate new dentry"); + ERROR_WITH_ERRNO("Failed to create new dentry with name \"%s\"", name); return NULL; } @@ -1251,15 +1256,14 @@ static int read_ads_entries(const u8 *p, struct inode *inode, } get_bytes(p, cur_entry->stream_name_len, (u8*)cur_entry->stream_name); - cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, - cur_entry->stream_name_len, - &utf8_len); - cur_entry->stream_name_utf8_len = utf8_len; - if (!cur_entry->stream_name_utf8) { - ret = WIMLIB_ERR_NOMEM; + ret = utf16_to_utf8(cur_entry->stream_name, + cur_entry->stream_name_len, + &cur_entry->stream_name_utf8, + &utf8_len); + if (ret != 0) goto out_free_ads_entries; - } + cur_entry->stream_name_utf8_len = utf8_len; } /* It's expected that the size of every ADS entry is a multiple * of 8. 
However, to be safe, I'm allowing the possibility of @@ -1434,15 +1438,10 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, p = get_bytes(p, file_name_len, file_name); /* Convert filename to UTF-8. */ - file_name_utf8 = utf16_to_utf8(file_name, file_name_len, - &file_name_utf8_len); - - if (!file_name_utf8) { - ERROR("Failed to allocate memory to convert UTF-16 " - "filename (%hu bytes) to UTF-8", file_name_len); - ret = WIMLIB_ERR_NOMEM; + ret = utf16_to_utf8(file_name, file_name_len, &file_name_utf8, + &file_name_utf8_len); + if (ret != 0) goto out_free_file_name; - } if (*(u16*)p) WARNING("Expected two zero bytes following the file name " "`%s', but found non-zero bytes", file_name_utf8); diff --git a/src/dentry.h b/src/dentry.h index afdb3619..0d3f89e3 100644 --- a/src/dentry.h +++ b/src/dentry.h @@ -358,7 +358,10 @@ extern struct dentry *get_parent_dentry(struct WIMStruct *w, const char *path); extern int print_dentry(struct dentry *dentry, void *lookup_table); extern int print_dentry_full_path(struct dentry *entry, void *ignore); +#ifdef WITH_FUSE extern struct dentry *new_dentry(const char *name); +#endif + extern struct dentry *new_dentry_with_inode(const char *name); extern struct dentry *new_dentry_with_timeless_inode(const char *name); diff --git a/src/encoding.c b/src/encoding.c new file mode 100644 index 00000000..edb24c70 --- /dev/null +++ b/src/encoding.c @@ -0,0 +1,332 @@ +/* + * encoding.c: Convert UTF-8 to UTF-16LE strings and vice versa + */ + +/* + * Copyright (C) 2012 Eric Biggers + * + * This file is part of wimlib, a library for working with WIM files. + * + * wimlib is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) + * any later version. 
+ * + * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR + * A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with wimlib; if not, see http://www.gnu.org/licenses/. + */ + +#include "wimlib.h" +#include "util.h" +#include "endianness.h" + +#include + +#ifdef WITH_NTFS_3G +#include +#include +#else +#include +#endif + +/* + * NOTE: + * + * utf16_to_utf8_size() and utf8_to_utf16_size() were taken from + * libntfs-3g/unistr.c in the NTFS-3g sources. (Modified slightly to remove + * unneeded functionality.) + */ +#ifndef WITH_NTFS_3G +/* + * Return the amount of 8-bit elements in UTF-8 needed (without the terminating + * null) to store a given UTF-16LE string. + * + * Return -1 with errno set to EILSEQ if the string has an invalid sequence. + */ +static int utf16_to_utf8_size(const u16 *ins, const int ins_len) +{ + int i, ret = -1; + int count = 0; + bool surrog; + + surrog = false; + for (i = 0; i < ins_len && ins[i]; i++) { + unsigned short c = le16_to_cpu(ins[i]); + if (surrog) { + if ((c >= 0xdc00) && (c < 0xe000)) { + surrog = false; + count += 4; + } else + goto fail; + } else + if (c < 0x80) + count++; + else if (c < 0x800) + count += 2; + else if (c < 0xd800) + count += 3; + else if (c < 0xdc00) + surrog = true; +#if NOREVBOM + else if ((c >= 0xe000) && (c < 0xfffe)) +#else + else if (c >= 0xe000) +#endif + count += 3; + else + goto fail; + } + if (surrog) + goto fail; + + ret = count; +out: + return ret; +fail: + errno = EILSEQ; + goto out; +} + +/* + * Return the amount of 16-bit elements in UTF-16LE needed + * (without the terminating null) to store given UTF-8 string. + * + * Return -1 with errno set to EILSEQ if a leading byte of 0xF5 or above is found. 
+ * + * Note: This does not check whether the input sequence is a valid utf8 string, + * and should be used only in context where such check is made! + */ +static int utf8_to_utf16_size(const char *s) +{ + unsigned int byte; + size_t count = 0; + while ((byte = *((const unsigned char *)s++))) { + count++; + if (byte >= 0xc0) { + if (byte >= 0xF5) { + errno = EILSEQ; + return -1; + } + if (!*s) + break; + if (byte >= 0xC0) + s++; + if (!*s) + break; + if (byte >= 0xE0) + s++; + if (!*s) + break; + if (byte >= 0xF0) { + s++; + count++; + } + } + } + return count; +} +#endif /* !WITH_NTFS_3G */ + +/* Converts a string in the UTF-16LE encoding to a newly allocated string in the + * UTF-8 encoding. + * + * If available, do so by calling a similar function from libntfs-3g. + * Otherwise, use iconv() along with the helper function utf16_to_utf8_size(). + */ +int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes, + char **utf8_str_ret, size_t *utf8_nbytes_ret) +{ + int ret; + + if (utf16_nbytes == 0) { + *utf8_str_ret = NULL; + *utf8_nbytes_ret = 0; + return 0; + } + + if (utf16_nbytes & 1) { + ERROR("UTF-16LE string is invalid (odd number of bytes)!"); + return WIMLIB_ERR_INVALID_UTF16_STRING; + } +#ifdef WITH_NTFS_3G + char *outs = NULL; + int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str, + utf16_nbytes / 2, &outs, 0); + if (outs_len >= 0) { + *utf8_str_ret = outs; + *utf8_nbytes_ret = outs_len; + ret = 0; + } else { + if (errno == ENOMEM) + ret = WIMLIB_ERR_NOMEM; + else + ret = WIMLIB_ERR_INVALID_UTF16_STRING; + } +#else /* WITH_NTFS_3G */ + static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1); + if (cd_utf16_to_utf8 == (iconv_t)(-1)) { + cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE"); + if (cd_utf16_to_utf8 == (iconv_t)-1) { + ERROR_WITH_ERRNO("Failed to get conversion descriptor " + "for converting UTF-16LE to UTF-8"); + if (errno == ENOMEM) + return WIMLIB_ERR_NOMEM; + else + return WIMLIB_ERR_ICONV_NOT_AVAILABLE; + } + } + ret = 
utf16_to_utf8_size((const u16*)utf16_str, utf16_nbytes / 2); + if (ret >= 0) { + size_t utf8_expected_nbytes; + char *utf8_str; + size_t utf8_bytes_left; + size_t utf16_bytes_left; + size_t num_chars_converted; + char *utf8_str_save; + const char *utf16_str_save; + + utf8_expected_nbytes = ret; + utf8_str = MALLOC(utf8_expected_nbytes + 1); + if (utf8_str) { + utf8_bytes_left = utf8_expected_nbytes; + utf16_bytes_left = utf16_nbytes; + utf8_str_save = utf8_str; + utf16_str_save = utf16_str; + num_chars_converted = iconv(cd_utf16_to_utf8, + (char**)&utf16_str, + &utf16_bytes_left, + &utf8_str, + &utf8_bytes_left); + utf8_str = utf8_str_save; + utf16_str = utf16_str_save; + if (utf16_bytes_left == 0 && + utf8_bytes_left == 0 && + num_chars_converted != (size_t)(-1)) + { + utf8_str[utf8_expected_nbytes] = '\0'; + *utf8_str_ret = utf8_str; + *utf8_nbytes_ret = utf8_expected_nbytes; + ret = 0; + } else { + FREE(utf8_str); + ret = WIMLIB_ERR_INVALID_UTF16_STRING; + } + } else + ret = WIMLIB_ERR_NOMEM; + } else + ret = WIMLIB_ERR_INVALID_UTF16_STRING; +#endif /* WITH_NTFS_3G */ + +#ifdef ENABLE_ERROR_MESSAGES + if (ret != 0) { + ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8"); + ERROR("The failing string was:"); + print_string(utf16_str, utf16_nbytes); + putchar('\n'); + } +#endif /* ENABLE_ERROR_MESSAGES */ + return ret; +} + + +/* Converts a string in the UTF-8 encoding to a newly allocated string in the + * UTF-16 encoding. + * + * If available, do so by calling a similar function from libntfs-3g. + * Otherwise, use iconv() along with the helper function utf8_to_utf16_size(). 
+ */ +int utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes, + char **utf16_str_ret, size_t *utf16_nbytes_ret) +{ + int ret; + if (utf8_nbytes == 0) { + *utf16_str_ret = NULL; + *utf16_nbytes_ret = 0; + return 0; + } +#ifdef WITH_NTFS_3G + char *outs = NULL; + int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs); + if (outs_nchars >= 0) { + *utf16_str_ret = outs; + *utf16_nbytes_ret = (size_t)outs_nchars * 2; + ret = 0; + } else { + if (errno == ENOMEM) + ret = WIMLIB_ERR_NOMEM; + else + ret = WIMLIB_ERR_INVALID_UTF8_STRING; + } +#else /* WITH_NTFS_3G */ + static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1); + if (cd_utf8_to_utf16 == (iconv_t)(-1)) { + cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8"); + if (cd_utf8_to_utf16 == (iconv_t)-1) { + ERROR_WITH_ERRNO("Failed to get conversion descriptor " + "for converting UTF-8 to UTF-16LE"); + if (errno == ENOMEM) + return WIMLIB_ERR_NOMEM; + else + return WIMLIB_ERR_ICONV_NOT_AVAILABLE; + } + } + + ret = utf8_to_utf16_size(utf8_str); + if (ret >= 0) { + size_t utf16_expected_nbytes; + char *utf16_str; + size_t utf16_bytes_left; + size_t utf8_bytes_left; + size_t num_chars_converted; + const char *utf8_str_save; + char *utf16_str_save; + + utf16_expected_nbytes = (size_t)ret * 2; + utf16_str = MALLOC(utf16_expected_nbytes + 2); + if (utf16_str) { + utf16_bytes_left = utf16_expected_nbytes; + utf8_bytes_left = utf8_nbytes; + utf8_str_save = utf8_str; + utf16_str_save = utf16_str; + num_chars_converted = iconv(cd_utf8_to_utf16, + (char**)&utf8_str, + &utf8_bytes_left, + &utf16_str, + &utf16_bytes_left); + utf8_str = utf8_str_save; + utf16_str = utf16_str_save; + if (utf16_bytes_left == 0 && + utf8_bytes_left == 0 && + num_chars_converted != (size_t)(-1)) + { + utf16_str[utf16_expected_nbytes] = '\0'; + utf16_str[utf16_expected_nbytes + 1] = '\0'; + *utf16_str_ret = utf16_str; + *utf16_nbytes_ret = utf16_expected_nbytes; + ret = 0; + } else { + FREE(utf16_str); + ret = WIMLIB_ERR_INVALID_UTF8_STRING; + } + } 
else + ret = WIMLIB_ERR_NOMEM; + } else + ret = WIMLIB_ERR_INVALID_UTF8_STRING; +#endif /* WITH_NTFS_3G */ + +#ifdef ENABLE_ERROR_MESSAGES + if (ret != 0) { + ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE"); + ERROR("The failing string was:"); + print_string(utf8_str, utf8_nbytes); + putchar('\n'); + ERROR("Length: %zu bytes", utf8_nbytes); + } +#endif /* ENABLE_ERROR_MESSAGES */ + return ret; +} diff --git a/src/mount_image.c b/src/mount_image.c index 7f0a2aa0..69fad244 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -291,7 +291,7 @@ static int create_dentry(struct wimfs_context *ctx, const char *path, new = new_dentry_with_inode(basename); if (!new) - return -ENOMEM; + return -errno; new->d_inode->resolved = 1; new->d_inode->ino = ctx->next_ino++; @@ -1609,7 +1609,7 @@ static int wimfs_link(const char *to, const char *from) return -EEXIST; from_dentry = new_dentry(link_name); if (!from_dentry) - return -ENOMEM; + return -errno; inode_add_dentry(from_dentry, inode); from_dentry->d_inode = inode; diff --git a/src/ntfs-apply.c b/src/ntfs-apply.c index f1ff0dd1..0bfc060e 100644 --- a/src/ntfs-apply.c +++ b/src/ntfs-apply.c @@ -461,14 +461,12 @@ out_set_dos_name: char *short_name_utf8; size_t short_name_utf8_len; - short_name_utf8 = utf16_to_utf8(dentry->short_name, - dentry->short_name_len, - &short_name_utf8_len); - if (!short_name_utf8) { - ERROR("Out of memory"); - ret = WIMLIB_ERR_NOMEM; + ret = utf16_to_utf8(dentry->short_name, + dentry->short_name_len, + &short_name_utf8, + &short_name_utf8_len); + if (ret != 0) goto out_close_dir_ni; - } if (is_hardlink) { char *p; diff --git a/src/ntfs-capture.c b/src/ntfs-capture.c index ae550213..0abbebac 100644 --- a/src/ntfs-capture.c +++ b/src/ntfs-capture.c @@ -362,10 +362,12 @@ static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni, * alternate data stream entries */ struct ads_entry *new_ads_entry; size_t stream_name_utf8_len; - stream_name_utf8 = utf16_to_utf8((const 
char*)attr_record_name(actx->attr), - name_length * 2, - &stream_name_utf8_len); - if (!stream_name_utf8) + + ret = utf16_to_utf8((const char*)attr_record_name(actx->attr), + name_length * 2, + &stream_name_utf8, + &stream_name_utf8_len); + if (ret != 0) goto out_free_lte; new_ads_entry = inode_add_ads(dentry->d_inode, stream_name_utf8); FREE(stream_name_utf8); @@ -435,12 +437,10 @@ static int wim_ntfs_capture_filldir(void *dirent, const ntfschar *name, if (name_type == FILE_NAME_DOS) return 0; - ret = -1; - - utf8_name = utf16_to_utf8((const char*)name, name_len * 2, - &utf8_name_len); - if (!utf8_name) - goto out; + ret = utf16_to_utf8((const char*)name, name_len * 2, + &utf8_name, &utf8_name_len); + if (ret != 0) + return -1; if (utf8_name[0] == '.' && (utf8_name[1] == '\0' || @@ -484,15 +484,15 @@ static int change_dentry_short_name(struct dentry *dentry, { size_t short_name_utf16_len; char *short_name_utf16; - short_name_utf16 = utf8_to_utf16(short_name_utf8, short_name_utf8_len, - &short_name_utf16_len); - if (!short_name_utf16) { - ERROR_WITH_ERRNO("Failed to convert short name to UTF-16"); - return WIMLIB_ERR_NOMEM; + int ret; + + ret = utf8_to_utf16(short_name_utf8, short_name_utf8_len, + &short_name_utf16, &short_name_utf16_len); + if (ret == 0) { + dentry->short_name = short_name_utf16; + dentry->short_name_len = short_name_utf16_len; } - dentry->short_name = short_name_utf16; - dentry->short_name_len = short_name_utf16_len; - return 0; + return ret; } /* Recursively build a WIM dentry tree corresponding to a NTFS volume. 
@@ -553,8 +553,12 @@ static int build_dentry_tree_ntfs_recursive(struct dentry **root_p, } root = new_dentry_with_timeless_inode(path_basename(path)); - if (!root) - return WIMLIB_ERR_NOMEM; + if (!root) { + if (errno == EILSEQ) + return WIMLIB_ERR_INVALID_UTF8_STRING; + else + return WIMLIB_ERR_NOMEM; + } *root_p = root; if (dir_ni && (name_type == FILE_NAME_WIN32_AND_DOS diff --git a/src/symlink.c b/src/symlink.c index b90ebe5e..bb829651 100644 --- a/src/symlink.c +++ b/src/symlink.c @@ -80,9 +80,16 @@ static ssize_t get_symlink_name(const u8 *resource, size_t resource_len, } if (header_size + substitute_name_offset + substitute_name_len > resource_len) return -EIO; - link_target = utf16_to_utf8((const char *)p + substitute_name_offset, - substitute_name_len, - &link_target_len); + + ret = utf16_to_utf8((const char *)p + substitute_name_offset, + substitute_name_len, + &link_target, &link_target_len); + if (ret == WIMLIB_ERR_INVALID_UTF16_STRING) + return -EILSEQ; + else if (ret == WIMLIB_ERR_NOMEM) + return -ENOMEM; + + wimlib_assert(ret == 0); if (!link_target) return -EIO; @@ -124,22 +131,28 @@ out: return ret; } -static void *make_symlink_reparse_data_buf(const char *symlink_target, - size_t *len_ret) +static int make_symlink_reparse_data_buf(const char *symlink_target, + size_t *len_ret, void **buf_ret) { size_t utf8_len = strlen(symlink_target); + char *name_utf16; size_t utf16_len; - char *name_utf16 = utf8_to_utf16(symlink_target, utf8_len, &utf16_len); - if (!name_utf16) - return NULL; + int ret; + + ret = utf8_to_utf16(symlink_target, utf8_len, + &name_utf16, &utf16_len); + if (ret != 0) + return ret; for (size_t i = 0; i < utf16_len / 2; i++) if (((u16*)name_utf16)[i] == cpu_to_le16('/')) ((u16*)name_utf16)[i] = cpu_to_le16('\\'); size_t len = 12 + utf16_len * 2; void *buf = MALLOC(len); - if (!buf) - goto out; + if (!buf) { + FREE(name_utf16); + return WIMLIB_ERR_NOMEM; + } u8 *p = buf; p = put_u16(p, utf16_len); /* Substitute name offset */ @@ -150,9 
+163,10 @@ static void *make_symlink_reparse_data_buf(const char *symlink_target, p = put_bytes(p, utf16_len, (const u8*)name_utf16); p = put_bytes(p, utf16_len, (const u8*)name_utf16); *len_ret = len; + *buf_ret = buf; out: FREE(name_utf16); - return buf; + return 0; } /* Get the symlink target from a dentry. @@ -204,9 +218,10 @@ int inode_set_symlink(struct inode *inode, const char *target, u8 symlink_buf_hash[SHA1_HASH_SIZE]; void *symlink_buf; - symlink_buf = make_symlink_reparse_data_buf(target, &symlink_buf_len); - if (!symlink_buf) - return WIMLIB_ERR_NOMEM; + ret = make_symlink_reparse_data_buf(target, &symlink_buf_len, + &symlink_buf); + if (ret != 0) + return ret; DEBUG("Made symlink reparse data buf (len = %zu, name len = %zu)", symlink_buf_len, symlink_buf_len); diff --git a/src/util.c b/src/util.c index bcaefacd..c4f6e5ec 100644 --- a/src/util.c +++ b/src/util.c @@ -23,23 +23,13 @@ #include "wimlib_internal.h" #include "endianness.h" -#include "sha1.h" #include "timestamp.h" -#include - -#include -#include #include -#include -#include -#include #include - -#ifdef WITH_NTFS_3G -#include -#include -#endif +#include +#include +#include /* for getpid() */ /* True if wimlib is to print an informational message when an error occurs. * This can be turned off by calling wimlib_set_print_errors(false). */ @@ -117,8 +107,6 @@ static const char *error_strings[] = { = "Success", [WIMLIB_ERR_ALREADY_LOCKED] = "The WIM is already locked for writing", - [WIMLIB_ERR_CHAR_CONVERSION] - = "Failed to perform a conversion between UTF-8 and UTF-16LE", [WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE] = "Lookup table is compressed", [WIMLIB_ERR_DECOMPRESSION] @@ -165,11 +153,21 @@ static const char *error_strings[] = { = "The part number or total parts of the WIM is invalid", [WIMLIB_ERR_INVALID_RESOURCE_HASH] = "The SHA1 message digest of a WIM resource did not match the expected value", + [WIMLIB_ERR_ICONV_NOT_AVAILABLE] + = "The iconv() function does not seem to work. 
" + "Maybe check to make sure the directory /usr/lib/gconv exists", [WIMLIB_ERR_INVALID_RESOURCE_SIZE] = "A resource entry in the WIM has an invalid size", [WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE] = "The version of wimlib that has mounted a WIM image is incompatible with the " "version being used to unmount it", + [WIMLIB_ERR_INVALID_UTF8_STRING] + = "A string provided as input by the user was not a valid UTF-8 string", + [WIMLIB_ERR_INVALID_UTF16_STRING] + = "A string in a WIM dentry is not a valid UTF-16LE string", + [WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE] + = "libxml2 was unable to find a character encoding conversion handler " + "for UTF-16LE", [WIMLIB_ERR_LINK] = "Failed to create a hard or symbolic link when extracting " "a file from the WIM", @@ -281,122 +279,6 @@ WIMLIBAPI int wimlib_set_memory_allocator(void *(*malloc_func)(size_t), #endif } - - -static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1); - -/* Converts a string in the UTF-16 encoding to a newly allocated string in the - * UTF-8 encoding. 
*/ -char *utf16_to_utf8(const char *utf16_str, size_t utf16_len, - size_t *utf8_len_ret) -{ -#ifdef WITH_NTFS_3G - if (utf16_len & 1) { - errno = -EILSEQ; - return NULL; - } - char *outs = NULL; - int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str, - utf16_len >> 1, &outs, 0); - if (outs_len >= 0) { - *utf8_len_ret = outs_len; - } else { - ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8"); - outs = NULL; - } - return outs; -#else - if (cd_utf16_to_utf8 == (iconv_t)(-1)) { - cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE"); - if (cd_utf16_to_utf8 == (iconv_t)-1) { - ERROR_WITH_ERRNO("Failed to get conversion descriptor " - "for converting UTF-16LE to UTF-8"); - return NULL; - } - } - size_t utf16_bytes_left = utf16_len; - size_t utf8_bytes_left = utf16_len; - - char *utf8_str = MALLOC(utf8_bytes_left); - if (!utf8_str) - return NULL; - - char *orig_utf8_str = utf8_str; - - size_t num_chars_converted = iconv(cd_utf16_to_utf8, (char**)&utf16_str, - &utf16_bytes_left, &utf8_str, &utf8_bytes_left); - - if (num_chars_converted == (size_t)(-1)) { - ERROR_WITH_ERRNO("Failed to convert UTF-16LE string to UTF-8 " - "string"); - FREE(orig_utf8_str); - return NULL; - } - - size_t utf8_len = utf16_len - utf8_bytes_left; - - *utf8_len_ret = utf8_len; - orig_utf8_str[utf8_len] = '\0'; - return orig_utf8_str; -#endif -} - -static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1); - -/* Converts a string in the UTF-8 encoding to a newly allocated string in the - * UTF-16 encoding. 
*/ -char *utf8_to_utf16(const char *utf8_str, size_t utf8_len, - size_t *utf16_len_ret) -{ -#ifdef WITH_NTFS_3G - char *outs = NULL; - int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs); - if (outs_nchars >= 0) { - *utf16_len_ret = (size_t)outs_nchars * 2; - } else { - ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE"); - outs = NULL; - } - return outs; -#else - if (cd_utf8_to_utf16 == (iconv_t)(-1)) { - cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8"); - if (cd_utf8_to_utf16 == (iconv_t)-1) { - ERROR_WITH_ERRNO("Failed to get conversion descriptor " - "for converting UTF-8 to UTF-16LE"); - return NULL; - } - } - - size_t utf8_bytes_left = utf8_len; - size_t utf16_capacity = utf8_len * 4; - size_t utf16_bytes_left = utf16_capacity; - - char *utf16_str = MALLOC(utf16_capacity + 2); - if (!utf16_str) - return NULL; - - char *orig_utf16_str = utf16_str; - - size_t num_chars_converted = iconv(cd_utf8_to_utf16, (char**)&utf8_str, - &utf8_bytes_left, &utf16_str, &utf16_bytes_left); - - if (num_chars_converted == (size_t)(-1)) { - ERROR_WITH_ERRNO("Failed to convert UTF-8 string to UTF-16LE " - "string"); - FREE(orig_utf16_str); - return NULL; - } - - size_t utf16_len = utf16_capacity - utf16_bytes_left; - - *utf16_len_ret = utf16_len; - orig_utf16_str[utf16_len] = '\0'; - orig_utf16_str[utf16_len + 1] = '\0'; - return orig_utf16_str; -#endif -} - static bool seeded = false; static void seed_random() diff --git a/src/util.h b/src/util.h index 8a6dc4da..08b15636 100644 --- a/src/util.h +++ b/src/util.h @@ -155,12 +155,14 @@ extern char *wimlib_strdup(const char *str); #endif /* ENABLE_CUSTOM_MEMORY_ALLOCATOR */ -extern char *utf16_to_utf8(const char *utf16_str, size_t utf16_len, - size_t *utf8_len_ret); +/* encoding.c */ +extern int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes, + char **utf8_str_ret, size_t *utf8_nbytes_ret); -extern char *utf8_to_utf16(const char *utf8_str, size_t utf8_len, - size_t *utf16_len_ret); +extern int 
utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes, + char **utf16_str_ret, size_t *utf16_nbytes_ret); +/* util.c */ extern void randomize_byte_array(u8 *p, size_t n); extern void randomize_char_array_with_alnum(char p[], size_t n); diff --git a/src/wimlib.h b/src/wimlib.h index 57f9aa97..b2dd3034 100644 --- a/src/wimlib.h +++ b/src/wimlib.h @@ -681,7 +681,6 @@ typedef int (*wimlib_progress_func_t)(enum wimlib_progress_msg msg_type, enum wimlib_error_code { WIMLIB_ERR_SUCCESS = 0, WIMLIB_ERR_ALREADY_LOCKED, - WIMLIB_ERR_CHAR_CONVERSION, WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE, WIMLIB_ERR_DECOMPRESSION, WIMLIB_ERR_DELETE_STAGING_DIR, @@ -689,6 +688,7 @@ enum wimlib_error_code { WIMLIB_ERR_FORK, WIMLIB_ERR_FUSE, WIMLIB_ERR_FUSERMOUNT, + WIMLIB_ERR_ICONV_NOT_AVAILABLE, WIMLIB_ERR_IMAGE_COUNT, WIMLIB_ERR_IMAGE_NAME_COLLISION, WIMLIB_ERR_INTEGRITY, @@ -706,6 +706,9 @@ enum wimlib_error_code { WIMLIB_ERR_INVALID_RESOURCE_SIZE, WIMLIB_ERR_INVALID_SECURITY_DATA, WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE, + WIMLIB_ERR_INVALID_UTF8_STRING, + WIMLIB_ERR_INVALID_UTF16_STRING, + WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE, WIMLIB_ERR_LINK, WIMLIB_ERR_MKDIR, WIMLIB_ERR_MQUEUE, diff --git a/src/xml.c b/src/xml.c index 65e34a21..9fbe259f 100644 --- a/src/xml.c +++ b/src/xml.c @@ -1322,7 +1322,7 @@ int write_xml_data(const struct wim_info *wim_info, int image, FILE *out, encoding_handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE); if (!encoding_handler) { ERROR("Failed to get XML character encoding handler for UTF-16LE"); - ret = WIMLIB_ERR_CHAR_CONVERSION; + ret = WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE; goto out_cleanup_char_encoding_handlers; } -- 2.43.0