From ded522fd0a15d740354329c5066ebd3473563e57 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Tue, 18 Dec 2012 20:09:59 -0600
Subject: [PATCH] utf8_to_utf16(), utf16_to_utf8() changes

These two functions have been moved out of 'util.c' to a separate file
'encoding.c'.  They also now return an integer error code because they can fail
for at least two reasons (no memory, and invalid UTF-8 or UTF-16LE character).
---
 Makefile.am        |   1 +
 src/add_image.c    |   8 +-
 src/dentry.c       |  49 ++++---
 src/dentry.h       |   3 +
 src/encoding.c     | 332 +++++++++++++++++++++++++++++++++++++++++++++
 src/mount_image.c  |   4 +-
 src/ntfs-apply.c   |  12 +-
 src/ntfs-capture.c |  44 +++---
 src/symlink.c      |  43 ++++--
 src/util.c         | 144 ++------------------
 src/util.h         |  10 +-
 src/wimlib.h       |   5 +-
 src/xml.c          |   2 +-
 13 files changed, 450 insertions(+), 207 deletions(-)
 create mode 100644 src/encoding.c

diff --git a/Makefile.am b/Makefile.am
index 96a9d390..35b2ad1a 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,6 +19,7 @@ libwim_la_SOURCES =		\
 	src/delete_image.c	\
 	src/dentry.c		\
 	src/dentry.h		\
+	src/encoding.c		\
 	src/endianness.h	\
 	src/export_image.c	\
 	src/extract_image.c	\
diff --git a/src/add_image.c b/src/add_image.c
index d09dd197..d658dc92 100644
--- a/src/add_image.c
+++ b/src/add_image.c
@@ -214,8 +214,12 @@ static int build_dentry_tree(struct dentry **root_ret,
 		filename = path_basename(root_disk_path);
 
 	root = new_dentry_with_timeless_inode(filename);
-	if (!root)
-		return WIMLIB_ERR_NOMEM;
+	if (!root) {
+		if (errno == EILSEQ)
+			return WIMLIB_ERR_INVALID_UTF8_STRING;
+		else
+			return WIMLIB_ERR_NOMEM;
+	}
 
 	inode = root->d_inode;
 
diff --git a/src/dentry.c b/src/dentry.c
index fb57c937..e7d34f5d 100644
--- a/src/dentry.c
+++ b/src/dentry.c
@@ -83,13 +83,12 @@ int get_names(char **name_utf16_ret, char **name_utf8_ret,
 	size_t utf8_len;
 	size_t utf16_len;
 	char *name_utf16, *name_utf8;
+	int ret;
 
 	utf8_len = strlen(name);
-
-	name_utf16 = utf8_to_utf16(name, utf8_len, &utf16_len);
-
-	if (!name_utf16)
-		return WIMLIB_ERR_NOMEM;
+	ret = utf8_to_utf16(name, utf8_len, &name_utf16, &utf16_len);
+	if (ret != 0)
+		return ret;
 
 	name_utf8 = MALLOC(utf8_len + 1);
 	if (!name_utf8) {
@@ -115,11 +114,14 @@ static int change_dentry_name(struct dentry *dentry, const char *new_name)
 
 	ret = get_names(&dentry->file_name, &dentry->file_name_utf8,
 			&dentry->file_name_len, &dentry->file_name_utf8_len,
-			 new_name);
-	FREE(dentry->short_name);
-	dentry->short_name_len = 0;
-	if (ret == 0)
+			new_name);
+	if (ret == 0) {
+		if (dentry->short_name_len) {
+			FREE(dentry->short_name);
+			dentry->short_name_len = 0;
+		}
 		dentry->length = dentry_correct_length(dentry);
+	}
 	return ret;
 }
 
@@ -809,6 +811,9 @@ static struct inode *new_inode()
  *
  * Returns a pointer to the new dentry, or NULL if out of memory.
  */
+#ifndef WITH_FUSE
+static
+#endif
 struct dentry *new_dentry(const char *name)
 {
 	struct dentry *dentry;
@@ -826,7 +831,7 @@ struct dentry *new_dentry(const char *name)
 	return dentry;
 err:
 	FREE(dentry);
-	ERROR("Failed to allocate new dentry");
+	ERROR_WITH_ERRNO("Failed to create new dentry with name \"%s\"", name);
 	return NULL;
 }
 
@@ -1251,15 +1256,14 @@ static int read_ads_entries(const u8 *p, struct inode *inode,
 			}
 			get_bytes(p, cur_entry->stream_name_len,
 				  (u8*)cur_entry->stream_name);
-			cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name,
-								    cur_entry->stream_name_len,
-								    &utf8_len);
-			cur_entry->stream_name_utf8_len = utf8_len;
 
-			if (!cur_entry->stream_name_utf8) {
-				ret = WIMLIB_ERR_NOMEM;
+			ret = utf16_to_utf8(cur_entry->stream_name,
+					    cur_entry->stream_name_len,
+					    &cur_entry->stream_name_utf8,
+					    &utf8_len);
+			if (ret != 0)
 				goto out_free_ads_entries;
-			}
+			cur_entry->stream_name_utf8_len = utf8_len;
 		}
 		/* It's expected that the size of every ADS entry is a multiple
 		 * of 8.  However, to be safe, I'm allowing the possibility of
@@ -1434,15 +1438,10 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len,
 		p = get_bytes(p, file_name_len, file_name);
 
 		/* Convert filename to UTF-8. */
-		file_name_utf8 = utf16_to_utf8(file_name, file_name_len,
-					       &file_name_utf8_len);
-
-		if (!file_name_utf8) {
-			ERROR("Failed to allocate memory to convert UTF-16 "
-			      "filename (%hu bytes) to UTF-8", file_name_len);
-			ret = WIMLIB_ERR_NOMEM;
+		ret = utf16_to_utf8(file_name, file_name_len, &file_name_utf8,
+				    &file_name_utf8_len);
+		if (ret != 0)
 			goto out_free_file_name;
-		}
 		if (*(u16*)p)
 			WARNING("Expected two zero bytes following the file name "
 				"`%s', but found non-zero bytes", file_name_utf8);
diff --git a/src/dentry.h b/src/dentry.h
index afdb3619..0d3f89e3 100644
--- a/src/dentry.h
+++ b/src/dentry.h
@@ -358,7 +358,10 @@ extern struct dentry *get_parent_dentry(struct WIMStruct *w, const char *path);
 extern int print_dentry(struct dentry *dentry, void *lookup_table);
 extern int print_dentry_full_path(struct dentry *entry, void *ignore);
 
+#ifdef WITH_FUSE
 extern struct dentry *new_dentry(const char *name);
+#endif
+
 extern struct dentry *new_dentry_with_inode(const char *name);
 extern struct dentry *new_dentry_with_timeless_inode(const char *name);
 
diff --git a/src/encoding.c b/src/encoding.c
new file mode 100644
index 00000000..edb24c70
--- /dev/null
+++ b/src/encoding.c
@@ -0,0 +1,332 @@
+/*
+ * encoding.c:  Convert UTF-8 to UTF-16LE strings and vice versa
+ */
+
+/*
+ * Copyright (C) 2012 Eric Biggers
+ *
+ * This file is part of wimlib, a library for working with WIM files.
+ *
+ * wimlib is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option)
+ * any later version.
+ *
+ * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ */
+
+#include "wimlib.h"
+#include "util.h"
+#include "endianness.h"
+
+#include <errno.h>
+
+#ifdef WITH_NTFS_3G
+#include <ntfs-3g/volume.h>
+#include <ntfs-3g/unistr.h>
+#else
+#include <iconv.h>
+#endif
+
+/*
+ * NOTE:
+ *
+ * utf16_to_utf8_size() and utf8_to_utf16_size() were taken from
+ * libntfs-3g/unistr.c in the NTFS-3g sources.  (Modified slightly to remove
+ * unneeded functionality.)
+ */
+#ifndef WITH_NTFS_3G
+/*
+ * Return the number of UTF-8 bytes needed (without the terminating null) to
+ * store the UTF-16LE string 'ins', scanning at most 'ins_len' 16-bit units and
+ * stopping at the first zero unit.  Return -1 with errno set to EILSEQ if a
+ * surrogate is unpaired or misordered (or, with NOREVBOM, on 0xfffe/0xffff).
+ */
+static int utf16_to_utf8_size(const u16 *ins, const int ins_len)
+{
+	int i, ret = -1;
+	int count = 0;
+	bool surrog;
+
+	surrog = false; /* true while a high surrogate awaits its pair */
+	for (i = 0; i < ins_len && ins[i]; i++) {
+		unsigned short c = le16_to_cpu(ins[i]);
+		if (surrog) {
+			if ((c >= 0xdc00) && (c < 0xe000)) { /* low surrogate */
+				surrog = false;
+				count += 4; /* the whole pair encodes to 4 bytes */
+			} else
+				goto fail;
+		} else
+			if (c < 0x80)
+				count++;
+			else if (c < 0x800)
+				count += 2;
+			else if (c < 0xd800)
+				count += 3;
+			else if (c < 0xdc00) /* high surrogate: 0xd800..0xdbff */
+				surrog = true;
+#if NOREVBOM
+			else if ((c >= 0xe000) && (c < 0xfffe))
+#else
+			else if (c >= 0xe000)
+#endif
+				count += 3;
+			else
+				goto fail;
+	}
+	if (surrog) /* input ended right after a high surrogate */
+		goto fail;
+
+	ret = count;
+out:
+	return ret;
+fail:
+	errno = EILSEQ;
+	goto out;
+}
+
+/*
+ * Return the number of 16-bit code units of UTF-16LE needed (without the
+ * terminating null) to store the null-terminated UTF-8 string 's'.
+ *
+ * Return -1 with errno set to EILSEQ if a lead byte >= 0xF5 is encountered.
+ *
+ * Note: This does not check whether the input sequence is a valid UTF-8 string
+ *	 (nor is any length limit enforced); use it only where that is checked!
+ */
+static int utf8_to_utf16_size(const char *s)
+{
+	unsigned int byte;
+	size_t count = 0;
+	while ((byte = *((const unsigned char *)s++))) {
+		count++;
+		if (byte >= 0xc0) {
+			if (byte >= 0xF5) {
+				errno = EILSEQ;
+				return -1;
+			}
+			if (!*s) /* string ended in the middle of a sequence */
+				break;
+			if (byte >= 0xC0) /* always true inside this branch */
+				s++;
+			if (!*s)
+				break;
+			if (byte >= 0xE0)
+				s++;
+			if (!*s)
+				break;
+			if (byte >= 0xF0) { /* 4-byte lead: counts as two units */
+				s++;
+				count++;
+			}
+		}
+	}
+	return count; /* NOTE(review): size_t is narrowed to int here */
+}
+#endif /* !WITH_NTFS_3G */
+
+/* Converts the UTF-16LE string 'utf16_str' ('utf16_nbytes' bytes) to a newly
+ * allocated UTF-8 string: *utf8_str_ret gets the string and *utf8_nbytes_ret
+ * its byte length (NULL and 0 for empty input).  Returns 0 or a WIMLIB_ERR_*.
+ * If built WITH_NTFS_3G this calls ntfs_ucstombs() from libntfs-3g;
+ * otherwise it uses iconv() with the helper function utf16_to_utf8_size().
+ */
+int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes,
+		  char **utf8_str_ret, size_t *utf8_nbytes_ret)
+{
+	int ret;
+
+	if (utf16_nbytes == 0) {
+		*utf8_str_ret = NULL;
+		*utf8_nbytes_ret = 0;
+		return 0;
+	}
+
+	if (utf16_nbytes & 1) {
+		ERROR("UTF-16LE string is invalid (odd number of bytes)!");
+		return WIMLIB_ERR_INVALID_UTF16_STRING;
+	}
+#ifdef WITH_NTFS_3G
+	char *outs = NULL;
+	int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str,
+				     utf16_nbytes / 2, &outs, 0);
+	if (outs_len >= 0) {
+		*utf8_str_ret = outs;
+		*utf8_nbytes_ret = outs_len;
+		ret = 0;
+	} else {
+		if (errno == ENOMEM)
+			ret = WIMLIB_ERR_NOMEM;
+		else
+			ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+	}
+#else /* WITH_NTFS_3G */
+	static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1); /* opened on first use, then reused */
+	if (cd_utf16_to_utf8 == (iconv_t)(-1)) {
+		cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE");
+		if (cd_utf16_to_utf8 == (iconv_t)-1) {
+			ERROR_WITH_ERRNO("Failed to get conversion descriptor "
+					 "for converting UTF-16LE to UTF-8");
+			if (errno == ENOMEM) /* NOTE(review): errno is read after ERROR_WITH_ERRNO(); confirm it is preserved */
+				return WIMLIB_ERR_NOMEM;
+			else
+				return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
+		}
+	}
+	ret = utf16_to_utf8_size((const u16*)utf16_str, utf16_nbytes / 2);
+	if (ret >= 0) {
+		size_t utf8_expected_nbytes;
+		char  *utf8_str;
+		size_t utf8_bytes_left;
+		size_t utf16_bytes_left;
+		size_t num_chars_converted;
+		char  *utf8_str_save;
+		const char *utf16_str_save;
+
+		utf8_expected_nbytes = ret;
+ 		utf8_str = MALLOC(utf8_expected_nbytes + 1); /* +1 for the terminating null */
+		if (utf8_str) {
+			utf8_bytes_left = utf8_expected_nbytes;
+			utf16_bytes_left = utf16_nbytes;
+			utf8_str_save = utf8_str;
+			utf16_str_save = utf16_str;
+			num_chars_converted = iconv(cd_utf16_to_utf8,
+						    (char**)&utf16_str,
+						    &utf16_bytes_left,
+						    &utf8_str,
+						    &utf8_bytes_left);
+			utf8_str = utf8_str_save; /* iconv() advanced both pointers; restore them */
+			utf16_str = utf16_str_save;
+			if (utf16_bytes_left == 0 &&
+			    utf8_bytes_left == 0 &&
+			    num_chars_converted != (size_t)(-1))
+			{
+				utf8_str[utf8_expected_nbytes] = '\0';
+				*utf8_str_ret = utf8_str;
+				*utf8_nbytes_ret = utf8_expected_nbytes;
+				ret = 0;
+			} else {
+				FREE(utf8_str);
+				ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+			}
+		} else
+			ret = WIMLIB_ERR_NOMEM;
+	} else
+		ret = WIMLIB_ERR_INVALID_UTF16_STRING;
+#endif /* WITH_NTFS_3G */
+
+#ifdef ENABLE_ERROR_MESSAGES
+	if (ret != 0) {
+		ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8");
+		ERROR("The failing string was:");
+		print_string(utf16_str, utf16_nbytes);
+		putchar('\n');
+	}
+#endif /* ENABLE_ERROR_MESSAGES */
+	return ret;
+}
+
+
+/* Converts the UTF-8 string 'utf8_str' ('utf8_nbytes' bytes) to a newly
+ * allocated UTF-16LE string: *utf16_str_ret gets the string, *utf16_nbytes_ret
+ * its byte length (NULL and 0 for empty input).  Returns 0 or a WIMLIB_ERR_*.
+ * If built WITH_NTFS_3G this calls ntfs_mbstoucs() from libntfs-3g;
+ * otherwise it uses iconv() with the helper function utf8_to_utf16_size().
+ */
+int utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes,
+		  char **utf16_str_ret, size_t *utf16_nbytes_ret)
+{
+	int ret;
+	if (utf8_nbytes == 0) {
+		*utf16_str_ret = NULL;
+		*utf16_nbytes_ret = 0;
+		return 0;
+	}
+#ifdef WITH_NTFS_3G
+	char *outs = NULL;
+	int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs); /* 'utf8_nbytes' is not passed here */
+	if (outs_nchars >= 0) {
+		*utf16_str_ret = outs;
+		*utf16_nbytes_ret = (size_t)outs_nchars * 2;
+		ret = 0;
+	} else {
+		if (errno == ENOMEM)
+			ret = WIMLIB_ERR_NOMEM;
+		else
+			ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+	}
+#else /* WITH_NTFS_3G */
+	static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1); /* opened on first use, then reused */
+	if (cd_utf8_to_utf16 == (iconv_t)(-1)) {
+		cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8");
+		if (cd_utf8_to_utf16 == (iconv_t)-1) {
+			ERROR_WITH_ERRNO("Failed to get conversion descriptor "
+					 "for converting UTF-8 to UTF-16LE");
+			if (errno == ENOMEM) /* NOTE(review): errno is read after ERROR_WITH_ERRNO(); confirm it is preserved */
+				return WIMLIB_ERR_NOMEM;
+			else
+				return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
+		}
+	}
+
+	ret = utf8_to_utf16_size(utf8_str);
+	if (ret >= 0) {
+		size_t utf16_expected_nbytes;
+		char  *utf16_str;
+		size_t utf16_bytes_left;
+		size_t utf8_bytes_left;
+		size_t num_chars_converted;
+		const char *utf8_str_save;
+		char  *utf16_str_save;
+
+		utf16_expected_nbytes = (size_t)ret * 2;
+ 		utf16_str = MALLOC(utf16_expected_nbytes + 2); /* +2 for the two-byte terminating null */
+		if (utf16_str) {
+			utf16_bytes_left = utf16_expected_nbytes;
+			utf8_bytes_left = utf8_nbytes;
+			utf8_str_save = utf8_str;
+			utf16_str_save = utf16_str;
+			num_chars_converted = iconv(cd_utf8_to_utf16,
+						    (char**)&utf8_str,
+						    &utf8_bytes_left,
+						    &utf16_str,
+						    &utf16_bytes_left);
+			utf8_str = utf8_str_save; /* iconv() advanced both pointers; restore them */
+			utf16_str = utf16_str_save;
+			if (utf16_bytes_left == 0 &&
+			    utf8_bytes_left == 0 &&
+			    num_chars_converted != (size_t)(-1))
+			{
+				utf16_str[utf16_expected_nbytes] = '\0';
+				utf16_str[utf16_expected_nbytes + 1] = '\0';
+				*utf16_str_ret = utf16_str;
+				*utf16_nbytes_ret = utf16_expected_nbytes;
+				ret = 0;
+			} else {
+				FREE(utf16_str);
+				ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+			}
+		} else
+			ret = WIMLIB_ERR_NOMEM;
+	} else
+		ret = WIMLIB_ERR_INVALID_UTF8_STRING;
+#endif /* WITH_NTFS_3G */
+
+#ifdef ENABLE_ERROR_MESSAGES
+	if (ret != 0) {
+		ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE");
+		ERROR("The failing string was:");
+		print_string(utf8_str, utf8_nbytes);
+		putchar('\n');
+		ERROR("Length: %zu bytes", utf8_nbytes);
+	}
+#endif /* ENABLE_ERROR_MESSAGES */
+	return ret;
+}
diff --git a/src/mount_image.c b/src/mount_image.c
index 7f0a2aa0..69fad244 100644
--- a/src/mount_image.c
+++ b/src/mount_image.c
@@ -291,7 +291,7 @@ static int create_dentry(struct wimfs_context *ctx, const char *path,
 
 	new = new_dentry_with_inode(basename);
 	if (!new)
-		return -ENOMEM;
+		return -errno;
 
 	new->d_inode->resolved = 1;
 	new->d_inode->ino = ctx->next_ino++;
@@ -1609,7 +1609,7 @@ static int wimfs_link(const char *to, const char *from)
 		return -EEXIST;
 	from_dentry = new_dentry(link_name);
 	if (!from_dentry)
-		return -ENOMEM;
+		return -errno;
 
 	inode_add_dentry(from_dentry, inode);
 	from_dentry->d_inode = inode;
diff --git a/src/ntfs-apply.c b/src/ntfs-apply.c
index f1ff0dd1..0bfc060e 100644
--- a/src/ntfs-apply.c
+++ b/src/ntfs-apply.c
@@ -461,14 +461,12 @@ out_set_dos_name:
 
 		char *short_name_utf8;
 		size_t short_name_utf8_len;
-		short_name_utf8 = utf16_to_utf8(dentry->short_name,
-					   	dentry->short_name_len,
-					        &short_name_utf8_len);
-		if (!short_name_utf8) {
-			ERROR("Out of memory");
-			ret = WIMLIB_ERR_NOMEM;
+		ret = utf16_to_utf8(dentry->short_name,
+				    dentry->short_name_len,
+				    &short_name_utf8,
+				    &short_name_utf8_len);
+		if (ret != 0)
 			goto out_close_dir_ni;
-		}
 
 		if (is_hardlink) {
 			char *p;
diff --git a/src/ntfs-capture.c b/src/ntfs-capture.c
index ae550213..0abbebac 100644
--- a/src/ntfs-capture.c
+++ b/src/ntfs-capture.c
@@ -362,10 +362,12 @@ static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni,
 			 * alternate data stream entries */
 			struct ads_entry *new_ads_entry;
 			size_t stream_name_utf8_len;
-			stream_name_utf8 = utf16_to_utf8((const char*)attr_record_name(actx->attr),
-							 name_length * 2,
-							 &stream_name_utf8_len);
-			if (!stream_name_utf8)
+
+			ret = utf16_to_utf8((const char*)attr_record_name(actx->attr),
+					    name_length * 2,
+					    &stream_name_utf8,
+					    &stream_name_utf8_len);
+			if (ret != 0)
 				goto out_free_lte;
 			new_ads_entry = inode_add_ads(dentry->d_inode, stream_name_utf8);
 			FREE(stream_name_utf8);
@@ -435,12 +437,10 @@ static int wim_ntfs_capture_filldir(void *dirent, const ntfschar *name,
 	if (name_type == FILE_NAME_DOS)
 		return 0;
 
-	ret = -1;
-
- 	utf8_name = utf16_to_utf8((const char*)name, name_len * 2,
-				  &utf8_name_len);
-	if (!utf8_name)
-		goto out;
+	ret = utf16_to_utf8((const char*)name, name_len * 2,
+			    &utf8_name, &utf8_name_len);
+	if (ret != 0)
+		return -1;
 
 	if (utf8_name[0] == '.' &&
 	     (utf8_name[1] == '\0' ||
@@ -484,15 +484,15 @@ static int change_dentry_short_name(struct dentry *dentry,
 {
 	size_t short_name_utf16_len;
 	char *short_name_utf16;
-	short_name_utf16 = utf8_to_utf16(short_name_utf8, short_name_utf8_len,
-					 &short_name_utf16_len);
-	if (!short_name_utf16) {
-		ERROR_WITH_ERRNO("Failed to convert short name to UTF-16");
-		return WIMLIB_ERR_NOMEM;
+	int ret;
+
+	ret = utf8_to_utf16(short_name_utf8, short_name_utf8_len,
+			    &short_name_utf16, &short_name_utf16_len);
+	if (ret == 0) {
+		dentry->short_name = short_name_utf16;
+		dentry->short_name_len = short_name_utf16_len;
 	}
-	dentry->short_name = short_name_utf16;
-	dentry->short_name_len = short_name_utf16_len;
-	return 0;
+	return ret;
 }
 
 /* Recursively build a WIM dentry tree corresponding to a NTFS volume.
@@ -553,8 +553,12 @@ static int build_dentry_tree_ntfs_recursive(struct dentry **root_p,
 	}
 
 	root = new_dentry_with_timeless_inode(path_basename(path));
-	if (!root)
-		return WIMLIB_ERR_NOMEM;
+	if (!root) {
+		if (errno == EILSEQ)
+			return WIMLIB_ERR_INVALID_UTF8_STRING;
+		else
+			return WIMLIB_ERR_NOMEM;
+	}
 	*root_p = root;
 
 	if (dir_ni && (name_type == FILE_NAME_WIN32_AND_DOS
diff --git a/src/symlink.c b/src/symlink.c
index b90ebe5e..bb829651 100644
--- a/src/symlink.c
+++ b/src/symlink.c
@@ -80,9 +80,16 @@ static ssize_t get_symlink_name(const u8 *resource, size_t resource_len,
 	}
 	if (header_size + substitute_name_offset + substitute_name_len > resource_len)
 		return -EIO;
-	link_target = utf16_to_utf8((const char *)p + substitute_name_offset,
-				    substitute_name_len,
-				    &link_target_len);
+
+	ret = utf16_to_utf8((const char *)p + substitute_name_offset,
+			    substitute_name_len,
+			    &link_target, &link_target_len);
+	if (ret == WIMLIB_ERR_INVALID_UTF16_STRING)
+		return -EILSEQ;
+	else if (ret == WIMLIB_ERR_NOMEM)
+		return -ENOMEM;
+
+	wimlib_assert(ret == 0);
 
 	if (!link_target)
 		return -EIO;
@@ -124,22 +131,28 @@ out:
 	return ret;
 }
 
-static void *make_symlink_reparse_data_buf(const char *symlink_target,
-					   size_t *len_ret)
+static int make_symlink_reparse_data_buf(const char *symlink_target,
+					 size_t *len_ret, void **buf_ret)
 {
 	size_t utf8_len = strlen(symlink_target);
+	char *name_utf16;
 	size_t utf16_len;
-	char *name_utf16 = utf8_to_utf16(symlink_target, utf8_len, &utf16_len);
-	if (!name_utf16)
-		return NULL;
+	int ret;
+
+	ret = utf8_to_utf16(symlink_target, utf8_len,
+			    &name_utf16, &utf16_len);
+	if (ret != 0)
+		return ret;
 
 	for (size_t i = 0; i < utf16_len / 2; i++)
 		if (((u16*)name_utf16)[i] == cpu_to_le16('/'))
 			((u16*)name_utf16)[i] = cpu_to_le16('\\');
 	size_t len = 12 + utf16_len * 2;
 	void *buf = MALLOC(len);
-	if (!buf)
-		goto out;
+	if (!buf) {
+		FREE(name_utf16);
+		return WIMLIB_ERR_NOMEM;
+	}
 
 	u8 *p = buf;
 	p = put_u16(p, utf16_len); /* Substitute name offset */
@@ -150,9 +163,10 @@ static void *make_symlink_reparse_data_buf(const char *symlink_target,
 	p = put_bytes(p, utf16_len, (const u8*)name_utf16);
 	p = put_bytes(p, utf16_len, (const u8*)name_utf16);
 	*len_ret = len;
+	*buf_ret = buf;
 out:
 	FREE(name_utf16);
-	return buf;
+	return 0;
 }
 
 /* Get the symlink target from a dentry.
@@ -204,9 +218,10 @@ int inode_set_symlink(struct inode *inode, const char *target,
 	u8 symlink_buf_hash[SHA1_HASH_SIZE];
 	void *symlink_buf;
 
-	symlink_buf = make_symlink_reparse_data_buf(target, &symlink_buf_len);
-	if (!symlink_buf)
-		return WIMLIB_ERR_NOMEM;
+	ret = make_symlink_reparse_data_buf(target, &symlink_buf_len,
+					    &symlink_buf);
+	if (ret != 0)
+		return ret;
 
 	DEBUG("Made symlink reparse data buf (len = %zu, name len = %zu)",
 			symlink_buf_len, symlink_buf_len);
diff --git a/src/util.c b/src/util.c
index bcaefacd..c4f6e5ec 100644
--- a/src/util.c
+++ b/src/util.c
@@ -23,23 +23,13 @@
 
 #include "wimlib_internal.h"
 #include "endianness.h"
-#include "sha1.h"
 #include "timestamp.h"
-#include <sys/time.h>
 
-
-#include <iconv.h>
-#include <string.h>
 #include <ctype.h>
-#include <stdlib.h>
-#include <time.h>
-#include <unistd.h>
 #include <errno.h>
-
-#ifdef WITH_NTFS_3G
-#include <ntfs-3g/volume.h>
-#include <ntfs-3g/unistr.h>
-#endif
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> /* for getpid() */
 
 /* True if wimlib is to print an informational message when an error occurs.
  * This can be turned off by calling wimlib_set_print_errors(false). */
@@ -117,8 +107,6 @@ static const char *error_strings[] = {
 		= "Success",
 	[WIMLIB_ERR_ALREADY_LOCKED]
 		= "The WIM is already locked for writing",
-	[WIMLIB_ERR_CHAR_CONVERSION]
-		= "Failed to perform a conversion between UTF-8 and UTF-16LE",
 	[WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE]
 		= "Lookup table is compressed",
 	[WIMLIB_ERR_DECOMPRESSION]
@@ -165,11 +153,21 @@ static const char *error_strings[] = {
 		= "The part number or total parts of the WIM is invalid",
 	[WIMLIB_ERR_INVALID_RESOURCE_HASH]
 		= "The SHA1 message digest of a WIM resource did not match the expected value",
+	[WIMLIB_ERR_ICONV_NOT_AVAILABLE]
+		= "The iconv() function does not seem to work. "
+		  "Maybe check to make sure the directory /usr/lib/gconv exists",
 	[WIMLIB_ERR_INVALID_RESOURCE_SIZE]
 		= "A resource entry in the WIM has an invalid size",
 	[WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE]
 		= "The version of wimlib that has mounted a WIM image is incompatible with the "
 		  "version being used to unmount it",
+	[WIMLIB_ERR_INVALID_UTF8_STRING]
+		= "A string provided as input by the user was not a valid UTF-8 string",
+	[WIMLIB_ERR_INVALID_UTF16_STRING]
+		= "A string in a WIM dentry is not a valid UTF-16LE string",
+	[WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE]
+		= "libxml2 was unable to find a character encoding conversion handler "
+		  "for UTF-16LE",
 	[WIMLIB_ERR_LINK]
 		= "Failed to create a hard or symbolic link when extracting "
 			"a file from the WIM",
@@ -281,122 +279,6 @@ WIMLIBAPI int wimlib_set_memory_allocator(void *(*malloc_func)(size_t),
 #endif
 }
 
-
-
-static iconv_t cd_utf16_to_utf8 = (iconv_t)(-1);
-
-/* Converts a string in the UTF-16 encoding to a newly allocated string in the
- * UTF-8 encoding.  */
-char *utf16_to_utf8(const char *utf16_str, size_t utf16_len,
-		    size_t *utf8_len_ret)
-{
-#ifdef WITH_NTFS_3G
-	if (utf16_len & 1) {
-		errno = -EILSEQ;
-		return NULL;
-	}
-	char *outs = NULL;
-	int outs_len = ntfs_ucstombs((const ntfschar*)utf16_str,
-				     utf16_len >> 1, &outs, 0);
-	if (outs_len >= 0) {
-		*utf8_len_ret = outs_len;
-	} else {
-		ERROR_WITH_ERRNO("Error converting UTF-16LE string to UTF-8");
-		outs = NULL;
-	}
-	return outs;
-#else
-	if (cd_utf16_to_utf8 == (iconv_t)(-1)) {
-		cd_utf16_to_utf8 = iconv_open("UTF-8", "UTF-16LE");
-		if (cd_utf16_to_utf8 == (iconv_t)-1) {
-			ERROR_WITH_ERRNO("Failed to get conversion descriptor "
-					 "for converting UTF-16LE to UTF-8");
-			return NULL;
-		}
-	}
-	size_t utf16_bytes_left  = utf16_len;
-	size_t utf8_bytes_left   = utf16_len;
-
-	char *utf8_str = MALLOC(utf8_bytes_left);
-	if (!utf8_str)
-		return NULL;
-
-	char *orig_utf8_str = utf8_str;
-
-	size_t num_chars_converted = iconv(cd_utf16_to_utf8, (char**)&utf16_str,
-			&utf16_bytes_left, &utf8_str, &utf8_bytes_left);
-
-	if (num_chars_converted == (size_t)(-1)) {
-		ERROR_WITH_ERRNO("Failed to convert UTF-16LE string to UTF-8 "
-				 "string");
-		FREE(orig_utf8_str);
-		return NULL;
-	}
-
-	size_t utf8_len = utf16_len - utf8_bytes_left;
-
-	*utf8_len_ret = utf8_len;
-	orig_utf8_str[utf8_len] = '\0';
-	return orig_utf8_str;
-#endif
-}
-
-static iconv_t cd_utf8_to_utf16 = (iconv_t)(-1);
-
-/* Converts a string in the UTF-8 encoding to a newly allocated string in the
- * UTF-16 encoding.  */
-char *utf8_to_utf16(const char *utf8_str, size_t utf8_len,
-		    size_t *utf16_len_ret)
-{
-#ifdef WITH_NTFS_3G
-	char *outs = NULL;
-	int outs_nchars = ntfs_mbstoucs(utf8_str, (ntfschar**)&outs);
-	if (outs_nchars >= 0) {
-		*utf16_len_ret = (size_t)outs_nchars * 2;
-	} else {
-		ERROR_WITH_ERRNO("Error converting UTF-8 string to UTF-16LE");
-		outs = NULL;
-	}
-	return outs;
-#else
-	if (cd_utf8_to_utf16 == (iconv_t)(-1)) {
-		cd_utf8_to_utf16 = iconv_open("UTF-16LE", "UTF-8");
-		if (cd_utf8_to_utf16 == (iconv_t)-1) {
-			ERROR_WITH_ERRNO("Failed to get conversion descriptor "
-					 "for converting UTF-8 to UTF-16LE");
-			return NULL;
-		}
-	}
-
-	size_t utf8_bytes_left   = utf8_len;
-	size_t utf16_capacity    = utf8_len * 4;
-	size_t utf16_bytes_left  = utf16_capacity;
-
-	char *utf16_str = MALLOC(utf16_capacity + 2);
-	if (!utf16_str)
-		return NULL;
-
-	char *orig_utf16_str = utf16_str;
-
-	size_t num_chars_converted = iconv(cd_utf8_to_utf16, (char**)&utf8_str,
-			&utf8_bytes_left, &utf16_str, &utf16_bytes_left);
-
-	if (num_chars_converted == (size_t)(-1)) {
-		ERROR_WITH_ERRNO("Failed to convert UTF-8 string to UTF-16LE "
-				 "string");
-		FREE(orig_utf16_str);
-		return NULL;
-	}
-
-	size_t utf16_len = utf16_capacity - utf16_bytes_left;
-
-	*utf16_len_ret = utf16_len;
-	orig_utf16_str[utf16_len] = '\0';
-	orig_utf16_str[utf16_len + 1] = '\0';
-	return orig_utf16_str;
-#endif
-}
-
 static bool seeded = false;
 
 static void seed_random()
diff --git a/src/util.h b/src/util.h
index 8a6dc4da..08b15636 100644
--- a/src/util.h
+++ b/src/util.h
@@ -155,12 +155,14 @@ extern char *wimlib_strdup(const char *str);
 #endif /* ENABLE_CUSTOM_MEMORY_ALLOCATOR */
 
 
-extern char *utf16_to_utf8(const char *utf16_str, size_t utf16_len,
-			   size_t *utf8_len_ret);
+/* encoding.c */
+extern int utf16_to_utf8(const char *utf16_str, size_t utf16_nbytes,
+			 char **utf8_str_ret, size_t *utf8_nbytes_ret);
 
-extern char *utf8_to_utf16(const char *utf8_str, size_t utf8_len,
-			   size_t *utf16_len_ret);
+extern int utf8_to_utf16(const char *utf8_str, size_t utf8_nbytes,
+			 char **utf16_str_ret, size_t *utf16_nbytes_ret);
 
+/* util.c */
 extern void randomize_byte_array(u8 *p, size_t n);
 
 extern void randomize_char_array_with_alnum(char p[], size_t n);
diff --git a/src/wimlib.h b/src/wimlib.h
index 57f9aa97..b2dd3034 100644
--- a/src/wimlib.h
+++ b/src/wimlib.h
@@ -681,7 +681,6 @@ typedef int (*wimlib_progress_func_t)(enum wimlib_progress_msg msg_type,
 enum wimlib_error_code {
 	WIMLIB_ERR_SUCCESS = 0,
 	WIMLIB_ERR_ALREADY_LOCKED,
-	WIMLIB_ERR_CHAR_CONVERSION,
 	WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE,
 	WIMLIB_ERR_DECOMPRESSION,
 	WIMLIB_ERR_DELETE_STAGING_DIR,
@@ -689,6 +688,7 @@ enum wimlib_error_code {
 	WIMLIB_ERR_FORK,
 	WIMLIB_ERR_FUSE,
 	WIMLIB_ERR_FUSERMOUNT,
+	WIMLIB_ERR_ICONV_NOT_AVAILABLE,
 	WIMLIB_ERR_IMAGE_COUNT,
 	WIMLIB_ERR_IMAGE_NAME_COLLISION,
 	WIMLIB_ERR_INTEGRITY,
@@ -706,6 +706,9 @@ enum wimlib_error_code {
 	WIMLIB_ERR_INVALID_RESOURCE_SIZE,
 	WIMLIB_ERR_INVALID_SECURITY_DATA,
 	WIMLIB_ERR_INVALID_UNMOUNT_MESSAGE,
+	WIMLIB_ERR_INVALID_UTF8_STRING,
+	WIMLIB_ERR_INVALID_UTF16_STRING,
+	WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE,
 	WIMLIB_ERR_LINK,
 	WIMLIB_ERR_MKDIR,
 	WIMLIB_ERR_MQUEUE,
diff --git a/src/xml.c b/src/xml.c
index 65e34a21..9fbe259f 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -1322,7 +1322,7 @@ int write_xml_data(const struct wim_info *wim_info, int image, FILE *out,
 	encoding_handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_UTF16LE);
 	if (!encoding_handler) {
 		ERROR("Failed to get XML character encoding handler for UTF-16LE");
-		ret = WIMLIB_ERR_CHAR_CONVERSION;
+		ret = WIMLIB_ERR_LIBXML_UTF16_HANDLER_NOT_AVAILABLE;
 		goto out_cleanup_char_encoding_handlers;
 	}
 
-- 
2.43.0