/*
- * encoding.c: Convert "multibyte" strings (the locale-default encoding---
- * generally, UTF-8 or something like ISO-8859-1) to UTF-16LE strings, and vice
- * versa. Also, convert UTF-8 strings to multibyte strings.
+ * encoding.c
*/
/*
* along with wimlib; if not, see http://www.gnu.org/licenses/.
*/
-#include "wimlib_internal.h"
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "wimlib.h"
+#include "wimlib/encoding.h"
+#include "wimlib/error.h"
+#include "wimlib/list.h"
+#include "wimlib/util.h"
#include <errno.h>
#include <iconv.h>
/*
 * DEFINE_CHAR_CONVERSION_FUNCTIONS() - generate a family of string conversion
 * functions from encoding 1 (varname1/longname1/chartype1) to encoding 2
 * (varname2/longname2/chartype2).
 *
 * Each expansion emits a static ICONV_LIST named
 * iconv_<varname1>_to_<varname2>, built from longname1 and longname2, plus
 * three functions returning int, each declared with `modifier` in front of
 * the return type (callers pass `static` or leave it empty):
 *
 *   <varname1>_to_<varname2>_nbytes(in, in_nbytes, out_nbytes_ret)
 *   <varname1>_to_<varname2>_buf(in, in_nbytes, out)
 *   <varname1>_to_<varname2>(in, in_nbytes, out_ret, out_nbytes_ret)
 *
 * In the allocating variant, when earlyreturn_on_utf8_locale is true and
 * wimlib_mbs_is_utf8 is set, earlyreturn_expr is expanded first; if that
 * expression does not itself return, a buffer of
 * in_nbytes + sizeof(chartype2) bytes is allocated for the "out same as in"
 * case.  Otherwise the _buf variant fills `out`; on failure `out` is
 * released with FREE(), on success it is handed back through *out_ret and
 * *out_nbytes_ret.
 *
 * NOTE(review): parts of the generated bodies are not visible in this
 * excerpt.  worst_case_len_expr, err_return and err_msg are presumably the
 * output-size bound, the error code returned on conversion failure and the
 * statement(s) that report it -- confirm against the full macro body.
 * NOTE(review): the errno save/restore around FREE() is being removed, so
 * errno may not survive a failed conversion -- confirm no caller relies on
 * it.
 */
#define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\
varname2, longname2, chartype2,\
- earlyreturn, \
+ earlyreturn_on_utf8_locale, \
+ earlyreturn_expr, \
worst_case_len_expr, \
err_return, \
- err_msg) \
+ err_msg, \
+ modifier) \
static ICONV_LIST(iconv_##varname1##_to_##varname2, \
longname1, longname2); \
\
-int \
+modifier int \
varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\
size_t *out_nbytes_ret) \
{ \
return ret; \
} \
\
-int \
+modifier int \
varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \
chartype2 *out) \
{ \
return ret; \
} \
\
-int \
+modifier int \
varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \
chartype2 **out_ret, \
size_t *out_nbytes_ret) \
chartype2 *out; \
size_t out_nbytes; \
\
- if (earlyreturn) { \
+ if (earlyreturn_on_utf8_locale && wimlib_mbs_is_utf8) { \
+ earlyreturn_expr; \
/* Out same as in */ \
out = MALLOC(in_nbytes + sizeof(chartype2)); \
if (!out) \
\
ret = varname1##_to_##varname2##_buf(in, in_nbytes, out); \
if (ret) { \
- int errno_save = errno; \
FREE(out); \
- errno = errno_save; \
} else { \
*out_ret = out; \
*out_nbytes_ret = out_nbytes; \
return ret; \
}
-#if 0
+#if !TCHAR_IS_UTF16LE
+
+/* UNIX */
+
/*
 * File-local (static) utf8_to_utf16le(), utf8_to_utf16le_buf() and
 * utf8_to_utf16le_nbytes().  The early-return flag is false and the
 * early-return expression is empty, so the UTF-8 locale shortcut is never
 * taken; failures are reported as WIMLIB_ERR_INVALID_UTF8_STRING.
 */
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", tchar,
+ utf16le, "UTF-16LE", utf16lechar,
+ false,
+ ,
+ in_nbytes * 2,
+ WIMLIB_ERR_INVALID_UTF8_STRING,
+ ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
+ "to UTF-16LE string!"),
+ static)
+
/*
 * File-local (static) utf16le_to_utf8() family.  This takes the place of the
 * previously disabled UTF-16LE -> "mbs" instantiation: the target is now
 * always UTF-8, the locale shortcut is disabled (false, empty expression),
 * and failures are reported as WIMLIB_ERR_INVALID_UTF16_STRING.
 */
DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
- mbs, "", mbchar,
+ utf8, "UTF-8", tchar,
false,
- in_nbytes / 2 * MB_CUR_MAX,
- WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
- ERROR("Failed to convert UTF-16LE string "
- "to multibyte string!");
- ERROR("This may be because the UTF-16LE data "
- "could not be represented in your "
- "locale's character encoding."))
-#endif
+ ,
+ in_nbytes * 2,
+ WIMLIB_ERR_INVALID_UTF16_STRING,
+ ERROR_WITH_ERRNO("Failed to convert UTF-16LE string "
+ "to UTF-8 string!"),
+ static)
-#if !TCHAR_IS_UTF16LE
/*
 * tstr_to_utf16le() family, generated with an empty modifier (so not
 * static).  With the early-return flag set, a process whose
 * wimlib_mbs_is_utf8 is true delegates straight to utf8_to_utf16le();
 * otherwise the conversion uses the iconv list built from the "" source
 * encoding name.
 * NOTE(review): "" presumably selects the locale's default encoding --
 * confirm against the ICONV_LIST implementation.
 */
DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
utf16le, "UTF-16LE", utf16lechar,
- false,
+ true,
+ return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret),
in_nbytes * 2,
WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
ERROR_WITH_ERRNO("Failed to convert multibyte "
"string \"%"TS"\" to UTF-16LE string!", in);
ERROR("If the data you provided was UTF-8, please make sure "
- "the character encoding of your current locale is UTF-8."))
+ "the character encoding\n"
+ " of your current locale is UTF-8."),
+ )
+
/*
 * utf16le_to_tstr() family, generated with an empty modifier (so not
 * static).  With the early-return flag set, a process whose
 * wimlib_mbs_is_utf8 is true delegates straight to utf16le_to_utf8();
 * otherwise failures are reported as
 * WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE.
 */
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
+ tstr, "", tchar,
+ true,
+ return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret),
+ in_nbytes * 2,
+ WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
+ ERROR("Failed to convert UTF-16LE string to "
+ "multibyte string!");
+ ERROR("This may be because the UTF-16LE string "
+ "could not be represented\n"
+ " in your locale's character encoding."),
+ )
#endif
/* tchar to UTF-8 and back */
#if TCHAR_IS_UTF16LE
/*
 * TCHAR_IS_UTF16LE build: file-local (static) tstr_to_utf8() family, where
 * tchar strings are UTF-16LE.  The source codeset name is spelled
 * "UTF-16LE" (previously "UTF16-LE"), the locale shortcut is disabled, and
 * failures are reported as WIMLIB_ERR_INVALID_UTF16_STRING.
 */
-DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF16-LE", tchar,
- utf8, "UTF-8", utf8char,
+
+/* Windows */
+DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar,
+ utf8, "UTF-8", char,
false,
+ ,
in_nbytes * 2,
- WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
+ WIMLIB_ERR_INVALID_UTF16_STRING,
ERROR_WITH_ERRNO("Failed to convert UTF-16LE "
- "string \"%"TS"\" to UTF-8 string!", in);
- )
+ "string \"%"TS"\" to UTF-8 string!", in),
+ static)
/*
 * TCHAR_IS_UTF16LE build: file-local (static) utf8_to_tstr() family, where
 * tchar strings are UTF-16LE.  The locale shortcut is disabled and failures
 * are reported as WIMLIB_ERR_INVALID_UTF8_STRING.
 */
-DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", utf8char,
- tstr, "UTF16-LE", tchar,
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
+ tstr, "UTF-16LE", tchar,
false,
+ ,
in_nbytes * 2,
- WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
+ WIMLIB_ERR_INVALID_UTF8_STRING,
ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
- "to UTF-16LE string!");
- )
+ "to UTF-16LE string!"),
+ static)
#else
+
+/* UNIX */
+
/*
 * Non-UTF-16LE tchar build: file-local (static) tstr_to_utf8() family.  The
 * early-return flag is true but the early-return expression is empty, so
 * when wimlib_mbs_is_utf8 is set the generated code falls through to the
 * macro's "out same as in" path instead of delegating elsewhere.
 */
DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
- utf8, "UTF-8", utf8char,
- wimlib_mbs_is_utf8,
+ utf8, "UTF-8", char,
+ true,
+ ,
in_nbytes * 4,
WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
ERROR_WITH_ERRNO("Failed to convert multibyte "
"string \"%"TS"\" to UTF-8 string!", in);
ERROR("If the data you provided was UTF-8, please make sure "
- "the character encoding of your current locale is UTF-8.");)
+ "the character\n"
+ " encoding of your current locale is UTF-8."),
+ static)
/*
 * Non-UTF-16LE tchar build: file-local (static) utf8_to_tstr() family.  As
 * with tstr_to_utf8, the early-return flag is true with an empty
 * expression, so a UTF-8 locale takes the macro's "out same as in" path.
 * Failures are reported as WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE.
 */
-DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", utf8char,
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
tstr, "", tchar,
- wimlib_mbs_is_utf8,
+ true,
+ ,
in_nbytes * 4,
WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
ERROR("Failed to convert UTF-8 string to "
"multibyte string!");
ERROR("This may be because the UTF-8 data "
- "could not be represented in your "
- "locale's character encoding.");)
+ "could not be represented\n"
+ " in your locale's character encoding."),
+ static)
#endif
/*
 * Convenience wrapper around tstr_to_utf8() for terminated tchar strings.
 *
 * @tstr: input string; its length is taken with tstrlen().
 * @out:  receives the converted UTF-8 string produced by tstr_to_utf8().
 *
 * The length handed to tstr_to_utf8() is a byte count, hence the
 * multiplication by sizeof(tchar).  The output byte count is computed but
 * discarded.  Returns whatever tstr_to_utf8() returns.
 * NOTE(review): the result is presumably heap-allocated and owned by the
 * caller -- confirm against the conversion macro.
 */
int
-tstr_to_utf8_simple(const tchar *tstr, utf8char **out)
+tstr_to_utf8_simple(const tchar *tstr, char **out)
{
size_t out_nbytes;
- return tstr_to_utf8(tstr, tstrlen(tstr), out, &out_nbytes);
+ return tstr_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar),
+ out, &out_nbytes);
}
/*
 * Convenience wrapper around utf8_to_tstr() for NUL-terminated UTF-8
 * strings.
 *
 * @utf8str: input string; its byte length is taken with strlen().
 * @out:     receives the converted tchar string produced by utf8_to_tstr().
 *
 * The output byte count is computed but discarded.  Returns whatever
 * utf8_to_tstr() returns.
 */
int
-utf8_to_tstr_simple(const utf8char *utf8str, tchar **out)
+utf8_to_tstr_simple(const char *utf8str, tchar **out)
{
size_t out_nbytes;
return utf8_to_tstr(utf8str, strlen(utf8str), out, &out_nbytes);
}
/*
 * Run iconv_cleanup() on every ICONV_LIST this file defines through
 * DEFINE_CHAR_CONVERSION_FUNCTIONS.
 *
 * The utf8 <-> tstr lists exist in both build configurations.  The four
 * lists involving utf16le are only defined when tchar is not UTF-16LE, so
 * their cleanup sits under the same #if.
 */
void
-iconv_global_cleanup()
+iconv_global_cleanup(void)
{
- /*iconv_cleanup(&iconv_utf16le_to_mbs);*/
+ iconv_cleanup(&iconv_utf8_to_tstr);
+ iconv_cleanup(&iconv_tstr_to_utf8);
#if !TCHAR_IS_UTF16LE
- iconv_cleanup(&iconv_mbs_to_utf16le);
+ iconv_cleanup(&iconv_utf16le_to_tstr);
+ iconv_cleanup(&iconv_tstr_to_utf16le);
+ iconv_cleanup(&iconv_utf16le_to_utf8);
+ iconv_cleanup(&iconv_utf8_to_utf16le);
#endif
- /*iconv_cleanup(&iconv_utf8_to_mbs);*/
}
-
-#if 0
-bool
-utf8_str_contains_nonascii_chars(const utf8char *utf8_str)
-{
- do {
- if ((unsigned char)*utf8_str > 127)
- return true;
- } while (*++utf8_str);
- return false;
-}
-#endif