6 * Copyright (C) 2012, 2013 Eric Biggers
8 * This file is part of wimlib, a library for working with WIM files.
10 * wimlib is free software; you can redistribute it and/or modify it under the
11 * terms of the GNU General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option)
15 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
16 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
17 * A PARTICULAR PURPOSE. See the GNU General Public License for more
20 * You should have received a copy of the GNU General Public License
21 * along with wimlib; if not, see http://www.gnu.org/licenses/.
29 #include "wimlib/encoding.h"
30 #include "wimlib/error.h"
31 #include "wimlib/list.h"
32 #include "wimlib/util.h"
/* Global flag consulted by the conversion functions generated by
 * DEFINE_CHAR_CONVERSION_FUNCTIONS(): when it is true and the instantiation
 * enables its UTF-8 early-return option, the input is not run through iconv.
 * Its initial value is true exactly when tchar is not UTF-16LE.
 * NOTE(review): it is not static, so it can presumably be changed from other
 * files — confirm. */
40 bool wimlib_mbs_is_utf8 = !TCHAR_IS_UTF16LE;
42 /* List of iconv_t conversion descriptors for a specific character conversion.
43 * The idea is that it is not thread-safe to have just one conversion
44 * descriptor, but it also is inefficient to open a new conversion descriptor to
45 * convert every string. Both these problems can be solved by maintaining a
46 * list of conversion descriptors; then, a thread can use an existing conversion
47 * descriptor if available. */
48 struct iconv_list_head {
/* Encoding names handed to iconv_open() by get_iconv(). */
49 const char *from_encoding;
50 const char *to_encoding;
/* Conversion descriptor nodes (struct iconv_node) that are currently idle;
 * get_iconv() removes a node from here and put_iconv() adds it back. */
51 struct list_head list;
/* Held by get_iconv() and put_iconv() while they access 'list'. */
52 pthread_mutex_t mutex;
/* Links this node into the 'list' member of its struct iconv_list_head. */
57 struct list_head list;
/* The list head this node belongs to; put_iconv() reads it to find the list
 * the node must be returned to. */
58 struct iconv_list_head *head;
/* Define a struct iconv_list_head called @name and initialize it statically:
 * the source encoding is @from, the list of idle descriptors starts out empty
 * and the mutex uses PTHREAD_MUTEX_INITIALIZER, so no run-time setup call is
 * needed.
 * NOTE(review): the initializer that consumes @to is not visible in this
 * excerpt; presumably it sets .to_encoding — confirm. */
61 #define ICONV_LIST(name, from, to) \
62 struct iconv_list_head name = { \
63 .from_encoding = from, \
65 .list = LIST_HEAD_INIT(name.list), \
66 .mutex = PTHREAD_MUTEX_INITIALIZER, \
70 get_iconv(struct iconv_list_head *head)
/* Obtain a conversion descriptor node for the conversion described by @head.
 *
 * All list access happens with head->mutex held.  If no idle node is on
 * head->list, a new descriptor is opened with iconv_open() and a new
 * struct iconv_node is allocated; otherwise the first idle node is detached
 * from the list and reused.
 *
 * NOTE(review): the return statement and the handling of iconv_open()/MALLOC()
 * failures are not visible in this excerpt.  Callers store the result in an
 * 'iconv_t *' and hand it back with put_iconv() — confirm the exact contract. */
76 pthread_mutex_lock(&head->mutex);
77 if (list_empty(&head->list)) {
/* Nothing cached: open a fresh descriptor.  iconv_open() takes the
 * destination encoding first and the source encoding second. */
78 cd = iconv_open(head->to_encoding, head->from_encoding);
79 if (cd == (iconv_t)-1) {
80 ERROR_WITH_ERRNO("Failed to open iconv from %s to %s",
81 head->from_encoding, head->to_encoding);
/* Allocate the list node (presumably the one that will carry the new
 * descriptor — the assignment is not visible here). */
84 i = MALLOC(sizeof(struct iconv_node));
/* Reuse path (presumably the else branch): take the first idle node and
 * unlink it so that no other thread can pick it up. */
95 i = container_of(head->list.next, struct iconv_node, list);
96 list_del(head->list.next);
99 pthread_mutex_unlock(&head->mutex);
/* Return a conversion descriptor obtained from get_iconv() to its list so it
 * can be reused.
 *
 * @cd must point at the 'cd' member of a struct iconv_node; the enclosing
 * node and, through it, the owning list head are recovered from that pointer.
 *
 * errno is captured on entry.
 * NOTE(review): the statement that restores it is not visible in this
 * excerpt; presumably errno is restored before returning so that callers can
 * still report the iconv() error — confirm. */
104 put_iconv(iconv_t *cd)
106 int errno_save = errno;
/* Step back from the embedded member to the node that contains it. */
107 struct iconv_node *i = container_of(cd, struct iconv_node, cd);
108 struct iconv_list_head *head = i->head;
/* Put the node back on the idle list under the list's mutex. */
110 pthread_mutex_lock(&head->mutex);
111 list_add(&i->list, &head->list);
112 pthread_mutex_unlock(&head->mutex);
116 /* Prevent printing an error message if a character conversion error occurs
117 * while printing an error message. (This variable is not per-thread but it
118 * doesn't matter too much since it's just the error messages.) */
/* The generated *_nbytes() and *_buf() conversion functions test this flag
 * when iconv() fails, set it to true around their error-reporting step and
 * reset it to false afterwards.  It is a plain bool accessed without any
 * locking. */
119 static bool error_message_being_printed = false;
/* Generate the iconv-based routines that convert from encoding @longname1
 * (character type @chartype1) to encoding @longname2 (character type
 * @chartype2).  Each expansion produces:
 *
 *   - a static conversion descriptor list iconv_<varname1>_to_<varname2>,
 *     declared with ICONV_LIST();
 *
 *   - <varname1>_to_<varname2>_nbytes(): converts @in (@in_nbytes bytes) into
 *     a temporary on-stack array of worst_case_len_expr elements and stores
 *     the number of bytes produced in *out_nbytes_ret;
 *
 *   - <varname1>_to_<varname2>_buf(): converts @in directly into the buffer
 *     'out' and writes a terminating null character after the converted
 *     data.  iconv() is told that LARGE_NUMBER bytes are available, so this
 *     function performs no bounds check of its own; the buffer has to be big
 *     enough already;
 *
 *   - <varname1>_to_<varname2>(): if earlyreturn_on_utf8_locale is true and
 *     wimlib_mbs_is_utf8 is set, builds a null-terminated MALLOC()ed copy of
 *     the input; otherwise it calls _nbytes() to learn the output size,
 *     allocates out_nbytes + sizeof(chartype2) bytes and fills them with
 *     _buf().  *out_nbytes_ret receives the length in bytes, which does not
 *     include the terminator.
 *
 * Error codes visible here: WIMLIB_ERR_ICONV_NOT_AVAILABLE (returned right
 * after get_iconv(), presumably when it fails) and WIMLIB_ERR_NOMEM
 * (presumably when MALLOC() fails).  When iconv() itself fails, the
 * error-reporting step is skipped if error_message_being_printed is already
 * set.
 *
 * NOTE(review): several macro parameters and many statements (return types,
 * conditions, put_iconv() calls, the error message and error return) are not
 * visible in this excerpt; the description above covers only what is shown.
 * NOTE(review): 'in_nbytes + sizeof(chartype2)' and
 * 'out_nbytes + sizeof(chartype2)' are not checked for overflow before being
 * passed to MALLOC() — confirm callers bound the input length. */
121 #define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\
122 varname2, longname2, chartype2,\
123 earlyreturn_on_utf8_locale, \
125 worst_case_len_expr, \
129 static ICONV_LIST(iconv_##varname1##_to_##varname2, \
130 longname1, longname2); \
133 varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\
134 size_t *out_nbytes_ret) \
136 iconv_t *cd = get_iconv(&iconv_##varname1##_to_##varname2); \
138 return WIMLIB_ERR_ICONV_NOT_AVAILABLE; \
140 /* Worst case length */ \
141 chartype2 buf[worst_case_len_expr]; \
142 char *inbuf = (char*)in; \
143 size_t inbytesleft = in_nbytes; \
144 char *outbuf = (char*)buf; \
145 size_t outbytesleft = sizeof(buf); \
149 len = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); \
150 if (len == (size_t)-1) { \
151 if (!error_message_being_printed) { \
152 error_message_being_printed = true; \
154 error_message_being_printed = false; \
158 *out_nbytes_ret = sizeof(buf) - outbytesleft; \
166 varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \
169 iconv_t *cd = get_iconv(&iconv_##varname1##_to_##varname2); \
171 return WIMLIB_ERR_ICONV_NOT_AVAILABLE; \
173 char *inbuf = (char*)in; \
174 size_t inbytesleft = in_nbytes; \
175 char *outbuf = (char*)out; \
176 const size_t LARGE_NUMBER = 1000000000; \
177 size_t outbytesleft = LARGE_NUMBER; \
181 len = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); \
182 if (len == (size_t)-1) { \
183 if (!error_message_being_printed) { \
184 error_message_being_printed = true; \
186 error_message_being_printed = false; \
190 out[(LARGE_NUMBER-outbytesleft)/sizeof(chartype2)] = 0; \
198 varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \
199 chartype2 **out_ret, \
200 size_t *out_nbytes_ret) \
206 if (earlyreturn_on_utf8_locale && wimlib_mbs_is_utf8) { \
208 /* Out same as in */ \
209 out = MALLOC(in_nbytes + sizeof(chartype2)); \
211 return WIMLIB_ERR_NOMEM; \
212 memcpy(out, in, in_nbytes); \
213 out[in_nbytes / sizeof(chartype2)] = 0; \
215 *out_nbytes_ret = in_nbytes; \
219 ret = varname1##_to_##varname2##_nbytes(in, in_nbytes, \
224 out = MALLOC(out_nbytes + sizeof(chartype2)); \
226 return WIMLIB_ERR_NOMEM; \
228 ret = varname1##_to_##varname2##_buf(in, in_nbytes, out); \
233 *out_nbytes_ret = out_nbytes; \
238 #if !TCHAR_IS_UTF16LE
/* Instantiations of DEFINE_CHAR_CONVERSION_FUNCTIONS().  Each one generates
 * the <from>_to_<to>(), <from>_to_<to>_nbytes() and <from>_to_<to>_buf()
 * functions plus the matching iconv_<from>_to_<to> descriptor list.
 * NOTE(review): some argument lines and the preprocessor directives that
 * separate the groups below are not visible in this excerpt. */

/* UTF-8 -> UTF-16LE */
242 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", tchar,
243 utf16le, "UTF-16LE", utf16lechar,
247 WIMLIB_ERR_INVALID_UTF8_STRING,
248 ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
249 "to UTF-16LE string!"),
/* UTF-16LE -> UTF-8 */
252 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
253 utf8, "UTF-8", tchar,
257 WIMLIB_ERR_INVALID_UTF16_STRING,
258 ERROR_WITH_ERRNO("Failed to convert UTF-16LE string "
/* tchar string -> UTF-16LE.  The empty encoding name presumably makes
 * iconv_open() use the encoding of the current locale (the error text below
 * refers to it) — confirm for the iconv implementation in use.  The early
 * return expression hands the work to utf8_to_utf16le(). */
262 DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
263 utf16le, "UTF-16LE", utf16lechar,
265 return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret),
267 WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
268 ERROR_WITH_ERRNO("Failed to convert multibyte "
269 "string \"%"TS"\" to UTF-16LE string!", in);
270 ERROR("If the data you provided was UTF-8, please make sure "
271 "the character encoding\n"
272 " of your current locale is UTF-8."),
/* UTF-16LE -> multibyte string; the early return expression hands the work
 * to utf16le_to_utf8(). */
275 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
278 return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret),
280 WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
281 ERROR("Failed to convert UTF-16LE string to "
282 "multibyte string!");
283 ERROR("This may be because the UTF-16LE string "
284 "could not be represented\n"
285 " in your locale's character encoding."),
289 /* tchar to UTF-8 and back */
/* Two pairs follow: one where the tchar encoding is named "UTF-16LE" and one
 * where it is named "".  Both pairs start with a tstr instantiation, so they
 * presumably sit in alternative preprocessor branches that are not visible
 * here — confirm. */
293 DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar,
298 WIMLIB_ERR_INVALID_UTF16_STRING,
299 ERROR_WITH_ERRNO("Failed to convert UTF-16LE "
300 "string \"%"TS"\" to UTF-8 string!", in),
303 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
304 tstr, "UTF-16LE", tchar,
308 WIMLIB_ERR_INVALID_UTF8_STRING,
309 ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
310 "to UTF-16LE string!"),
316 DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
321 WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
322 ERROR_WITH_ERRNO("Failed to convert multibyte "
323 "string \"%"TS"\" to UTF-8 string!", in);
324 ERROR("If the data you provided was UTF-8, please make sure "
326 " encoding of your current locale is UTF-8."),
329 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
334 WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
335 ERROR("Failed to convert UTF-8 string to "
336 "multibyte string!");
337 ERROR("This may be because the UTF-8 data "
338 "could not be represented\n"
339 " in your locale's character encoding."),
/* Convenience wrapper around tstr_to_utf8() for a null-terminated tchar
 * string: the input length is computed here as tstrlen(tstr) characters
 * times sizeof(tchar), i.e. in bytes, and the result of tstr_to_utf8() is
 * returned unchanged.
 * NOTE(review): the remaining arguments of the call are not visible in this
 * excerpt; presumably @out receives the converted string — confirm. */
344 tstr_to_utf8_simple(const tchar *tstr, char **out)
347 return tstr_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar),
/* Convenience wrapper around utf8_to_tstr() for a null-terminated UTF-8
 * string: the input length is taken with strlen() and the converted string
 * is stored through @out.  The converted length goes into a local variable
 * and is not passed on to the caller.  Returns whatever utf8_to_tstr()
 * returns. */
352 utf8_to_tstr_simple(const char *utf8str, tchar **out)
355 return utf8_to_tstr(utf8str, strlen(utf8str), out, &out_nbytes);
/* Tear down one conversion descriptor list: destroy its mutex, then visit the
 * idle nodes on head->list until the list is empty.
 *
 * The mutex is destroyed before the list is walked and the walk takes no
 * lock, so this must only run when no other thread can still be using @head.
 *
 * NOTE(review): the rest of the loop body is not visible in this excerpt;
 * presumably each node is unlinked, its descriptor closed and the node
 * freed — confirm. */
359 iconv_cleanup(struct iconv_list_head *head)
361 pthread_mutex_destroy(&head->mutex);
362 while (!list_empty(&head->list)) {
363 struct iconv_node *i;
/* Always look at the first remaining node. */
365 i = container_of(head->list.next, struct iconv_node, list);
/* Run iconv_cleanup() on every conversion descriptor list defined in this
 * file.  The UTF-8 <-> tstr lists are always cleaned up; the lists involving
 * utf16le are only cleaned up when tchar is not UTF-16LE, matching the
 * condition under which they are guarded here. */
373 iconv_global_cleanup(void)
375 iconv_cleanup(&iconv_utf8_to_tstr);
376 iconv_cleanup(&iconv_tstr_to_utf8);
377 #if !TCHAR_IS_UTF16LE
378 iconv_cleanup(&iconv_utf16le_to_tstr);
379 iconv_cleanup(&iconv_tstr_to_utf16le);
380 iconv_cleanup(&iconv_utf16le_to_utf8);
381 iconv_cleanup(&iconv_utf8_to_utf16le);