X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fencoding.c;h=5ff1df2312d906cb1f560a7aa9a756e12efaaa21;hb=0b1278f508ef7606c822edadb3958c2c3648b419;hp=927cb5dad342128017133ad903530643c246b056;hpb=ced16a28e197645a40fa04a54793d117a04526d7;p=wimlib diff --git a/src/encoding.c b/src/encoding.c index 927cb5da..5ff1df23 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -5,27 +5,31 @@ /* * Copyright (C) 2012, 2013 Eric Biggers * - * This file is part of wimlib, a library for working with WIM files. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. - * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. 
*/ #ifdef HAVE_CONFIG_H # include "config.h" #endif +#include +#include +#include +#include + #include "wimlib.h" +#include "wimlib/alloca.h" #include "wimlib/assert.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" @@ -33,15 +37,6 @@ #include "wimlib/list.h" #include "wimlib/util.h" -#include -#include -#include -#include -#include - -#ifdef HAVE_ALLOCA_H -# include -#endif bool wimlib_mbs_is_utf8 = !TCHAR_IS_UTF16LE; @@ -68,8 +63,6 @@ struct iconv_node { struct iconv_list_head name = { \ .from_encoding = from, \ .to_encoding = to, \ - .list = LIST_HEAD_INIT(name.list), \ - .mutex = PTHREAD_MUTEX_INITIALIZER, \ } static iconv_t * @@ -119,11 +112,6 @@ put_iconv(iconv_t *cd) errno = errno_save; } -/* Prevent printing an error message if a character conversion error occurs - * while printing an error message. (This variable is not per-thread but it - * doesn't matter too much since it's just the error messages.) */ -static bool error_message_being_printed = false; - #define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\ varname2, longname2, chartype2,\ earlyreturn_on_utf8_locale, \ @@ -148,7 +136,7 @@ varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\ bool buf_onheap; \ bufsize = (worst_case_len_expr) * sizeof(chartype2); \ /* Worst case length */ \ - if (bufsize <= STACK_MAX) { \ + if (bufsize <= STACK_MAX) { \ buf = alloca(bufsize); \ buf_onheap = false; \ } else { \ @@ -167,11 +155,7 @@ varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\ \ len = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); \ if (len == (size_t)-1) { \ - if (!error_message_being_printed) { \ - error_message_being_printed = true; \ - err_msg; \ - error_message_being_printed = false; \ - } \ + err_msg; \ ret = err_return; \ } else { \ *out_nbytes_ret = bufsize - outbytesleft; \ @@ -201,11 +185,7 @@ varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \ \ len = iconv(*cd, &inbuf, &inbytesleft, 
&outbuf, &outbytesleft); \ if (len == (size_t)-1) { \ - if (!error_message_being_printed) { \ - error_message_being_printed = true; \ - err_msg; \ - error_message_being_printed = false; \ - } \ + err_msg; \ ret = err_return; \ } else { \ out[(LARGE_NUMBER-outbytesleft)/sizeof(chartype2)] = 0; \ @@ -376,6 +356,13 @@ utf8_to_tstr_simple(const char *utf8str, tchar **out) return utf8_to_tstr(utf8str, strlen(utf8str), out, &out_nbytes); } +static void +iconv_init(struct iconv_list_head *head) +{ + pthread_mutex_init(&head->mutex, NULL); + INIT_LIST_HEAD(&head->list); +} + static void iconv_cleanup(struct iconv_list_head *head) { @@ -390,6 +377,19 @@ iconv_cleanup(struct iconv_list_head *head) } } +void +iconv_global_init(void) +{ + iconv_init(&iconv_utf8_to_tstr); + iconv_init(&iconv_tstr_to_utf8); +#if !TCHAR_IS_UTF16LE + iconv_init(&iconv_utf16le_to_tstr); + iconv_init(&iconv_tstr_to_utf16le); + iconv_init(&iconv_utf16le_to_utf8); + iconv_init(&iconv_utf8_to_utf16le); +#endif +} + void iconv_global_cleanup(void) { @@ -558,9 +558,35 @@ cmp_utf16le_strings(const utf16lechar *s1, size_t n1, return (n1 < n2) ? -1 : 1; } -/* Duplicate a UTF16-LE string which may not be null-terminated. */ +/* Like cmp_utf16le_strings(), but assumes the strings are null terminated. */ +int +cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, + bool ignore_case) +{ + if (ignore_case) { + for (;;) { + u16 c1 = upcase[le16_to_cpu(*s1)]; + u16 c2 = upcase[le16_to_cpu(*s2)]; + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + if (c1 == 0) + return 0; + s1++, s2++; + } + } else { + while (*s1 && *s1 == *s2) + s1++, s2++; + if (*s1 == *s2) + return 0; + return (le16_to_cpu(*s1) < le16_to_cpu(*s2)) ? -1 : 1; + } +} + +/* Duplicate a UTF-16LE string. The input string might not be null terminated + * and might be misaligned, but the returned string is guaranteed to be null + * terminated and properly aligned. 
*/ utf16lechar * -utf16le_dupz(const utf16lechar *ustr, size_t usize) +utf16le_dupz(const void *ustr, size_t usize) { utf16lechar *dup = MALLOC(usize + sizeof(utf16lechar)); if (dup) { @@ -569,3 +595,32 @@ utf16le_dupz(const utf16lechar *ustr, size_t usize) } return dup; } + +/* Duplicate a null-terminated UTF-16LE string. */ +utf16lechar * +utf16le_dup(const utf16lechar *ustr) +{ + const utf16lechar *p = ustr; + while (*p++) + ; + return memdup(ustr, (const u8 *)p - (const u8 *)ustr); +} + +/* Return the length, in bytes, of a null-terminated UTF-16LE string, + * excluding the null terminator. */ +size_t +utf16le_len_bytes(const utf16lechar *s) +{ + const utf16lechar *p = s; + while (*p) + p++; + return (p - s) * sizeof(utf16lechar); +} + +/* Return the length, in UTF-16 code units, of a null-terminated UTF-16LE + * string, excluding the null terminator. */ +size_t +utf16le_len_chars(const utf16lechar *s) +{ + return utf16le_len_bytes(s) / sizeof(utf16lechar); +}