From 4e32f48feb64932954953a87f064170500f93221 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 24 Jun 2016 19:41:23 -0500 Subject: [PATCH] Character encoding and string conversion updates - Allow unpaired surrogates when translating between "UTF-8" and "UTF-16LE". This allows Windows-style filenames to be processed losslessly on UNIX-like systems, even if they are not valid UTF-16LE. - Implement UTF-8 and UTF-16LE codecs ourselves and drop the iconv requirement. This was necessary to allow surrogate codepoints, but it also provides better performance and actually results in *fewer* lines of code and a slightly smaller binary. - Drop support for multibyte encodings other than UTF-8 on UNIX-like systems. It is probably not worth the effort to support such encodings. Interestingly, the support was entirely broken before v1.9.1, yet no one ever complained... --- .gitignore | 1 - Makefile.am | 1 - configure.ac | 10 - doc/man1/wimlib-imagex-capture.1 | 14 +- doc/man1/wimlib-imagex.1 | 11 - include/wimlib.h | 32 +- include/wimlib/encoding.h | 187 +++++---- include/wimlib/ntfs_3g.h | 3 - include/wimlib_tchar.h | 16 +- m4/iconv.m4 | 268 ------------- programs/imagex.c | 25 -- src/dentry.c | 3 +- src/encoding.c | 633 ++++++++++++------------------- src/error.c | 9 +- src/iterate_dir.c | 8 +- src/mount_image.c | 2 +- src/ntfs-3g_apply.c | 6 - src/registry.c | 3 +- src/reparse.c | 2 +- src/wim.c | 29 +- src/xml.c | 38 +- tools/windeps/Makefile | 36 -- tools/windeps/sha256sums | 1 - 23 files changed, 398 insertions(+), 940 deletions(-) delete mode 100644 m4/iconv.m4 diff --git a/.gitignore b/.gitignore index 98944191..55644400 100644 --- a/.gitignore +++ b/.gitignore @@ -48,7 +48,6 @@ /tools/windeps/libxml2* /tools/windeps/mingw* /tools/windeps/sysroot_* -/tools/windeps/win-iconv* /tools/windeps/winpthreads* /wimlib-*-bin/ /wimlib-*.tar diff --git a/Makefile.am b/Makefile.am index ff5a29bb..6754144b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -204,7 +204,6 @@ 
libwim_la_LDFLAGS = $(AM_LDFLAGS) -version-info 26:0:11 libwim_la_LIBADD = \ $(PTHREAD_LIBS) \ $(LIBXML2_LIBS) \ - $(LTLIBICONV) \ $(LIBNTFS_3G_LIBS) \ $(LIBFUSE_LIBS) \ $(LIBRT_LIBS) \ diff --git a/configure.ac b/configure.ac index c6aed057..b978c03b 100644 --- a/configure.ac +++ b/configure.ac @@ -102,16 +102,6 @@ AX_PTHREAD([], [AC_MSG_ERROR(["cannot find pthreads library"])]) PKG_CHECK_MODULES([LIBXML2], [libxml-2.0]) PKGCONFIG_PRIVATE_REQUIRES="$PKGCONFIG_PRIVATE_REQUIRES libxml-2.0" -# ------------------------------ libiconv ------------------------------------- -AM_ICONV -if test "$am_cv_func_iconv" != "yes"; then - AC_MSG_ERROR([Cannot find the iconv() function. iconv() is used to - convert between encodings of WIM filenames and XML data. - wimlib cannot be compiled without it. iconv() is - available in the latest version of glibc and sometimes in - other libraries.]) -fi - ############################################################################### # Configuration options # ############################################################################### diff --git a/doc/man1/wimlib-imagex-capture.1 b/doc/man1/wimlib-imagex-capture.1 index d86f043b..226441c6 100644 --- a/doc/man1/wimlib-imagex-capture.1 +++ b/doc/man1/wimlib-imagex-capture.1 @@ -69,12 +69,14 @@ With \fB--unix-data\fR: UNIX owners, groups, and modes With \fB--unix-data\fR: device nodes, FIFOs, and UNIX domain sockets .PP There is no support for storing extended attributes (e.g. SELinux security -labels and POSIX ACLs). Also note that last status change times (ctime) are not -stored. -.PP -Pedantic note: A limitation of the WIM format prevents the unusual case where a -single symbolic link file itself has multiple names (hard links); in this -unlikely case, each symbolic link is stored as an independent file. +labels and POSIX ACLs), last status change times (ctimes), or hard link +information for symbolic link files (each symbolic link will be stored as an +independent file). 
In addition, filenames and symbolic link targets on UNIX +filesystems which are not valid UTF-8 with the addition of surrogate codepoints +are unsupported. Note: if you have a filesystem containing filenames in another +character encoding, such as ISO-8859-1, and you wish to archive it with wimlib, +you may be able to mount it with an option which causes its filenames to be +presented as UTF-8. .SH NTFS VOLUME CAPTURE (UNIX) This section documents how \fBwimlib-imagex\fR captures files directly from an NTFS volume image on UNIX-like systems. diff --git a/doc/man1/wimlib-imagex.1 b/doc/man1/wimlib-imagex.1 index 3fe685af..9ec74282 100644 --- a/doc/man1/wimlib-imagex.1 +++ b/doc/man1/wimlib-imagex.1 @@ -200,17 +200,6 @@ driver (WOF). With the \fB--wimboot\fR option, \fBwimapply\fR will extract VSS snapshot support. On Windows, \fBwimcapture\fR or \fBwimappend\fR with the \fB--snapshot\fR option will automatically create a temporary VSS snapshot and capture the image from it. This can be used to image a "live" Windows system. -.SH LOCALES AND CHARACTER ENCODINGS -WIM files themselves store file and stream names using Windows native "wide -character strings", which are UTF-16. On Windows, wimlib works using these same -strings, so conversions are usually not necessary and there should be no -problems with character encodings. -.PP -On UNIX-like systems, wimlib works primarily in the locale-dependent multibyte -encoding, which you are strongly recommended to set to UTF-8 to avoid any -problems. You can alternatively set the environmental variable -\fBWIMLIB_IMAGEX_USE_UTF8\fR to force \fBwimlib-imagex\fR to use UTF-8 -internally, even if the current locale is not UTF-8 compatible. .SH CASE SENSITIVITY By default, the case sensitivity of \fBwimlib-imagex\fR differs somewhat between UNIX-like systems and Windows.
WIM images may (but usually do not) have diff --git a/include/wimlib.h b/include/wimlib.h index 7254a126..277932d6 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -141,20 +141,21 @@ * messages and strings (as well as all documentation, for that matter) are only * available in English. * - * @section sec_encodings Locales and character encodings + * @section sec_encodings Character encoding * * To support Windows as well as UNIX-like systems, wimlib's API typically takes - * and returns strings of ::wimlib_tchar, which are in a platform-dependent - * encoding. + * and returns strings of ::wimlib_tchar which have a platform-dependent type + * and encoding. * - * On Windows, each ::wimlib_tchar is 2 bytes and is the same as a "wchar_t", - * and the encoding is UTF-16LE. + * On Windows, each ::wimlib_tchar is a 2-byte wchar_t. The encoding + * is meant to be UTF-16LE. However, unpaired surrogates are permitted because + * neither Windows nor the NTFS filesystem forbids them in filenames. * - * On UNIX-like systems, each ::wimlib_tchar is 1 byte and is simply a "char", - * and the encoding is the locale-dependent multibyte encoding. I recommend you - * set your locale to a UTF-8 capable locale to avoid any issues. Also, by - * default, wimlib on UNIX will assume the locale is UTF-8 capable unless you - * call wimlib_global_init() after having set your desired locale. + * On UNIX-like systems, each ::wimlib_tchar is a 1-byte char. The + * encoding is meant to be UTF-8. However, for compatibility with Windows-style + * filenames that are not valid UTF-16LE, surrogate codepoints are permitted. + * Other encodings (e.g. ISO-8859-1) or garbage sequences of bytes are + * not permitted.
* * @section sec_advanced Additional information and features * @@ -2323,9 +2324,7 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour /** @addtogroup G_general * @{ */ -/** Assume that strings are represented in UTF-8, even if this is not the - * locale's character encoding. This flag is ignored on Windows, where wimlib - * always uses UTF-16LE. */ +/** Deprecated; no longer has any effect. */ #define WIMLIB_INIT_FLAG_ASSUME_UTF8 0x00000001 /** Windows-only: do not attempt to acquire additional privileges (currently @@ -2462,7 +2461,6 @@ enum wimlib_error_code { WIMLIB_ERR_DECOMPRESSION = 2, WIMLIB_ERR_FUSE = 6, WIMLIB_ERR_GLOB_HAD_NO_MATCHES = 8, - WIMLIB_ERR_ICONV_NOT_AVAILABLE = 9, WIMLIB_ERR_IMAGE_COUNT = 10, WIMLIB_ERR_IMAGE_NAME_COLLISION = 11, WIMLIB_ERR_INSUFFICIENT_PRIVILEGES = 12, @@ -2475,7 +2473,6 @@ enum wimlib_error_code { WIMLIB_ERR_INVALID_INTEGRITY_TABLE = 19, WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY = 20, WIMLIB_ERR_INVALID_METADATA_RESOURCE = 21, - WIMLIB_ERR_INVALID_MULTIBYTE_STRING = 22, WIMLIB_ERR_INVALID_OVERLAY = 23, WIMLIB_ERR_INVALID_PARAM = 24, WIMLIB_ERR_INVALID_PART_NUMBER = 25, @@ -3278,9 +3275,8 @@ wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret); * * Initialization function for wimlib. Call before using any other wimlib * function (except possibly wimlib_set_print_errors()). If not done manually, - * this function will be called automatically with @p init_flags set to - * ::WIMLIB_INIT_FLAG_ASSUME_UTF8. This function does nothing if called again - * after it has already successfully run. + * this function will be called automatically with a flags argument of 0. This + * function does nothing if called again after it has already successfully run. * * @param init_flags * Bitwise OR of flags prefixed with WIMLIB_INIT_FLAG. 
diff --git a/include/wimlib/encoding.h b/include/wimlib/encoding.h index f40f77be..9216ca2c 100644 --- a/include/wimlib/encoding.h +++ b/include/wimlib/encoding.h @@ -7,155 +7,142 @@ #include "wimlib/util.h" #include "wimlib/types.h" -extern void -iconv_global_init(void); - -extern void -iconv_global_cleanup(void); - -extern u16 upcase[65536]; - -extern void -init_upcase(void); - -extern bool wimlib_mbs_is_utf8; - -#define DECLARE_CHAR_CONVERSION_FUNCTIONS(varname1, varname2, \ - chartype1, chartype2) \ - \ -extern int \ -varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \ - chartype2 **out_ret, \ - size_t *out_nbytes_ret); \ - \ -extern int \ -varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\ - size_t *out_nbytes_ret); \ - \ -extern int \ -varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \ - chartype2 *out); +/* String conversion functions */ -extern utf16lechar * -utf16le_dupz(const void *ustr, size_t usize); - -extern utf16lechar * -utf16le_dup(const utf16lechar *ustr); - -extern size_t -utf16le_len_bytes(const utf16lechar *s); - -extern size_t -utf16le_len_chars(const utf16lechar *s); +extern int +utf8_to_utf16le(const char *in, size_t in_nbytes, + utf16lechar **out_ret, size_t *out_nbytes_ret); -#if !TCHAR_IS_UTF16LE -DECLARE_CHAR_CONVERSION_FUNCTIONS(utf16le, tstr, utf16lechar, tchar); -DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar); -#else +extern int +utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes, + char **out_ret, size_t *out_nbytes_ret); +/* Identity conversion: duplicate a 'tchar' string. 
*/ static inline int -tstr_to_utf16le(const tchar *tstr, size_t tsize, - utf16lechar **ustr_ret, size_t *usize_ret) +tstr_to_tstr(const tchar *in, size_t in_nbytes, + tchar **out_ret, size_t *out_nbytes_ret) { - utf16lechar *ustr = utf16le_dupz(tstr, tsize); - if (!ustr) + tchar *out = MALLOC(in_nbytes + sizeof(tchar)); + if (unlikely(!out)) return WIMLIB_ERR_NOMEM; - *ustr_ret = ustr; - *usize_ret = tsize; + memcpy(out, in, in_nbytes); + out[in_nbytes / sizeof(tchar)] = 0; + *out_ret = out; + if (out_nbytes_ret) + *out_nbytes_ret = in_nbytes; return 0; } -#define utf16le_to_tstr tstr_to_utf16le +#if TCHAR_IS_UTF16LE -#endif +/* tstr(UTF-16LE) <=> UTF-16LE */ +# define tstr_to_utf16le tstr_to_tstr +# define utf16le_to_tstr tstr_to_tstr -DECLARE_CHAR_CONVERSION_FUNCTIONS(utf8, tstr, char, tchar); -DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf8, tchar, char); +/* tstr(UTF-16LE) <=> UTF-8 */ +# define tstr_to_utf8 utf16le_to_utf8 +# define utf8_to_tstr utf8_to_utf16le -extern int -utf8_to_tstr_simple(const char *utf8str, tchar **out); +#else -extern int -tstr_to_utf8_simple(const tchar *tstr, char **out); +/* tstr(UTF-8) <=> UTF-16LE */ +# define tstr_to_utf16le utf8_to_utf16le +# define utf16le_to_tstr utf16le_to_utf8 -extern int -cmp_utf16le_strings(const utf16lechar *s1, size_t n1, - const utf16lechar *s2, size_t n2, - bool ignore_case); +/* tstr(UTF-8) <=> UTF-8 */ +# define tstr_to_utf8 tstr_to_tstr +# define utf8_to_tstr tstr_to_tstr -extern int -cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, - bool ignore_case); +#endif -/* Convert a string in the platform-dependent encoding to UTF-16LE, but if both - * encodings are UTF-16LE, simply re-use the string. Release with - * tstr_put_utf16le() when done. */ +/* Convert a tchar string to UTF-16LE, but if both encodings are UTF-16LE, then + * simply re-use the string. Release with tstr_put_utf16le() when done. 
*/ static inline int -tstr_get_utf16le_and_len(const tchar *tstr, - const utf16lechar **ustr_ret, size_t *usize_ret) +tstr_get_utf16le_and_len(const tchar *in, + const utf16lechar **out_ret, size_t *out_nbytes_ret) { - size_t tsize = tstrlen(tstr) * sizeof(tchar); + size_t in_nbytes = tstrlen(in) * sizeof(tchar); #if TCHAR_IS_UTF16LE - /* No conversion or copy needed */ - *ustr_ret = tstr; - *usize_ret = tsize; + *out_ret = in; + if (out_nbytes_ret) + *out_nbytes_ret = in_nbytes; return 0; #else - return tstr_to_utf16le(tstr, tsize, (utf16lechar **)ustr_ret, usize_ret); + return tstr_to_utf16le(in, in_nbytes, + (utf16lechar **)out_ret, out_nbytes_ret); #endif } -/* Convert a string in the platform-dependent encoding to UTF-16LE, but if both - * encodings are UTF-16LE, simply re-use the string. Release with - * tstr_put_utf16le() when done. */ static inline int -tstr_get_utf16le(const tchar *tstr, const utf16lechar **ustr_ret) +tstr_get_utf16le(const tchar *in, const utf16lechar **out_ret) { -#if TCHAR_IS_UTF16LE - /* No conversion or copy needed */ - *ustr_ret = tstr; - return 0; -#else - size_t tsize = tstrlen(tstr) * sizeof(tchar); - size_t dummy; - return tstr_to_utf16le(tstr, tsize, (utf16lechar **)ustr_ret, &dummy); -#endif + return tstr_get_utf16le_and_len(in, out_ret, NULL); } /* Release a string acquired with tstr_get_utf16le() or * tstr_get_utf16le_and_len(). */ static inline void -tstr_put_utf16le(const utf16lechar *ustr) +tstr_put_utf16le(const utf16lechar *s) { #if !TCHAR_IS_UTF16LE - FREE((void *)ustr); + FREE((void *)s); #endif } -/* Convert a UTF16-LE string to the platform-dependent encoding, but if both - * encodings are UTF-16LE, simply re-use the string. Release with - * utf16le_put_tstr() when done. */ +/* Convert a UTF-16LE string to a tchar string, but if both encodings are + * UTF-16LE, then simply re-use the string. Release with utf16le_put_tstr() + * when done. 
*/ static inline int -utf16le_get_tstr(const utf16lechar *ustr, size_t usize, - const tchar **tstr_ret, size_t *tsize_ret) +utf16le_get_tstr(const utf16lechar *in, size_t in_nbytes, + const tchar **out_ret, size_t *out_nbytes_ret) { #if TCHAR_IS_UTF16LE - /* No conversion or copy needed */ - *tstr_ret = ustr; - *tsize_ret = usize; + *out_ret = in; + if (out_nbytes_ret) + *out_nbytes_ret = in_nbytes; return 0; #else - return utf16le_to_tstr(ustr, usize, (tchar **)tstr_ret, tsize_ret); + return utf16le_to_tstr(in, in_nbytes, + (tchar **)out_ret, out_nbytes_ret); #endif } /* Release a string acquired with utf16le_get_tstr(). */ static inline void -utf16le_put_tstr(const tchar *tstr) +utf16le_put_tstr(const tchar *s) { #if !TCHAR_IS_UTF16LE - FREE((void *)tstr); + FREE((void *)s); #endif } + +/* UTF-16LE utilities */ + +extern u16 upcase[65536]; + +extern void +init_upcase(void); + +extern int +cmp_utf16le_strings(const utf16lechar *s1, size_t n1, + const utf16lechar *s2, size_t n2, + bool ignore_case); + +extern int +cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, + bool ignore_case); + +extern utf16lechar * +utf16le_dupz(const void *s, size_t size); + +extern utf16lechar * +utf16le_dup(const utf16lechar *s); + +extern size_t +utf16le_len_bytes(const utf16lechar *s); + +extern size_t +utf16le_len_chars(const utf16lechar *s); + #endif /* _WIMLIB_ENCODING_H */ diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h index 0ee9da26..0328348f 100644 --- a/include/wimlib/ntfs_3g.h +++ b/include/wimlib/ntfs_3g.h @@ -9,9 +9,6 @@ struct blob_descriptor; struct ntfs_location; struct read_blob_callbacks; -extern void -libntfs3g_global_init(void); - extern int read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size, const struct read_blob_callbacks *cbs); diff --git a/include/wimlib_tchar.h b/include/wimlib_tchar.h index 5a2038f0..2585fa66 100644 --- a/include/wimlib_tchar.h +++ b/include/wimlib_tchar.h @@ -6,9 +6,11 @@ #ifdef 
__WIN32__ #include -/* For Windows builds, the "tchar" type will be 2 bytes and will be equivalent - * to "wchar_t" and "utf16lechar". All indicate one code unit of a UTF16-LE - * string. */ +/* + * For Windows builds, the "tchar" type will be 2 bytes and will be equivalent + * to "wchar_t" and "utf16lechar". All indicate one coding unit of a string + * encoded in UTF-16LE with the additional possibility of unpaired surrogates. + */ typedef wchar_t tchar; # define TCHAR_IS_UTF16LE 1 # define _T(text) L##text @@ -66,9 +68,11 @@ typedef wchar_t tchar; # define trename win32_rename_replacement # define tglob win32_wglob #else /* __WIN32__ */ -/* For non-Windows builds, the "tchar" type will be one byte and will specify a - * string in the locale-dependent multibyte encoding. However, only UTF-8 is - * well supported in this library. */ +/* + * For non-Windows builds, the "tchar" type will be one byte and will specify a + * string encoded in UTF-8 with the additional possibility of surrogate + * codepoints. + */ typedef char tchar; # define TCHAR_IS_UTF16LE 0 # define T(text) text /* In this case, strings of "tchar" are simply strings of diff --git a/m4/iconv.m4 b/m4/iconv.m4 deleted file mode 100644 index 6a47236c..00000000 --- a/m4/iconv.m4 +++ /dev/null @@ -1,268 +0,0 @@ -# iconv.m4 serial 18 (gettext-0.18.2) -dnl Copyright (C) 2000-2002, 2007-2012 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -dnl From Bruno Haible. - -AC_DEFUN([AM_ICONV_LINKFLAGS_BODY], -[ - dnl Prerequisites of AC_LIB_LINKFLAGS_BODY. - AC_REQUIRE([AC_LIB_PREPARE_PREFIX]) - AC_REQUIRE([AC_LIB_RPATH]) - - dnl Search for libiconv and define LIBICONV, LTLIBICONV and INCICONV - dnl accordingly. 
- AC_LIB_LINKFLAGS_BODY([iconv]) -]) - -AC_DEFUN([AM_ICONV_LINK], -[ - dnl Some systems have iconv in libc, some have it in libiconv (OSF/1 and - dnl those with the standalone portable GNU libiconv installed). - AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles - - dnl Search for libiconv and define LIBICONV, LTLIBICONV and INCICONV - dnl accordingly. - AC_REQUIRE([AM_ICONV_LINKFLAGS_BODY]) - - dnl Add $INCICONV to CPPFLAGS before performing the following checks, - dnl because if the user has installed libiconv and not disabled its use - dnl via --without-libiconv-prefix, he wants to use it. The first - dnl AC_LINK_IFELSE will then fail, the second AC_LINK_IFELSE will succeed. - am_save_CPPFLAGS="$CPPFLAGS" - AC_LIB_APPENDTOVAR([CPPFLAGS], [$INCICONV]) - - AC_CACHE_CHECK([for iconv], [am_cv_func_iconv], [ - am_cv_func_iconv="no, consider installing GNU libiconv" - am_cv_lib_iconv=no - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include -#include - ]], - [[iconv_t cd = iconv_open("",""); - iconv(cd,NULL,NULL,NULL,NULL); - iconv_close(cd);]])], - [am_cv_func_iconv=yes]) - if test "$am_cv_func_iconv" != yes; then - am_save_LIBS="$LIBS" - LIBS="$LIBS $LIBICONV" - AC_LINK_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include -#include - ]], - [[iconv_t cd = iconv_open("",""); - iconv(cd,NULL,NULL,NULL,NULL); - iconv_close(cd);]])], - [am_cv_lib_iconv=yes] - [am_cv_func_iconv=yes]) - LIBS="$am_save_LIBS" - fi - ]) - if test "$am_cv_func_iconv" = yes; then - AC_CACHE_CHECK([for working iconv], [am_cv_func_iconv_works], [ - dnl This tests against bugs in AIX 5.1, AIX 6.1..7.1, HP-UX 11.11, - dnl Solaris 10. - am_save_LIBS="$LIBS" - if test $am_cv_lib_iconv = yes; then - LIBS="$LIBS $LIBICONV" - fi - AC_RUN_IFELSE( - [AC_LANG_SOURCE([[ -#include -#include -int main () -{ - int result = 0; - /* Test against AIX 5.1 bug: Failures are not distinguishable from successful - returns. 
*/ - { - iconv_t cd_utf8_to_88591 = iconv_open ("ISO8859-1", "UTF-8"); - if (cd_utf8_to_88591 != (iconv_t)(-1)) - { - static const char input[] = "\342\202\254"; /* EURO SIGN */ - char buf[10]; - const char *inptr = input; - size_t inbytesleft = strlen (input); - char *outptr = buf; - size_t outbytesleft = sizeof (buf); - size_t res = iconv (cd_utf8_to_88591, - (char **) &inptr, &inbytesleft, - &outptr, &outbytesleft); - if (res == 0) - result |= 1; - iconv_close (cd_utf8_to_88591); - } - } - /* Test against Solaris 10 bug: Failures are not distinguishable from - successful returns. */ - { - iconv_t cd_ascii_to_88591 = iconv_open ("ISO8859-1", "646"); - if (cd_ascii_to_88591 != (iconv_t)(-1)) - { - static const char input[] = "\263"; - char buf[10]; - const char *inptr = input; - size_t inbytesleft = strlen (input); - char *outptr = buf; - size_t outbytesleft = sizeof (buf); - size_t res = iconv (cd_ascii_to_88591, - (char **) &inptr, &inbytesleft, - &outptr, &outbytesleft); - if (res == 0) - result |= 2; - iconv_close (cd_ascii_to_88591); - } - } - /* Test against AIX 6.1..7.1 bug: Buffer overrun. */ - { - iconv_t cd_88591_to_utf8 = iconv_open ("UTF-8", "ISO-8859-1"); - if (cd_88591_to_utf8 != (iconv_t)(-1)) - { - static const char input[] = "\304"; - static char buf[2] = { (char)0xDE, (char)0xAD }; - const char *inptr = input; - size_t inbytesleft = 1; - char *outptr = buf; - size_t outbytesleft = 1; - size_t res = iconv (cd_88591_to_utf8, - (char **) &inptr, &inbytesleft, - &outptr, &outbytesleft); - if (res != (size_t)(-1) || outptr - buf > 1 || buf[1] != (char)0xAD) - result |= 4; - iconv_close (cd_88591_to_utf8); - } - } -#if 0 /* This bug could be worked around by the caller. */ - /* Test against HP-UX 11.11 bug: Positive return value instead of 0. 
*/ - { - iconv_t cd_88591_to_utf8 = iconv_open ("utf8", "iso88591"); - if (cd_88591_to_utf8 != (iconv_t)(-1)) - { - static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; - char buf[50]; - const char *inptr = input; - size_t inbytesleft = strlen (input); - char *outptr = buf; - size_t outbytesleft = sizeof (buf); - size_t res = iconv (cd_88591_to_utf8, - (char **) &inptr, &inbytesleft, - &outptr, &outbytesleft); - if ((int)res > 0) - result |= 8; - iconv_close (cd_88591_to_utf8); - } - } -#endif - /* Test against HP-UX 11.11 bug: No converter from EUC-JP to UTF-8 is - provided. */ - if (/* Try standardized names. */ - iconv_open ("UTF-8", "EUC-JP") == (iconv_t)(-1) - /* Try IRIX, OSF/1 names. */ - && iconv_open ("UTF-8", "eucJP") == (iconv_t)(-1) - /* Try AIX names. */ - && iconv_open ("UTF-8", "IBM-eucJP") == (iconv_t)(-1) - /* Try HP-UX names. */ - && iconv_open ("utf8", "eucJP") == (iconv_t)(-1)) - result |= 16; - return result; -}]])], - [am_cv_func_iconv_works=yes], - [am_cv_func_iconv_works=no], - [ -changequote(,)dnl - case "$host_os" in - aix* | hpux*) am_cv_func_iconv_works="guessing no" ;; - *) am_cv_func_iconv_works="guessing yes" ;; - esac -changequote([,])dnl - ]) - LIBS="$am_save_LIBS" - ]) - case "$am_cv_func_iconv_works" in - *no) am_func_iconv=no am_cv_lib_iconv=no ;; - *) am_func_iconv=yes ;; - esac - else - am_func_iconv=no am_cv_lib_iconv=no - fi - if test "$am_func_iconv" = yes; then - AC_DEFINE([HAVE_ICONV], [1], - [Define if you have the iconv() function and it works.]) - fi - if test "$am_cv_lib_iconv" = yes; then - AC_MSG_CHECKING([how to link with libiconv]) - AC_MSG_RESULT([$LIBICONV]) - else - dnl If $LIBICONV didn't lead to a usable library, we don't need $INCICONV - dnl either. 
- CPPFLAGS="$am_save_CPPFLAGS" - LIBICONV= - LTLIBICONV= - fi - AC_SUBST([LIBICONV]) - AC_SUBST([LTLIBICONV]) -]) - -dnl Define AM_ICONV using AC_DEFUN_ONCE for Autoconf >= 2.64, in order to -dnl avoid warnings like -dnl "warning: AC_REQUIRE: `AM_ICONV' was expanded before it was required". -dnl This is tricky because of the way 'aclocal' is implemented: -dnl - It requires defining an auxiliary macro whose name ends in AC_DEFUN. -dnl Otherwise aclocal's initial scan pass would miss the macro definition. -dnl - It requires a line break inside the AC_DEFUN_ONCE and AC_DEFUN expansions. -dnl Otherwise aclocal would emit many "Use of uninitialized value $1" -dnl warnings. -m4_define([gl_iconv_AC_DEFUN], - m4_version_prereq([2.64], - [[AC_DEFUN_ONCE( - [$1], [$2])]], - [m4_ifdef([gl_00GNULIB], - [[AC_DEFUN_ONCE( - [$1], [$2])]], - [[AC_DEFUN( - [$1], [$2])]])])) -gl_iconv_AC_DEFUN([AM_ICONV], -[ - AM_ICONV_LINK - if test "$am_cv_func_iconv" = yes; then - AC_MSG_CHECKING([for iconv declaration]) - AC_CACHE_VAL([am_cv_proto_iconv], [ - AC_COMPILE_IFELSE( - [AC_LANG_PROGRAM( - [[ -#include -#include -extern -#ifdef __cplusplus -"C" -#endif -#if defined(__STDC__) || defined(_MSC_VER) || defined(__cplusplus) -size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft); -#else -size_t iconv(); -#endif - ]], - [[]])], - [am_cv_proto_iconv_arg1=""], - [am_cv_proto_iconv_arg1="const"]) - am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"]) - am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'` - AC_MSG_RESULT([ - $am_cv_proto_iconv]) - AC_DEFINE_UNQUOTED([ICONV_CONST], [$am_cv_proto_iconv_arg1], - [Define as const if the declaration of iconv() needs const.]) - dnl Also substitute ICONV_CONST in the gnulib generated . 
- m4_ifdef([gl_ICONV_H_DEFAULTS], - [AC_REQUIRE([gl_ICONV_H_DEFAULTS]) - if test -n "$am_cv_proto_iconv_arg1"; then - ICONV_CONST="const" - fi - ]) - fi -]) diff --git a/programs/imagex.c b/programs/imagex.c index 0f8c45f1..1eb8489b 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -4620,31 +4620,6 @@ main(int argc, tchar **argv) imagex_info_file = stdout; invocation_name = tbasename(argv[0]); -#ifndef __WIN32__ - if (getenv("WIMLIB_IMAGEX_USE_UTF8")) { - init_flags |= WIMLIB_INIT_FLAG_ASSUME_UTF8; - } else { - char *codeset; - - setlocale(LC_ALL, ""); - codeset = nl_langinfo(CODESET); - if (!strstr(codeset, "UTF-8") && - !strstr(codeset, "UTF8") && - !strstr(codeset, "utf-8") && - !strstr(codeset, "utf8")) - { - fprintf(stderr, -"WARNING: Running %"TS" in a UTF-8 locale is recommended!\n" -" Maybe try: `export LANG=en_US.UTF-8'?\n" -" Alternatively, set the environmental variable WIMLIB_IMAGEX_USE_UTF8\n" -" to any value to force wimlib to use UTF-8.\n", - invocation_name); - - } - } - -#endif /* !__WIN32__ */ - { tchar *igcase = tgetenv(T("WIMLIB_IMAGEX_IGNORE_CASE")); if (igcase != NULL) { diff --git a/src/dentry.c b/src/dentry.c index db333849..dac85c35 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -484,7 +484,6 @@ int calculate_dentry_full_path(struct wim_dentry *dentry) { size_t ulen; - size_t dummy; const struct wim_dentry *d; if (dentry->d_full_path) @@ -513,7 +512,7 @@ calculate_dentry_full_path(struct wim_dentry *dentry) wimlib_assert(p == ubuf); return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar), - &dentry->d_full_path, &dummy); + &dentry->d_full_path, NULL); } /* diff --git a/src/encoding.c b/src/encoding.c index 4fd8712b..6d40605b 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -1,9 +1,7 @@ /* - * encoding.c - */ - -/* - * Copyright (C) 2012, 2013 Eric Biggers + * encoding.c - UTF-8 and UTF-16LE codecs and utility functions + * + * Copyright (C) 2012-2016 Eric Biggers * * This file is free software; you can redistribute it and/or 
modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -24,386 +22,265 @@ #endif #include <errno.h> -#include <iconv.h> -#include <pthread.h> #include <string.h> -#include "wimlib.h" -#include "wimlib/alloca.h" -#include "wimlib/assert.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/list.h" +#include "wimlib/unaligned.h" #include "wimlib/util.h" +/* + * Allow unpaired surrogates, such as might exist in Windows-style filenames --- + * which are normally valid UTF-16LE, but are actually treated as opaque + * sequences of 16-bit WCHARs by Windows. When decoding "UTF-16LE", unpaired + * surrogates will be decoded as their surrogate codepoints; and when encoding + * to and from "UTF-8", the encoding will actually be WTF-8 ("Wobbly + * Transformation Format - 8-bit"), a superset of UTF-8 which permits the + * surrogate codepoints. + * + * In combination with also allowing the "non-character" codepoints U+FFFE and + * U+FFFF, the result is that every Windows-style filename can be translated to + * a UNIX-style filename. + * + * Unfortunately, the converse is not true: not every UNIX filename can be + * translated to a Windows filename. Only UNIX filenames that are valid "WTF-8" + * can be translated. I considered ways to define a bijective mapping, but + * there did not seem to be a straightforward way. The "UTF-8b" scheme, for + * example, would map each invalid byte 'b' to a surrogate "escape code" 'U+DC00 + * + b'. The problem with this was that surrogate escape codes can be combined + * to create a valid UTF-8 sequence, thus breaking the bijection by mapping + * multiple Windows filenames to a single UNIX filename. + */ +#define ALLOW_UNPAIRED_SURROGATES 1 -bool wimlib_mbs_is_utf8 = !TCHAR_IS_UTF16LE; - -/* List of iconv_t conversion descriptors for a specific character conversion.
- * The idea is that it is not thread-safe to have just one conversion - * descriptor, but it also is inefficient to open a new conversion descriptor to - * convert every string. Both these problems can be solved by maintaining a - * list of conversion descriptors; then, a thread can use an existing conversion - * descriptor if available. */ -struct iconv_list_head { - const char *from_encoding; - const char *to_encoding; - struct list_head list; - pthread_mutex_t mutex; -}; - -struct iconv_node { - iconv_t cd; - struct list_head list; - struct iconv_list_head *head; -}; - -#define ICONV_LIST(name, from, to) \ -struct iconv_list_head name = { \ - .from_encoding = from, \ - .to_encoding = to, \ -} +#define INVALID_CODEPOINT 0xFFFFFFFF +#define VALIDATE(expr) if (validate && unlikely(!(expr))) goto invalid +#define IS_SURROGATE(c) ((c) >= 0xD800 && (c) < 0xE000) +#define IS_HIGH_SURROGATE(c) ((c) >= 0xD800 && (c) < 0xDC00) +#define IS_LOW_SURROGATE(c) ((c) >= 0xDC00 && (c) < 0xE000) +#define IS_UTF8_TAIL(c) (((c) & 0xC0) == 0x80) -static iconv_t * -get_iconv(struct iconv_list_head *head) +/* + * Decode the next Unicode codepoint from the string at @in, which has + * @remaining >= 1 bytes remaining. Return the number of bytes consumed and + * write the decoded codepoint to *c_ret. + * + * If the input might not be a valid string in the source encoding, then + * @validate must be specified as %true, and then on invalid input the function + * consumes at least one byte and sets *c_ret to INVALID_CODEPOINT. If the + * input is guaranteed to be valid, then @validate may be specified as %false. + */ +typedef unsigned (*decode_codepoint_fn)(const u8 *in, size_t remaining, + bool validate, u32 *c_ret); + +/* Encode the Unicode codepoint @c and return the number of bytes used. 
*/ +typedef unsigned (*encode_codepoint_fn)(u32 c, u8 *out); + +static inline unsigned +utf8_decode_codepoint(const u8 *in, size_t remaining, bool validate, u32 *c_ret) { - iconv_t cd; - iconv_t *cd_p; - struct iconv_node *i; - - pthread_mutex_lock(&head->mutex); - if (list_empty(&head->list)) { - cd = iconv_open(head->to_encoding, head->from_encoding); - if (cd == (iconv_t)-1) { - ERROR_WITH_ERRNO("Failed to open iconv from %s to %s", - head->from_encoding, head->to_encoding); - cd_p = NULL; - } else { - i = MALLOC(sizeof(struct iconv_node)); - if (i) { - i->head = head; - i->cd = cd; - cd_p = &i->cd; - } else { - iconv_close(cd); - cd_p = NULL; - } - } - } else { - i = container_of(head->list.next, struct iconv_node, list); - list_del(head->list.next); - cd_p = &i->cd; + if (likely(in[0] < 0x80)) { /* U+0...U+7F */ + *c_ret = in[0]; + return 1; } - pthread_mutex_unlock(&head->mutex); - return cd_p; -} -static void -put_iconv(iconv_t *cd) -{ - int errno_save = errno; - struct iconv_node *i = container_of(cd, struct iconv_node, cd); - struct iconv_list_head *head = i->head; - - pthread_mutex_lock(&head->mutex); - list_add(&i->list, &head->list); - pthread_mutex_unlock(&head->mutex); - errno = errno_save; -} + if (in[0] < 0xE0) { /* U+80...U+7FF */ + VALIDATE(in[0] >= 0xC2 && remaining >= 2 && + IS_UTF8_TAIL(in[1])); + *c_ret = ((u32)(in[0] & 0x1F) << 6) | + ((u32)(in[1] & 0x3F) << 0); + return 2; + } + + if (in[0] < 0xF0) { /* U+800...U+FFFF, possibly excluding surrogates */ + VALIDATE(remaining >= 3 && + IS_UTF8_TAIL(in[1]) && + IS_UTF8_TAIL(in[2])); + *c_ret = ((u32)(in[0] & 0x0F) << 12) | + ((u32)(in[1] & 0x3F) << 6) | + ((u32)(in[2] & 0x3F) << 0); + VALIDATE(*c_ret >= 0x800); + #if !ALLOW_UNPAIRED_SURROGATES + VALIDATE(!IS_SURROGATE(*c_ret)); + #endif + return 3; + } -#define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\ - varname2, longname2, chartype2,\ - earlyreturn_on_utf8_locale, \ - earlyreturn_expr, \ - worst_case_len_expr, \ - 
err_return, \ - err_msg, \ - modifier) \ -static ICONV_LIST(iconv_##varname1##_to_##varname2, \ - longname1, longname2); \ - \ -modifier int \ -varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\ - size_t *out_nbytes_ret) \ -{ \ - iconv_t *cd = get_iconv(&iconv_##varname1##_to_##varname2); \ - if (cd == NULL) \ - return WIMLIB_ERR_ICONV_NOT_AVAILABLE; \ - \ - chartype2 *buf; \ - size_t bufsize; \ - bool buf_onheap; \ - bufsize = (worst_case_len_expr) * sizeof(chartype2); \ - /* Worst case length */ \ - if (bufsize <= STACK_MAX) { \ - buf = alloca(bufsize); \ - buf_onheap = false; \ - } else { \ - buf = MALLOC(bufsize); \ - if (!buf) \ - return WIMLIB_ERR_NOMEM; \ - buf_onheap = true; \ - } \ - \ - char *inbuf = (char*)in; \ - size_t inbytesleft = in_nbytes; \ - char *outbuf = (char*)buf; \ - size_t outbytesleft = bufsize; \ - size_t len; \ - int ret; \ - \ - len = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); \ - if (len == (size_t)-1) { \ - err_msg; \ - ret = err_return; \ - } else { \ - *out_nbytes_ret = bufsize - outbytesleft; \ - ret = 0; \ - } \ - put_iconv(cd); \ - if (buf_onheap) \ - FREE(buf); \ - return ret; \ -} \ - \ -modifier int \ -varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \ - chartype2 *out) \ -{ \ - iconv_t *cd = get_iconv(&iconv_##varname1##_to_##varname2); \ - if (cd == NULL) \ - return WIMLIB_ERR_ICONV_NOT_AVAILABLE; \ - \ - char *inbuf = (char*)in; \ - size_t inbytesleft = in_nbytes; \ - char *outbuf = (char*)out; \ - const size_t LARGE_NUMBER = 1000000000; \ - size_t outbytesleft = LARGE_NUMBER; \ - size_t len; \ - int ret; \ - \ - len = iconv(*cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); \ - if (len == (size_t)-1) { \ - err_msg; \ - ret = err_return; \ - } else { \ - out[(LARGE_NUMBER-outbytesleft)/sizeof(chartype2)] = 0; \ - ret = 0; \ - } \ - put_iconv(cd); \ - return ret; \ -} \ - \ -modifier int \ -varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \ - 
chartype2 **out_ret, \ - size_t *out_nbytes_ret) \ -{ \ - int ret; \ - chartype2 *out; \ - size_t out_nbytes; \ - \ - if (earlyreturn_on_utf8_locale && wimlib_mbs_is_utf8) { \ - earlyreturn_expr; \ - /* Out same as in */ \ - out = MALLOC(in_nbytes + sizeof(chartype2)); \ - if (!out) \ - return WIMLIB_ERR_NOMEM; \ - memcpy(out, in, in_nbytes); \ - out[in_nbytes / sizeof(chartype2)] = 0; \ - *out_ret = out; \ - *out_nbytes_ret = in_nbytes; \ - return 0; \ - } \ - \ - ret = varname1##_to_##varname2##_nbytes(in, in_nbytes, \ - &out_nbytes); \ - if (ret) \ - return ret; \ - \ - out = MALLOC(out_nbytes + sizeof(chartype2)); \ - if (!out) \ - return WIMLIB_ERR_NOMEM; \ - \ - ret = varname1##_to_##varname2##_buf(in, in_nbytes, out); \ - if (ret) { \ - FREE(out); \ - } else { \ - *out_ret = out; \ - *out_nbytes_ret = out_nbytes; \ - } \ - return ret; \ + /* U+10000...U+10FFFF */ + VALIDATE(in[0] < 0xF8 && remaining >= 4 && + IS_UTF8_TAIL(in[1]) && + IS_UTF8_TAIL(in[2]) && + IS_UTF8_TAIL(in[3])); + *c_ret = ((u32)(in[0] & 0x07) << 18) | + ((u32)(in[1] & 0x3F) << 12) | + ((u32)(in[2] & 0x3F) << 6) | + ((u32)(in[3] & 0x3F) << 0); + VALIDATE(*c_ret >= 0x10000 && *c_ret <= 0x10FFFF); + return 4; + +invalid: + *c_ret = INVALID_CODEPOINT; + return 1; } -#if !TCHAR_IS_UTF16LE - -/* UNIX */ - -DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", tchar, - utf16le, "UTF-16LE", utf16lechar, - false, - , - in_nbytes * 2, - WIMLIB_ERR_INVALID_UTF8_STRING, - ERROR_WITH_ERRNO("Failed to convert UTF-8 string " - "to UTF-16LE string!"), - static) - -DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar, - utf8, "UTF-8", tchar, - false, - , - in_nbytes * 2, - WIMLIB_ERR_INVALID_UTF16_STRING, - ERROR_WITH_ERRNO("Failed to convert UTF-16LE string " - "to UTF-8 string!"), - static) - -DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, - utf16le, "UTF-16LE", utf16lechar, - true, - return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret), - in_nbytes * 2, - 
WIMLIB_ERR_INVALID_MULTIBYTE_STRING, - ERROR_WITH_ERRNO("Failed to convert multibyte " - "string \"%"TS"\" to UTF-16LE string!", in); - ERROR("If the data you provided was UTF-8, please make sure " - "the character encoding\n" - " of your current locale is UTF-8."), - ) - -DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar, - tstr, "", tchar, - true, - return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret), - in_nbytes * 2, - WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE, - ERROR("Failed to convert UTF-16LE string to " - "multibyte string!"); - ERROR("This may be because the UTF-16LE string " - "could not be represented\n" - " in your locale's character encoding."), - ) -#endif +static inline unsigned +utf8_encode_codepoint(u32 c, u8 *out) +{ + if (likely(c < 0x80)) { + out[0] = c; + return 1; + } -/* tchar to UTF-8 and back */ -#if TCHAR_IS_UTF16LE - -/* Windows */ -DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar, - utf8, "UTF-8", char, - false, - , - in_nbytes * 2, - WIMLIB_ERR_INVALID_UTF16_STRING, - ERROR_WITH_ERRNO("Failed to convert UTF-16LE " - "string \"%"TS"\" to UTF-8 string!", in), - ) - -DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, - tstr, "UTF-16LE", tchar, - false, - , - in_nbytes * 2, - WIMLIB_ERR_INVALID_UTF8_STRING, - ERROR_WITH_ERRNO("Failed to convert UTF-8 string " - "to UTF-16LE string!"), - ) -#else - -/* UNIX */ - -DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, - utf8, "UTF-8", char, - true, - , - in_nbytes * 4, - WIMLIB_ERR_INVALID_MULTIBYTE_STRING, - ERROR_WITH_ERRNO("Failed to convert multibyte " - "string \"%"TS"\" to UTF-8 string!", in); - ERROR("If the data you provided was UTF-8, please make sure " - "the character\n" - " encoding of your current locale is UTF-8."), - ) - -DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, - tstr, "", tchar, - true, - , - in_nbytes * 4, - WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE, - ERROR("Failed to convert UTF-8 string to " - "multibyte string!"); - 
ERROR("This may be because the UTF-8 data " - "could not be represented\n" - " in your locale's character encoding."), - ) -#endif + if (c < 0x800) { + out[0] = 0xC0 | (c >> 6); + out[1] = 0x80 | (c & 0x3F); + return 2; + } -int -tstr_to_utf8_simple(const tchar *tstr, char **out) -{ - size_t out_nbytes; - return tstr_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar), - out, &out_nbytes); + if (c < 0x10000) { + out[0] = 0xE0 | (c >> 12); + out[1] = 0x80 | ((c >> 6) & 0x3F); + out[2] = 0x80 | (c & 0x3F); + return 3; + } + + out[0] = 0xF0 | (c >> 18); + out[1] = 0x80 | ((c >> 12) & 0x3F); + out[2] = 0x80 | ((c >> 6) & 0x3F); + out[3] = 0x80 | (c & 0x3F); + return 4; } -int -utf8_to_tstr_simple(const char *utf8str, tchar **out) +static inline unsigned +utf16le_decode_codepoint(const u8 *in, size_t remaining, bool validate, + u32 *c_ret) { - size_t out_nbytes; - return utf8_to_tstr(utf8str, strlen(utf8str), out, &out_nbytes); + u32 h, l; + + VALIDATE(remaining >= 2); + h = get_unaligned_le16(in); + if (unlikely(IS_SURROGATE(h))) { + /* Surrogate pairs are U+10000...U+10FFFF. + * Unpaired surrogates are U+D800...U+DFFF. 
*/ + #if ALLOW_UNPAIRED_SURROGATES + if (unlikely(!IS_HIGH_SURROGATE(h) || remaining < 4)) + goto unpaired; + l = get_unaligned_le16(in + 2); + if (unlikely(!IS_LOW_SURROGATE(l))) + goto unpaired; + #else + VALIDATE(IS_HIGH_SURROGATE(h) && remaining >= 4); + l = get_unaligned_le16(in + 2); + VALIDATE(IS_LOW_SURROGATE(l)); + #endif + *c_ret = 0x10000 + ((h - 0xD800) << 10) + (l - 0xDC00); + return 4; + } +#if ALLOW_UNPAIRED_SURROGATES +unpaired: +#endif + *c_ret = h; + return 2; + +invalid: + *c_ret = INVALID_CODEPOINT; + return min(remaining, 2); } -static void -iconv_init(struct iconv_list_head *head) +static inline unsigned +utf16le_encode_codepoint(u32 c, u8 *out) { - pthread_mutex_init(&head->mutex, NULL); - INIT_LIST_HEAD(&head->list); + if (likely(c < 0x10000)) { + put_unaligned_le16(c, out); + return 2; + } + c -= 0x10000; + put_unaligned_le16(0xD800 + (c >> 10), out); + put_unaligned_le16(0xDC00 + (c & 0x3FF), out + 2); + return 4; } -static void -iconv_cleanup(struct iconv_list_head *head) +/* + * Convert the string @in of size @in_nbytes from the encoding given by the + * @decode_codepoint function to the encoding given by the @encode_codepoint + * function. @in does not need to be null-terminated, but a null terminator + * will be added to the output string. + * + * On success, write the allocated output string to @out_ret (must not be NULL) + * and its size excluding the null terminator to @out_nbytes_ret (may be NULL). + * + * If the input string is malformed, return @ilseq_err with errno set to EILSEQ. + * If out of memory, return WIMLIB_ERR_NOMEM with errno set to ENOMEM. 
+ */ +static inline int +convert_string(const u8 * const in, const size_t in_nbytes, + u8 **out_ret, size_t *out_nbytes_ret, + int ilseq_err, + decode_codepoint_fn decode_codepoint, + encode_codepoint_fn encode_codepoint) { - pthread_mutex_destroy(&head->mutex); - while (!list_empty(&head->list)) { - struct iconv_node *i; - - i = container_of(head->list.next, struct iconv_node, list); - list_del(&i->list); - iconv_close(i->cd); - FREE(i); + const u8 * const in_end = in + in_nbytes; + const u8 *p_in; + u8 *p_out; + size_t out_nbytes = 0; + u8 *out; + u8 tmp[8]; /* assuming no codepoint requires > 8 bytes to encode */ + u32 c; + + /* Validate the input string and compute the output size. */ + for (p_in = in; p_in != in_end; ) { + p_in += (*decode_codepoint)(p_in, in_end - p_in, true, &c); + if (unlikely(c == INVALID_CODEPOINT)) { + errno = EILSEQ; + return ilseq_err; + } + out_nbytes += (*encode_codepoint)(c, tmp); } + + /* Allocate the output string, including space for a null terminator. */ + out = MALLOC(out_nbytes + (*encode_codepoint)(0, tmp)); + if (unlikely(!out)) + return WIMLIB_ERR_NOMEM; + + /* Do the conversion. */ + for (p_in = in, p_out = out; p_in != in_end; ) { + p_in += (*decode_codepoint)(p_in, in_end - p_in, false, &c); + p_out += (*encode_codepoint)(c, p_out); + } + + /* Add a null terminator. */ + (*encode_codepoint)(0, p_out); + + /* Return the output string and its size (by reference). 
*/ + *out_ret = out; + if (out_nbytes_ret) + *out_nbytes_ret = out_nbytes; + return 0; } -void -iconv_global_init(void) +int +utf8_to_utf16le(const char *in, size_t in_nbytes, + utf16lechar **out_ret, size_t *out_nbytes_ret) { - iconv_init(&iconv_utf8_to_tstr); - iconv_init(&iconv_tstr_to_utf8); -#if !TCHAR_IS_UTF16LE - iconv_init(&iconv_utf16le_to_tstr); - iconv_init(&iconv_tstr_to_utf16le); - iconv_init(&iconv_utf16le_to_utf8); - iconv_init(&iconv_utf8_to_utf16le); -#endif + return convert_string((const u8 *)in, in_nbytes, + (u8 **)out_ret, out_nbytes_ret, + WIMLIB_ERR_INVALID_UTF8_STRING, + utf8_decode_codepoint, utf16le_encode_codepoint); } -void -iconv_global_cleanup(void) +int +utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes, + char **out_ret, size_t *out_nbytes_ret) { - iconv_cleanup(&iconv_utf8_to_tstr); - iconv_cleanup(&iconv_tstr_to_utf8); -#if !TCHAR_IS_UTF16LE - iconv_cleanup(&iconv_utf16le_to_tstr); - iconv_cleanup(&iconv_tstr_to_utf16le); - iconv_cleanup(&iconv_utf16le_to_utf8); - iconv_cleanup(&iconv_utf8_to_utf16le); -#endif + return convert_string((const u8 *)in, in_nbytes, + (u8 **)out_ret, out_nbytes_ret, + WIMLIB_ERR_INVALID_UTF16_STRING, + utf16le_decode_codepoint, utf8_encode_codepoint); } -/* A table that maps from UCS-2 characters to their upper case equivalents. +/* + * A table that maps from UCS-2 characters to their upper case equivalents. * Index and array values are both CPU endian. * Note: this is only an *approximation* of real UTF-16 case folding. 
*/ @@ -484,38 +361,17 @@ init_upcase(void) /* Delta filter */ for (u32 i = 0; i < ARRAY_LEN(upcase); i++) upcase[i] += i; - -#if 0 - /* Sanity checks */ - wimlib_assert(upcase['a'] == 'A'); - wimlib_assert(upcase['A'] == 'A'); - wimlib_assert(upcase['z'] == 'Z'); - wimlib_assert(upcase['Z'] == 'Z'); - wimlib_assert(upcase['1'] == '1'); - wimlib_assert(upcase[0x00e9] == 0x00c9); /* Latin letter e, with acute accent */ - wimlib_assert(upcase[0x00c9] == 0x00c9); - wimlib_assert(upcase[0x03c1] == 0x03a1); /* Greek letter rho */ - wimlib_assert(upcase[0x03a1] == 0x03a1); - wimlib_assert(upcase[0x0436] == 0x0416); /* Cyrillic letter zhe */ - wimlib_assert(upcase[0x0416] == 0x0416); - wimlib_assert(upcase[0x0567] == 0x0537); /* Armenian letter eh */ - wimlib_assert(upcase[0x0537] == 0x0537); - wimlib_assert(upcase[0x24d0] == 0x24b6); /* Circled Latin letter A - (is that a real character???) */ - wimlib_assert(upcase[0x24b6] == 0x24b6); - wimlib_assert(upcase[0x2603] == 0x2603); /* Note to self: Upper case - snowman symbol does not - exist. */ -#endif } -/* Compare UTF-16LE strings case-sensitively (%ignore_case == false) or +/* + * Compare UTF-16LE strings case-sensitively (%ignore_case == false) or * case-insensitively (%ignore_case == true). * * This is implemented using the default upper-case table used by NTFS. It does * not handle all possible cases allowed by UTF-16LE. For example, different * normalizations of the same sequence of "characters" are not considered equal. - * It hopefully does the right thing most of the time though. */ + * It hopefully does the right thing most of the time though. + */ int cmp_utf16le_strings(const utf16lechar *s1, size_t n1, const utf16lechar *s2, size_t n2, @@ -567,32 +423,29 @@ cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, } } -/* Duplicate a UTF-16LE string. 
The input string might not be null terminated - * and might be misaligned, but the returned string is guaranteed to be null +/* Duplicate a UTF-16 string. The input string might not be null terminated and + * might be misaligned, but the returned string is guaranteed to be null * terminated and properly aligned. */ utf16lechar * -utf16le_dupz(const void *ustr, size_t usize) +utf16le_dupz(const void *s, size_t size) { - utf16lechar *dup = MALLOC(usize + sizeof(utf16lechar)); + utf16lechar *dup = MALLOC(size + sizeof(utf16lechar)); if (dup) { - memcpy(dup, ustr, usize); - dup[usize / sizeof(utf16lechar)] = 0; + memcpy(dup, s, size); + dup[size / sizeof(utf16lechar)] = 0; } return dup; } -/* Duplicate a null-terminated UTF-16LE string. */ +/* Duplicate a null-terminated UTF-16 string. */ utf16lechar * -utf16le_dup(const utf16lechar *ustr) +utf16le_dup(const utf16lechar *s) { - const utf16lechar *p = ustr; - while (*p++) - ; - return memdup(ustr, (const u8 *)p - (const u8 *)ustr); + return memdup(s, utf16le_len_bytes(s) + sizeof(utf16lechar)); } -/* Return the length, in bytes, of a UTF-null terminated UTF-16 string, - * excluding the null terminator. */ +/* Return the length, in bytes, of a null terminated UTF-16 string, excluding + * the null terminator. */ size_t utf16le_len_bytes(const utf16lechar *s) { @@ -602,7 +455,7 @@ utf16le_len_bytes(const utf16lechar *s) return (p - s) * sizeof(utf16lechar); } -/* Return the length, in UTF-16 coding units, of a UTF-null terminated UTF-16 +/* Return the length, in UTF-16 coding units, of a null terminated UTF-16 * string, excluding the null terminator. 
*/ size_t utf16le_len_chars(const utf16lechar *s) diff --git a/src/error.c b/src/error.c index db2a107f..26fc10a4 100644 --- a/src/error.c +++ b/src/error.c @@ -187,9 +187,6 @@ static const tchar * const error_strings[] = { = T("An error was returned by fuse_main()"), [WIMLIB_ERR_GLOB_HAD_NO_MATCHES] = T("The provided file glob did not match any files"), - [WIMLIB_ERR_ICONV_NOT_AVAILABLE] - = T("The iconv() function does not seem to work. " - "Maybe check to make sure the directory /usr/lib/gconv exists"), [WIMLIB_ERR_IMAGE_COUNT] = T("Inconsistent image count among the metadata " "resources, the WIM header, and/or the XML data"), @@ -215,8 +212,6 @@ static const tchar * const error_strings[] = { = T("An entry in the WIM's lookup table is invalid"), [WIMLIB_ERR_INVALID_METADATA_RESOURCE] = T("The metadata resource is invalid"), - [WIMLIB_ERR_INVALID_MULTIBYTE_STRING] - = T("A string was not valid in the current locale's character encoding"), [WIMLIB_ERR_INVALID_OVERLAY] = T("Conflicting files in overlay when creating a WIM image"), [WIMLIB_ERR_INVALID_PARAM] @@ -230,9 +225,9 @@ static const tchar * const error_strings[] = { [WIMLIB_ERR_INVALID_RESOURCE_HASH] = T("The SHA-1 message digest of a WIM resource did not match the expected value"), [WIMLIB_ERR_INVALID_UTF8_STRING] - = T("A string provided as input by the user was not a valid UTF-8 string"), + = T("A string was not a valid UTF-8 string"), [WIMLIB_ERR_INVALID_UTF16_STRING] - = T("A string in a WIM dentry is not a valid UTF-16LE string"), + = T("A string was not a valid UTF-16 string"), [WIMLIB_ERR_IS_DIRECTORY] = T("One of the specified paths to delete was a directory"), [WIMLIB_ERR_IS_SPLIT_WIM] diff --git a/src/iterate_dir.c b/src/iterate_dir.c index c5f5dad7..ad34e799 100644 --- a/src/iterate_dir.c +++ b/src/iterate_dir.c @@ -50,12 +50,11 @@ stream_to_wimlib_stream_entry(const struct wim_inode *inode, const u8 *hash; if (stream_is_named(strm)) { - size_t dummy; int ret; ret = 
utf16le_get_tstr(strm->stream_name, utf16le_len_bytes(strm->stream_name), - &wstream->stream_name, &dummy); + &wstream->stream_name, NULL); if (ret) return ret; } @@ -87,7 +86,6 @@ init_wimlib_dentry(struct wimlib_dir_entry *wdentry, struct wim_dentry *dentry, WIMStruct *wim, int flags) { int ret; - size_t dummy; const struct wim_inode *inode = dentry->d_inode; const struct wim_inode_stream *strm; struct wimlib_unix_data unix_data; @@ -95,12 +93,12 @@ init_wimlib_dentry(struct wimlib_dir_entry *wdentry, struct wim_dentry *dentry, u32 object_id_len; ret = utf16le_get_tstr(dentry->d_name, dentry->d_name_nbytes, - &wdentry->filename, &dummy); + &wdentry->filename, NULL); if (ret) return ret; ret = utf16le_get_tstr(dentry->d_short_name, dentry->d_short_name_nbytes, - &wdentry->dos_name, &dummy); + &wdentry->dos_name, NULL); if (ret) return ret; diff --git a/src/mount_image.c b/src/mount_image.c index 18c3a4ef..6f0c6e1c 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -2484,7 +2484,7 @@ wimlib_unmount_image_with_progress(const char *dir, int unmount_flags, int mount_flags; int ret; - ret = wimlib_global_init(WIMLIB_INIT_FLAG_ASSUME_UTF8); + ret = wimlib_global_init(0); if (ret) return ret; diff --git a/src/ntfs-3g_apply.c b/src/ntfs-3g_apply.c index eb48896b..e644da42 100644 --- a/src/ntfs-3g_apply.c +++ b/src/ntfs-3g_apply.c @@ -1082,9 +1082,3 @@ const struct apply_operations ntfs_3g_apply_ops = { .context_size = sizeof(struct ntfs_3g_apply_ctx), .single_tree_only = true, }; - -void -libntfs3g_global_init(void) -{ - ntfs_set_char_encoding(setlocale(LC_ALL, "")); -} diff --git a/src/registry.c b/src/registry.c index 36ecdd39..f7eca9ab 100644 --- a/src/registry.c +++ b/src/registry.c @@ -681,12 +681,11 @@ append_subkey_name(const struct nk *sub_nk, void *_next_subkey_p) subkey[i] = sub_nk->name[i]; subkey[name_size] = '\0'; } else { - size_t dummy; enum hive_status status; status = translate_wimlib_error( utf16le_to_tstr((utf16lechar *)sub_nk->name, - name_size, 
&subkey, &dummy)); + name_size, &subkey, NULL)); if (status != HIVE_OK) return status; } diff --git a/src/reparse.c b/src/reparse.c index f96fd49b..da8642f8 100644 --- a/src/reparse.c +++ b/src/reparse.c @@ -252,7 +252,7 @@ wim_inode_readlink(const struct wim_inode *inode, char *buf, size_t bufsize, if (parse_link_reparse_point(&rpbuf, rpbuflen, &link)) return -EINVAL; - /* Translate the substitute name to the current multibyte encoding. */ + /* Translate the substitute name to a multibyte string. */ if (utf16le_to_tstr(link.substitute_name, link.substitute_name_nbytes, &target_buffer, &target_len)) return -errno; diff --git a/src/wim.c b/src/wim.c index f6d1f3a5..8c3dbfc4 100644 --- a/src/wim.c +++ b/src/wim.c @@ -40,7 +40,6 @@ #include "wimlib/file_io.h" #include "wimlib/integrity.h" #include "wimlib/metadata.h" -#include "wimlib/ntfs_3g.h" /* for libntfs3g_global_init() */ #include "wimlib/security.h" #include "wimlib/wim.h" #include "wimlib/xml.h" @@ -162,7 +161,7 @@ wimlib_create_new_wim(enum wimlib_compression_type ctype, WIMStruct **wim_ret) int ret; WIMStruct *wim; - ret = wimlib_global_init(WIMLIB_INIT_FLAG_ASSUME_UTF8); + ret = wimlib_global_init(0); if (ret) return ret; @@ -790,7 +789,7 @@ open_wim_as_WIMStruct(const void *wim_filename_or_fd, int open_flags, WIMStruct *wim; int ret; - ret = wimlib_global_init(WIMLIB_INIT_FLAG_ASSUME_UTF8); + ret = wimlib_global_init(0); if (ret) return ret; @@ -936,21 +935,6 @@ wimlib_free(WIMStruct *wim) wim_decrement_refcnt(wim); } -static bool -test_locale_ctype_utf8(void) -{ -#ifdef __WIN32__ - return false; -#else - char *ctype = nl_langinfo(CODESET); - - return (strstr(ctype, "UTF-8") || - strstr(ctype, "UTF8") || - strstr(ctype, "utf8") || - strstr(ctype, "utf-8")); -#endif -} - /* API function documented in wimlib.h */ WIMLIBAPI u32 wimlib_get_version(void) @@ -999,19 +983,11 @@ wimlib_global_init(int init_flags) goto out_unlock; xml_global_init(); - if (!(init_flags & WIMLIB_INIT_FLAG_ASSUME_UTF8)) { - 
wimlib_mbs_is_utf8 = test_locale_ctype_utf8(); - #ifdef WITH_NTFS_3G - if (!wimlib_mbs_is_utf8) - libntfs3g_global_init(); - #endif - } #ifdef __WIN32__ ret = win32_global_init(init_flags); if (ret) goto out_unlock; #endif - iconv_global_init(); init_upcase(); if (init_flags & WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE) default_ignore_case = false; @@ -1038,7 +1014,6 @@ wimlib_global_cleanup(void) goto out_unlock; xml_global_cleanup(); - iconv_global_cleanup(); #ifdef __WIN32__ win32_global_cleanup(); #endif diff --git a/src/xml.c b/src/xml.c index 18721b14..3812ce1e 100644 --- a/src/xml.c +++ b/src/xml.c @@ -64,12 +64,14 @@ struct wim_xml_info { /* The number of WIM images (the length of 'images') */ int image_count; +#if TCHAR_IS_UTF16LE /* Temporary memory for UTF-8 => 'tchar' string translations. When an * API function needs to return a 'tchar' string, it uses one of these * array slots to hold the string and returns a pointer to it. */ tchar *strings[128]; size_t next_string_idx; size_t num_strings; +#endif }; /*----------------------------------------------------------------------------* @@ -144,18 +146,21 @@ node_get_timestamp(const xmlNode *node) static int tstr_get_utf8(const tchar *tstr, const xmlChar **utf8_ret) { - if (wimlib_mbs_is_utf8) { - *utf8_ret = (xmlChar *)tstr; - return 0; - } - return tstr_to_utf8_simple(tstr, (char **)utf8_ret); +#if TCHAR_IS_UTF16LE + return utf16le_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar), + (char **)utf8_ret, NULL); +#else + *utf8_ret = (const xmlChar *)tstr; + return 0; +#endif } static void tstr_put_utf8(const xmlChar *utf8) { - if (!wimlib_mbs_is_utf8) - FREE((void *)utf8); +#if TCHAR_IS_UTF16LE + FREE((char *)utf8); +#endif } /* Retrieve the text contents of an XML element as a 'tchar' string. 
If not @@ -163,26 +168,29 @@ tstr_put_utf8(const xmlChar *utf8) static const tchar * node_get_ttext(struct wim_xml_info *info, xmlNode *node) { - const xmlChar *text; - tchar **ttext_p; + const xmlChar *text = node_get_text(node); - text = node_get_text(node); +#if TCHAR_IS_UTF16LE + tchar **ttext_p; - if (!text || wimlib_mbs_is_utf8) - return (const tchar *)text; + if (!text) + return NULL; ttext_p = &info->strings[info->next_string_idx]; if (info->num_strings >= ARRAY_LEN(info->strings)) { FREE(*ttext_p); *ttext_p = NULL; } - if (utf8_to_tstr_simple(text, ttext_p)) + if (utf8_to_tstr(text, strlen(text), ttext_p, NULL)) return NULL; if (info->num_strings < ARRAY_LEN(info->strings)) info->num_strings++; info->next_string_idx++; info->next_string_idx %= ARRAY_LEN(info->strings); return *ttext_p; +#else + return text; +#endif } /* Unlink the specified node from its parent, then free it (recursively). */ @@ -335,10 +343,12 @@ static struct wim_xml_info * alloc_wim_xml_info(void) { struct wim_xml_info *info = MALLOC(sizeof(*info)); +#if TCHAR_IS_UTF16LE if (info) { info->next_string_idx = 0; info->num_strings = 0; } +#endif return info; } @@ -609,8 +619,10 @@ xml_free_info_struct(struct wim_xml_info *info) if (info) { xmlFreeDoc(info->doc); FREE(info->images); + #if TCHAR_IS_UTF16LE for (size_t i = 0; i < info->num_strings; i++) FREE(info->strings[i]); + #endif FREE(info); } } diff --git a/tools/windeps/Makefile b/tools/windeps/Makefile index 5b7dde52..4f6401fc 100644 --- a/tools/windeps/Makefile +++ b/tools/windeps/Makefile @@ -12,11 +12,9 @@ ARCHITECTURES := i686 x86_64 LIBXML2_VERSION := 2.9.4 WINPTHREADS_VERSION := 4.0.4 -WINICONV_VERSION := 0.0.6 LIBXML_URL := ftp://xmlsoft.org/libxml2/libxml2-$(LIBXML2_VERSION).tar.gz WINPTHREADS_URL := http://downloads.sourceforge.net/mingw-w64/mingw-w64/mingw-w64-release/mingw-w64-v$(WINPTHREADS_VERSION).tar.bz2 -WINICONV_URL := https://github.com/win-iconv/win-iconv/archive/$(WINICONV_VERSION).tar.gz LIBXML_SRCDIR := 
libxml2-$(LIBXML2_VERSION) @@ -42,17 +40,6 @@ $(WINPTHREADS_SRCDIR):$(WINPTHREADS_DIST) checksums_verified cp $@/COPYING COPYING.winpthreads MAKE_CLEAN_FILES += $(WINPTHREADS_SRCDIR) mingw-w64-v$(WINPTHREADS_VERSION) COPYING.winpthreads -WINICONV_SRCDIR := win-iconv-$(WINICONV_VERSION) -WINICONV_DIST := $(WINICONV_VERSION).tar.gz -SRCDIR_TARGETS += $(WINICONV_SRCDIR) -DIST_TARGETS += $(WINICONV_DIST) -$(WINICONV_DIST): - wget $(WINICONV_URL) -$(WINICONV_SRCDIR):$(WINICONV_DIST) checksums_verified - tar xvf $< -# win-iconv is public domain, so there's no license file. -MAKE_CLEAN_FILES += $(WINICONV_SRCDIR) - checksums_verified:$(DIST_TARGETS) sha256sum -c sha256sums @@ -104,35 +91,12 @@ $(1)_BUILD_TARGETS += winpthreads_$(1) MAKE_CLEAN_FILES += build_winpthreads_$(1) endef -# -# declare_winiconv_target(arch) -# -define declare_winiconv_target -winiconv_$(1):$(WINICONV_SRCDIR) - builddir=build_winiconv_$(1); \ - rm -rf $$$$builddir; \ - cp -r $(WINICONV_SRCDIR) $$$$builddir; \ - cd $$$$builddir; \ - $(MAKE) CC=$(1)-w64-mingw32-gcc \ - AR=$(1)-w64-mingw32-ar \ - RANLIB=$(1)-w64-mingw32-ranlib \ - DLLTOOL=$(1)-w64-mingw32-dlltool \ - CFLAGS=-O2 \ - prefix=$$$$PWD/../sysroot_$(1) \ - install; \ - rm -f ../sysroot_$(1)/lib/libiconv.dll.a; - -$(1)_BUILD_TARGETS += winiconv_$(1) -MAKE_CLEAN_FILES += build_winiconv_$(1) -endef - # # declare_arch_targets(arch) # define declare_arch_targets $(eval $(call declare_libxml_target,$(1))) $(eval $(call declare_winpthreads_target,$(1))) -$(eval $(call declare_winiconv_target,$(1))) sysroot_$(1): $($(1)_BUILD_TARGETS) diff --git a/tools/windeps/sha256sums b/tools/windeps/sha256sums index 038e2a07..1e940d8d 100644 --- a/tools/windeps/sha256sums +++ b/tools/windeps/sha256sums @@ -1,3 +1,2 @@ ffb911191e509b966deb55de705387f14156e1a56b21824357cdf0053233633c libxml2-2.9.4.tar.gz 89356a0aa8cf9f8b9dc8d92bc8dd01a131d4750c3acb30c6350a406316c42199 mingw-w64-v4.0.4.tar.bz2 -d464bbe0410f72b09f301bead9f1cf091e6aa15e97323961ecb9242c0e7f609b 
0.0.6.tar.gz -- 2.43.0