From cf373e59a7f6ff7d1fd007c1f22defe508aa67d4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 9 Apr 2013 10:55:50 -0500 Subject: [PATCH 1/1] implement WIMLIB_INIT_FLAG_ASSUME_UTF8 --- Makefile.am | 2 +- programs/imagex.c | 18 +++++++++---- src/dentry.c | 14 +++++----- src/dentry.h | 3 +++ src/encoding.c | 66 ++++++++++++++++++++++++++++++++++----------- src/extract_image.c | 16 +++++------ src/wim.c | 15 ++++++----- src/wimlib.h | 19 ++++++++++--- 8 files changed, 105 insertions(+), 48 deletions(-) diff --git a/Makefile.am b/Makefile.am index 5aa4563d..24c07649 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,7 +6,7 @@ AM_CFLAGS = -std=gnu99 -fno-strict-aliasing lib_LTLIBRARIES = libwim.la -libwim_la_LDFLAGS = -version-info 6:0:1 $(WINDOWS_LDFLAGS) +libwim_la_LDFLAGS = -version-info 6:0:0 $(WINDOWS_LDFLAGS) libwim_la_SOURCES = \ src/add_image.c \ diff --git a/programs/imagex.c b/programs/imagex.c index 372e115b..c74bdb25 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -2526,11 +2526,16 @@ main(int argc, char **argv) { const struct imagex_command *cmd; int ret; + int init_flags = 0; #ifndef __WIN32__ - setlocale(LC_ALL, ""); - { - char *codeset = nl_langinfo(CODESET); + if (getenv("WIMLIB_IMAGEX_USE_UTF8")) { + init_flags |= WIMLIB_INIT_FLAG_ASSUME_UTF8; + } else { + char *codeset; + + setlocale(LC_ALL, ""); + codeset = nl_langinfo(CODESET); if (!strstr(codeset, "UTF-8") && !strstr(codeset, "UTF8") && !strstr(codeset, "utf-8") && @@ -2538,7 +2543,10 @@ main(int argc, char **argv) { fputs( "WARNING: Running "IMAGEX_PROGNAME" in a UTF-8 locale is recommended!\n" -" (Maybe try: `export LANG=en_US.UTF-8'?\n", stderr); +" Maybe try: `export LANG=en_US.UTF-8'?\n" +" Alternatively, set the environmental variable WIMLIB_IMAGEX_USE_UTF8\n" +" to any value to force wimlib to use UTF-8.\n", + stderr); } } @@ -2561,7 +2569,7 @@ main(int argc, char **argv) wimlib_set_print_errors(true); /* Do any initializations that the library needs */ - ret = wimlib_global_init(); + ret = wimlib_global_init(init_flags); if (ret) goto out_check_status; diff --git a/src/dentry.c b/src/dentry.c index 97ad628f..e9826b55 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -275,7 +275,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root, /* Calculate the full path of @dentry. The full path of its parent must have * already been calculated, or it must be the root dentry. */ -static int +int calculate_dentry_full_path(struct wim_dentry *dentry) { tchar *full_path; @@ -573,12 +573,12 @@ get_parent_dentry(WIMStruct *w, const tchar *path) int print_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - tchar *full_path = dentry_full_path(dentry); - if (!full_path) - return WIMLIB_ERR_NOMEM; - tprintf(T("%"TS"\n"), full_path); - FREE(full_path); - dentry->_full_path = 0; + int ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + tprintf(T("%"TS"\n"), dentry->_full_path); + FREE(dentry->_full_path); + dentry->_full_path = NULL; dentry->full_path_nbytes = 0; return 0; } diff --git a/src/dentry.h b/src/dentry.h index 5f7ccd5b..0021eeae 100644 --- a/src/dentry.h +++ b/src/dentry.h @@ -372,6 +372,9 @@ print_dentry(struct wim_dentry *dentry, void *lookup_table); extern int print_dentry_full_path(struct wim_dentry *entry, void *ignore); +extern int +calculate_dentry_full_path(struct wim_dentry *dentry); + extern tchar * dentry_full_path(struct wim_dentry *dentry); diff --git a/src/encoding.c b/src/encoding.c index 822e9f53..7d260046 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -112,7 +112,8 @@ static bool error_message_being_printed = false; #define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\ varname2, longname2, chartype2,\ - earlyreturn, \ + earlyreturn_on_utf8_locale, \ + earlyreturn_expr, \ worst_case_len_expr, \ err_return, \ err_msg, \ @@ -194,7 +195,8 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \ chartype2 *out; \ size_t out_nbytes; \ \ - if (earlyreturn) { \ + if (earlyreturn_on_utf8_locale && wimlib_mbs_is_utf8) { \ + earlyreturn_expr; \ /* Out same as in */ \ out = MALLOC(in_nbytes + sizeof(chartype2)); \ if (!out) \ @@ -217,9 +219,7 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \ \ ret = varname1##_to_##varname2##_buf(in, in_nbytes, out); \ if (ret) { \ - int errno_save = errno; \ FREE(out); \ - errno = errno_save; \ } else { \ *out_ret = out; \ *out_nbytes_ret = out_nbytes; \ @@ -228,35 +228,64 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes, \ } #if !TCHAR_IS_UTF16LE -DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, + +/* UNIX */ + +DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", tchar, utf16le, "UTF-16LE", utf16lechar, false, - in_nbytes * 4, + , + in_nbytes * 2, + WIMLIB_ERR_INVALID_UTF8_STRING, + ERROR_WITH_ERRNO("Failed to convert UTF-8 string " + "to UTF-16LE string!"), + static) + +DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar, + utf8, "UTF-8", tchar, + false, + , + in_nbytes * 2, + WIMLIB_ERR_INVALID_UTF16_STRING, + ERROR_WITH_ERRNO("Failed to convert UTF-16LE string " + "to UTF-8 string!"), + static) + +DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, + utf16le, "UTF-16LE", utf16lechar, + true, + return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret), + in_nbytes * 2, WIMLIB_ERR_INVALID_MULTIBYTE_STRING, ERROR_WITH_ERRNO("Failed to convert multibyte " "string \"%"TS"\" to UTF-16LE string!", in); ERROR("If the data you provided was UTF-8, please make sure " - "the character encoding of your current locale is UTF-8."), + "the character encoding\n" + " of your current locale is UTF-8."), ) DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar, tstr, "", tchar, - false, + true, + return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret), in_nbytes * 2, WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE, ERROR("Failed to convert UTF-16LE string to " "multibyte string!"); ERROR("This may be because the UTF-16LE string " - "could not be represented in your " - "locale's character encoding."), + "could not be represented\n" + " in your locale's character encoding."), ) #endif /* tchar to UTF-8 and back */ #if TCHAR_IS_UTF16LE + +/* Windows */ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar, utf8, "UTF-8", char, false, + , in_nbytes * 2, WIMLIB_ERR_INVALID_UTF16_STRING, ERROR_WITH_ERRNO("Failed to convert UTF-16LE " @@ -266,33 +295,40 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar, DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, tstr, "UTF-16LE", tchar, false, + , in_nbytes * 2, WIMLIB_ERR_INVALID_UTF8_STRING, ERROR_WITH_ERRNO("Failed to convert UTF-8 string " "to UTF-16LE string!"), static) #else + +/* UNIX */ + DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar, utf8, "UTF-8", char, - wimlib_mbs_is_utf8, + true, + , in_nbytes * 4, WIMLIB_ERR_INVALID_MULTIBYTE_STRING, ERROR_WITH_ERRNO("Failed to convert multibyte " "string \"%"TS"\" to UTF-8 string!", in); ERROR("If the data you provided was UTF-8, please make sure " - "the character encoding of your current locale is UTF-8."), + "the character\n" + " encoding of your current locale is UTF-8."), static) DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char, tstr, "", tchar, - wimlib_mbs_is_utf8, + true, + , in_nbytes * 4, WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE, ERROR("Failed to convert UTF-8 string to " "multibyte string!"); ERROR("This may be because the UTF-8 data " - "could not be represented in your " - "locale's character encoding."), + "could not be represented\n" + " in your locale's character encoding."), static) #endif diff --git a/src/extract_image.c b/src/extract_image.c index b18270df..fc24d9bd 100644 --- a/src/extract_image.c +++ b/src/extract_image.c @@ -111,10 +111,8 @@ extract_regular_file_linked(struct wim_dentry *dentry, const char *p2; size_t i; - num_path_components = - get_num_path_components(dentry_full_path(dentry)) - 1; - num_output_dir_path_components = - get_num_path_components(args->target); + num_path_components = get_num_path_components(output_path) - 1; + num_output_dir_path_components = get_num_path_components(args->target); if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) { num_path_components++; @@ -494,13 +492,12 @@ apply_dentry_normal(struct wim_dentry *dentry, void *arg) struct apply_args *args = arg; tchar *output_path; size_t len; + int ret; len = tstrlen(args->target); if (dentry_is_root(dentry)) { output_path = (tchar*)args->target; } else { - if (!dentry_full_path(dentry)) - return WIMLIB_ERR_NOMEM; output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes + sizeof(tchar)); memcpy(output_path, args->target, len * sizeof(tchar)); @@ -557,8 +554,9 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg) if (dentry->is_extracted) return 0; - if (!dentry_full_path(dentry)) - return WIMLIB_ERR_NOMEM; + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS) if (inode_unnamed_lte_resolved(dentry->d_inode)) @@ -566,7 +564,7 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg) if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) && args->progress_func) { - args->progress.extract.cur_path = dentry_full_path(dentry); + args->progress.extract.cur_path = dentry->_full_path; args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY, &args->progress); } diff --git a/src/wim.c b/src/wim.c index 342281d3..a5044c68 100644 --- a/src/wim.c +++ b/src/wim.c @@ -690,16 +690,17 @@ test_locale_ctype_utf8() #endif } -/* Get global memory allocations out of the way, - * single-threaded programs like 'imagex'. */ WIMLIBAPI int -wimlib_global_init() +wimlib_global_init(int init_flags) { libxml_global_init(); -#ifdef WITH_NTFS_3G - libntfs3g_global_init(); -#endif - wimlib_mbs_is_utf8 = test_locale_ctype_utf8(); + if (!(init_flags & WIMLIB_INIT_FLAG_ASSUME_UTF8)) { + wimlib_mbs_is_utf8 = test_locale_ctype_utf8(); + #ifdef WITH_NTFS_3G + if (!wimlib_mbs_is_utf8) + libntfs3g_global_init(); + #endif + } #ifdef __WIN32__ win32_global_init(); #endif diff --git a/src/wimlib.h b/src/wimlib.h index 50f92521..841bc912 100644 --- a/src/wimlib.h +++ b/src/wimlib.h @@ -828,6 +828,10 @@ struct wimlib_capture_config { * deleting an image in this way. */ #define WIMLIB_WRITE_FLAG_SOFT_DELETE 0x00000010 +/** Assume that strings are represented in UTF-8, even if this is not the + * locale's character encoding. */ +#define WIMLIB_INIT_FLAG_ASSUME_UTF8 0x00000001 + #if 0 /**************************************************************** @@ -1562,17 +1566,24 @@ wimlib_get_part_number(const WIMStruct *wim, int *total_parts_ret); * threads, then you must call this function serially first. * * Since wimlib 1.3.0, you must call this function if the character encoding of - * the current locale is not UTF-8. + * the current locale is not UTF-8 and you do not want wimlib to assume a UTF-8 + * encoding. * * Since wimlib 1.3.2, you must call this function if using the Windows-native * build of the library so that certain functions can be dynamically loaded from * system DLLs. * - * This function currently always returns 0, but it may return other error codes - * in future releases. + * Since wimlib 1.3.3, this function takes the @a init_flags parameter. + * + * @param init_flags + * ::WIMLIB_INIT_FLAG_ASSUME_UTF8 if wimlib should assume that all input + * data, including filenames, are in UTF-8, and that UTF-8 data can be + * directly printed to the console. + * + * @return 0; other error codes may be returned in future releases. */ extern int -wimlib_global_init(); +wimlib_global_init(int init_flags); /** * Since wimlib 1.2.6: Cleanup function for wimlib. This is not re-entrant. -- 2.43.0