implement WIMLIB_INIT_FLAG_ASSUME_UTF8
author: Eric Biggers <ebiggers3@gmail.com>
Tue, 9 Apr 2013 15:55:50 +0000 (10:55 -0500)
committer: Eric Biggers <ebiggers3@gmail.com>
Tue, 9 Apr 2013 15:55:50 +0000 (10:55 -0500)
Makefile.am
programs/imagex.c
src/dentry.c
src/dentry.h
src/encoding.c
src/extract_image.c
src/wim.c
src/wimlib.h

index 5aa4563..24c0764 100644 (file)
@@ -6,7 +6,7 @@ AM_CFLAGS       = -std=gnu99 -fno-strict-aliasing
 
 lib_LTLIBRARIES = libwim.la
 
-libwim_la_LDFLAGS = -version-info 6:0:1 $(WINDOWS_LDFLAGS)
+libwim_la_LDFLAGS = -version-info 6:0:0 $(WINDOWS_LDFLAGS)
 
 libwim_la_SOURCES =            \
        src/add_image.c         \
index 372e115..c74bdb2 100644 (file)
@@ -2526,11 +2526,16 @@ main(int argc, char **argv)
 {
        const struct imagex_command *cmd;
        int ret;
+       int init_flags = 0;
 
 #ifndef __WIN32__
-       setlocale(LC_ALL, "");
-       {
-               char *codeset = nl_langinfo(CODESET);
+       if (getenv("WIMLIB_IMAGEX_USE_UTF8")) {
+               init_flags |= WIMLIB_INIT_FLAG_ASSUME_UTF8;
+       } else {
+               char *codeset;
+
+               setlocale(LC_ALL, "");
+               codeset = nl_langinfo(CODESET);
                if (!strstr(codeset, "UTF-8") &&
                    !strstr(codeset, "UTF8") &&
                    !strstr(codeset, "utf-8") &&
@@ -2538,7 +2543,10 @@ main(int argc, char **argv)
                {
                        fputs(
 "WARNING: Running "IMAGEX_PROGNAME" in a UTF-8 locale is recommended!\n"
-"         (Maybe try: `export LANG=en_US.UTF-8'?\n", stderr);
+"         Maybe try: `export LANG=en_US.UTF-8'?\n"
+"         Alternatively, set the environmental variable WIMLIB_IMAGEX_USE_UTF8\n"
+"         to any value to force wimlib to use UTF-8.\n",
+                       stderr);
 
                }
        }
@@ -2561,7 +2569,7 @@ main(int argc, char **argv)
        wimlib_set_print_errors(true);
 
        /* Do any initializations that the library needs */
-       ret = wimlib_global_init();
+       ret = wimlib_global_init(init_flags);
        if (ret)
                goto out_check_status;
 
index 97ad628..e9826b5 100644 (file)
@@ -275,7 +275,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root,
 
 /* Calculate the full path of @dentry.  The full path of its parent must have
  * already been calculated, or it must be the root dentry. */
-static int
+int
 calculate_dentry_full_path(struct wim_dentry *dentry)
 {
        tchar *full_path;
@@ -573,12 +573,12 @@ get_parent_dentry(WIMStruct *w, const tchar *path)
 int
 print_dentry_full_path(struct wim_dentry *dentry, void *_ignore)
 {
-       tchar *full_path = dentry_full_path(dentry);
-       if (!full_path)
-               return WIMLIB_ERR_NOMEM;
-       tprintf(T("%"TS"\n"), full_path);
-       FREE(full_path);
-       dentry->_full_path = 0;
+       int ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
+       tprintf(T("%"TS"\n"), dentry->_full_path);
+       FREE(dentry->_full_path);
+       dentry->_full_path = NULL;
        dentry->full_path_nbytes = 0;
        return 0;
 }
index 5f7ccd5..0021eea 100644 (file)
@@ -372,6 +372,9 @@ print_dentry(struct wim_dentry *dentry, void *lookup_table);
 extern int
 print_dentry_full_path(struct wim_dentry *entry, void *ignore);
 
+extern int
+calculate_dentry_full_path(struct wim_dentry *dentry);
+
 extern tchar *
 dentry_full_path(struct wim_dentry *dentry);
 
index 822e9f5..7d26004 100644 (file)
@@ -112,7 +112,8 @@ static bool error_message_being_printed = false;
 
 #define DEFINE_CHAR_CONVERSION_FUNCTIONS(varname1, longname1, chartype1,\
                                         varname2, longname2, chartype2,\
-                                        earlyreturn,                   \
+                                        earlyreturn_on_utf8_locale,    \
+                                        earlyreturn_expr,              \
                                         worst_case_len_expr,           \
                                         err_return,                    \
                                         err_msg,                       \
@@ -194,7 +195,8 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes,             \
        chartype2 *out;                                                 \
        size_t out_nbytes;                                              \
                                                                        \
-       if (earlyreturn) {                                              \
+       if (earlyreturn_on_utf8_locale && wimlib_mbs_is_utf8) {         \
+               earlyreturn_expr;                                       \
                /* Out same as in */                                    \
                out = MALLOC(in_nbytes + sizeof(chartype2));            \
                if (!out)                                               \
@@ -217,9 +219,7 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes,             \
                                                                        \
        ret = varname1##_to_##varname2##_buf(in, in_nbytes, out);       \
        if (ret) {                                                      \
-               int errno_save = errno;                                 \
                FREE(out);                                              \
-               errno = errno_save;                                     \
        } else {                                                        \
                *out_ret = out;                                         \
                *out_nbytes_ret = out_nbytes;                           \
@@ -228,35 +228,64 @@ varname1##_to_##varname2(const chartype1 *in, size_t in_nbytes,           \
 }
 
 #if !TCHAR_IS_UTF16LE
-DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
+
+/* UNIX */
+
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", tchar,
                                 utf16le, "UTF-16LE", utf16lechar,
                                 false,
-                                in_nbytes * 4,
+                                ,
+                                in_nbytes * 2,
+                                WIMLIB_ERR_INVALID_UTF8_STRING,
+                                ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
+                                                 "to UTF-16LE string!"),
+                                static)
+
+DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
+                                utf8, "UTF-8", tchar,
+                                false,
+                                ,
+                                in_nbytes * 2,
+                                WIMLIB_ERR_INVALID_UTF16_STRING,
+                                ERROR_WITH_ERRNO("Failed to convert UTF-16LE string "
+                                                 "to UTF-8 string!"),
+                                static)
+
+DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
+                                utf16le, "UTF-16LE", utf16lechar,
+                                true,
+                                return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret),
+                                in_nbytes * 2,
                                 WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert multibyte "
                                                  "string \"%"TS"\" to UTF-16LE string!", in);
                                 ERROR("If the data you provided was UTF-8, please make sure "
-                                      "the character encoding of your current locale is UTF-8."),
+                                      "the character encoding\n"
+                                      "        of your current locale is UTF-8."),
                                 )
 
 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf16le, "UTF-16LE", utf16lechar,
                                 tstr, "", tchar,
-                                false,
+                                true,
+                                return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret),
                                 in_nbytes * 2,
                                 WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
                                 ERROR("Failed to convert UTF-16LE string to "
                                       "multibyte string!");
                                 ERROR("This may be because the UTF-16LE string "
-                                      "could not be represented in your "
-                                      "locale's character encoding."),
+                                      "could not be represented\n"
+                                      "        in your locale's character encoding."),
                                 )
 #endif
 
 /* tchar to UTF-8 and back */
 #if TCHAR_IS_UTF16LE
+
+/* Windows */
 DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar,
                                 utf8, "UTF-8", char,
                                 false,
+                                ,
                                 in_nbytes * 2,
                                 WIMLIB_ERR_INVALID_UTF16_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert UTF-16LE "
@@ -266,33 +295,40 @@ DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "UTF-16LE", tchar,
 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 tstr, "UTF-16LE", tchar,
                                 false,
+                                ,
                                 in_nbytes * 2,
                                 WIMLIB_ERR_INVALID_UTF8_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert UTF-8 string "
                                                  "to UTF-16LE string!"),
                                 static)
 #else
+
+/* UNIX */
+
 DEFINE_CHAR_CONVERSION_FUNCTIONS(tstr, "", tchar,
                                 utf8, "UTF-8", char,
-                                wimlib_mbs_is_utf8,
+                                true,
+                                ,
                                 in_nbytes * 4,
                                 WIMLIB_ERR_INVALID_MULTIBYTE_STRING,
                                 ERROR_WITH_ERRNO("Failed to convert multibyte "
                                                  "string \"%"TS"\" to UTF-8 string!", in);
                                 ERROR("If the data you provided was UTF-8, please make sure "
-                                      "the character encoding of your current locale is UTF-8."),
+                                      "the character\n"
+                                      "        encoding of your current locale is UTF-8."),
                                 static)
 
 DEFINE_CHAR_CONVERSION_FUNCTIONS(utf8, "UTF-8", char,
                                 tstr, "", tchar,
-                                wimlib_mbs_is_utf8,
+                                true,
+                                ,
                                 in_nbytes * 4,
                                 WIMLIB_ERR_UNICODE_STRING_NOT_REPRESENTABLE,
                                 ERROR("Failed to convert UTF-8 string to "
                                       "multibyte string!");
                                 ERROR("This may be because the UTF-8 data "
-                                      "could not be represented in your "
-                                      "locale's character encoding."),
+                                      "could not be represented\n"
+                                      "        in your locale's character encoding."),
                                 static)
 #endif
 
index b18270d..fc24d9b 100644 (file)
@@ -111,10 +111,8 @@ extract_regular_file_linked(struct wim_dentry *dentry,
                const char *p2;
                size_t i;
 
-               num_path_components =
-                       get_num_path_components(dentry_full_path(dentry)) - 1;
-               num_output_dir_path_components =
-                       get_num_path_components(args->target);
+               num_path_components = get_num_path_components(output_path) - 1;
+               num_output_dir_path_components = get_num_path_components(args->target);
 
                if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
                        num_path_components++;
@@ -494,13 +492,12 @@ apply_dentry_normal(struct wim_dentry *dentry, void *arg)
        struct apply_args *args = arg;
        tchar *output_path;
        size_t len;
+       int ret;
 
        len = tstrlen(args->target);
        if (dentry_is_root(dentry)) {
                output_path = (tchar*)args->target;
        } else {
-               if (!dentry_full_path(dentry))
-                       return WIMLIB_ERR_NOMEM;
                output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
                                     sizeof(tchar));
                memcpy(output_path, args->target, len * sizeof(tchar));
@@ -557,8 +554,9 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
        if (dentry->is_extracted)
                return 0;
 
-       if (!dentry_full_path(dentry))
-               return WIMLIB_ERR_NOMEM;
+       ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
 
        if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
                if (inode_unnamed_lte_resolved(dentry->d_inode))
@@ -566,7 +564,7 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
 
        if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
             args->progress_func) {
-               args->progress.extract.cur_path = dentry_full_path(dentry);
+               args->progress.extract.cur_path = dentry->_full_path;
                args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
                                    &args->progress);
        }
index 342281d..a5044c6 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -690,16 +690,17 @@ test_locale_ctype_utf8()
 #endif
 }
 
-/* Get global memory allocations out of the way,
- * single-threaded programs like 'imagex'. */
 WIMLIBAPI int
-wimlib_global_init()
+wimlib_global_init(int init_flags)
 {
        libxml_global_init();
-#ifdef WITH_NTFS_3G
-       libntfs3g_global_init();
-#endif
-       wimlib_mbs_is_utf8 = test_locale_ctype_utf8();
+       if (!(init_flags & WIMLIB_INIT_FLAG_ASSUME_UTF8)) {
+               wimlib_mbs_is_utf8 = test_locale_ctype_utf8();
+       #ifdef WITH_NTFS_3G
+               if (!wimlib_mbs_is_utf8)
+                       libntfs3g_global_init();
+       #endif
+       }
 #ifdef __WIN32__
        win32_global_init();
 #endif
index 50f9252..841bc91 100644 (file)
@@ -828,6 +828,10 @@ struct wimlib_capture_config {
  * deleting an image in this way. */
 #define WIMLIB_WRITE_FLAG_SOFT_DELETE                  0x00000010
 
+/** Assume that strings are represented in UTF-8, even if this is not the
+ * locale's character encoding. */
+#define WIMLIB_INIT_FLAG_ASSUME_UTF8                   0x00000001
+
 
 #if 0
 /****************************************************************
@@ -1562,17 +1566,24 @@ wimlib_get_part_number(const WIMStruct *wim, int *total_parts_ret);
  * threads, then you must call this function serially first.
  *
  * Since wimlib 1.3.0, you must call this function if the character encoding of
- * the current locale is not UTF-8.
+ * the current locale is not UTF-8 and you do not want wimlib to assume a UTF-8
+ * encoding.
  *
  * Since wimlib 1.3.2, you must call this function if using the Windows-native
  * build of the library so that certain functions can be dynamically loaded from
  * system DLLs.
  *
- * This function currently always returns 0, but it may return other error codes
- * in future releases.
+ * Since wimlib 1.3.3, this function takes the @a init_flags parameter.
+ *
+ * @param init_flags
+ *     ::WIMLIB_INIT_FLAG_ASSUME_UTF8 if wimlib should assume that all input
+ *     data, including filenames, are in UTF-8, and that UTF-8 data can be
+ *     directly printed to the console.
+ *
+ * @return 0; other error codes may be returned in future releases.
  */
 extern int
-wimlib_global_init();
+wimlib_global_init(int init_flags);
 
 /**
  * Since wimlib 1.2.6:  Cleanup function for wimlib.  This is not re-entrant.