Improve handling of invalid filenames
authorEric Biggers <ebiggers3@gmail.com>
Mon, 20 May 2013 16:12:24 +0000 (11:12 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Mon, 20 May 2013 16:12:24 +0000 (11:12 -0500)
12 files changed:
include/wimlib.h
include/wimlib/apply.h
include/wimlib/dentry.h
include/wimlib/list.h
include/wimlib/util.h
include/wimlib_tchar.h
programs/imagex.c
src/dentry.c
src/extract.c
src/ntfs-3g_apply.c
src/unix_apply.c
src/win32_apply.c

index 906a15c..02f7473 100644 (file)
@@ -722,6 +722,18 @@ struct wimlib_capture_config {
 /** Extract files to standard output rather than to the filesystem. */
 #define WIMLIB_EXTRACT_FLAG_TO_STDOUT                  0x00000400
 
+/** Instead of ignoring files and directories with names that cannot be
+ * represented on the current platform (note: Windows has more restrictions on
+ * filenames than UNIX), try to replace characters or append junk to the names
+ * so that they can be extracted in some form. */
+#define WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES  0x00000800
+
+/** On Windows, when there exist two or more files with the same case
+ * insensitive name (but different case sensitive names), try to extract them
+ * all by appending junk to the end of them, rather than arbitrarily extracting
+ * only one.  */
+#define WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS         0x00001000
+
 /******************************
  * WIMLIB_MOUNT_FLAG_*
  ******************************/
index 44773bd..ed2f07a 100644 (file)
@@ -12,8 +12,8 @@ struct apply_args {
        WIMStruct *w;
        const tchar *target;
        unsigned target_nchars;
-       unsigned wim_source_path_nchars;
        struct wim_dentry *extract_root;
+       unsigned long invalid_sequence;
        tchar *target_realpath;
        unsigned target_realpath_len;
        int extract_flags;
index 1e08ae3..e515abe 100644 (file)
@@ -128,9 +128,20 @@ struct wim_dentry {
        /* The inode for this dentry */
        struct wim_inode *d_inode;
 
-       /* Red-black tree of sibling dentries */
+       /* Node for the parent's red-black tree of child dentries, sorted by
+        * case sensitive long name. */
        struct rb_node rb_node;
 
+#ifdef __WIN32__
+       /* Node for the parent's red-black tree of child dentries, sorted by
+        * case insensitive long name. */
+       struct rb_node rb_node_case_insensitive;
+
+       /* List of dentries in a directory that have different case sensitive
+        * long names but share the same case insensitive long name */
+       struct list_head case_insensitive_conflict_list;
+#endif
+
        /* Length of UTF-16LE encoded short filename, in bytes, not including
         * the terminating zero wide-character. */
        u16 short_name_nbytes;
@@ -149,6 +160,8 @@ struct wim_dentry {
        /* Only used during NTFS capture */
        u8 is_win32_name : 1;
 
+       u8 not_extracted : 1;
+
        /* Temporary list */
        struct list_head tmp_list;
 
@@ -193,8 +206,16 @@ struct wim_dentry {
        /* Pointer to the UTF-16LE filename (malloc()ed buffer). */
        utf16lechar *file_name;
 
-       /* Full path of this dentry */
+       /* Full path of this dentry in the WIM */
        tchar *_full_path;
+
+       /* Actual name to extract this dentry as. */
+       tchar *extraction_name;
+       size_t extraction_name_nchars;
+
+       /* List head for building a list of dentries that contain a certain
+        * stream. */
+       struct list_head extraction_stream_list;
 };
 
 #define rbnode_dentry(node) container_of(node, struct wim_dentry, rb_node)
@@ -299,6 +320,10 @@ struct wim_inode {
         * noted in the @attributes field.) */
        struct rb_root i_children;
 
+#ifdef __WIN32__
+       struct rb_root i_children_case_insensitive;
+#endif
+
        /* Next alternate data stream ID to be assigned */
        u32 i_next_stream_id;
 
@@ -388,6 +413,9 @@ extern int
 print_dentry_full_path(struct wim_dentry *entry, void *ignore);
 
 extern int
+calculate_dentry_full_path(struct wim_dentry *dentry);
+
+extern int
 calculate_dentry_tree_full_paths(struct wim_dentry *root);
 
 extern tchar *
index ff40742..8541d45 100644 (file)
@@ -111,6 +111,16 @@ static inline void list_del(struct list_head *entry)
 }
 
 /**
+ * list_del_init - deletes entry from list and reinitializes it.
+ * @entry: the element to delete from the list.
+ *
+ * Equivalent to list_del(@entry) followed by INIT_LIST_HEAD(@entry).
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+       list_del(entry);
+       INIT_LIST_HEAD(entry);
+}
+
+/**
  * list_empty - tests whether a list is empty
  * @head: the list to test.
  */
index eedb8b0..889a3f5 100644 (file)
@@ -122,4 +122,10 @@ hash_u64(u64 n)
        return n * 0x9e37fffffffc0001ULL;
 }
 
+/* Character placed between path components when building paths for the
+ * current platform: a wide backslash when __WIN32__ is defined, a forward
+ * slash otherwise. */
+#ifdef __WIN32__
+#  define OS_PREFERRED_PATH_SEPARATOR L'\\'
+#else
+#  define OS_PREFERRED_PATH_SEPARATOR '/'
+#endif
+
 #endif /* _WIMLIB_UTIL_H */
index e902fab..6d81f02 100644 (file)
@@ -21,6 +21,7 @@ typedef wchar_t tchar;
  * with the "wide-character" functions. */
 #  define tmemchr      wmemchr
 #  define tmemcpy      wmemcpy
+#  define tmempcpy     wmempcpy
 #  define tstrcpy      wcscpy
 #  define tprintf      wprintf
 #  define tsprintf     swprintf
@@ -77,6 +78,7 @@ typedef char tchar;
  * string functions. */
 #  define tmemchr      memchr
 #  define tmemcpy      memcpy
+#  define tmempcpy     mempcpy
 #  define tstrcpy      strcpy
 #  define tprintf      printf
 #  define tsprintf     sprintf
index ceae9ea..9ee6df3 100644 (file)
@@ -104,6 +104,7 @@ IMAGEX_PROGNAME" apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n"
 "                    (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n"
 "                    [--symlink] [--verbose] [--ref=\"GLOB\"] [--unix-data]\n"
 "                    [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n"
+"                    [--force-all-files]\n"
 ),
 [CAPTURE] =
 T(
@@ -134,6 +135,7 @@ T(
 IMAGEX_PROGNAME" extract WIMFILE (IMAGE_NUM | IMAGE_NAME) [PATH...]\n"
 "              [--check] [--ref=\"GLOB\"] [--verbose] [--unix-data]\n"
 "              [--no-acls] [--strict-acls] [--to-stdout] [--dest-dir=DIR]\n"
+"              [--force-all-files]\n"
 ),
 [INFO] =
 T(
@@ -206,6 +208,7 @@ enum {
        IMAGEX_EXTRACT_XML_OPTION,
        IMAGEX_FLAGS_OPTION,
        IMAGEX_FORCE_OPTION,
+       IMAGEX_FORCE_ALL_FILES_OPTION,
        IMAGEX_HARDLINK_OPTION,
        IMAGEX_HEADER_OPTION,
        IMAGEX_LAZY_OPTION,
@@ -243,6 +246,7 @@ static const struct option apply_options[] = {
        {T("strict-acls"), no_argument,       NULL, IMAGEX_STRICT_ACLS_OPTION},
        {T("rpfix"),       no_argument,       NULL, IMAGEX_RPFIX_OPTION},
        {T("norpfix"),     no_argument,       NULL, IMAGEX_NORPFIX_OPTION},
+       {T("force-all-files"), no_argument,       NULL, IMAGEX_FORCE_ALL_FILES_OPTION},
        {NULL, 0, NULL, 0},
 };
 static const struct option capture_or_append_options[] = {
@@ -290,6 +294,7 @@ static const struct option extract_options[] = {
        {T("strict-acls"), no_argument,       NULL, IMAGEX_STRICT_ACLS_OPTION},
        {T("dest-dir"),    required_argument, NULL, IMAGEX_DEST_DIR_OPTION},
        {T("to-stdout"),   no_argument,       NULL, IMAGEX_TO_STDOUT_OPTION},
+       {T("force-all-files"), no_argument,       NULL, IMAGEX_FORCE_ALL_FILES_OPTION},
        {NULL, 0, NULL, 0},
 };
 
@@ -1528,6 +1533,10 @@ imagex_apply(int argc, tchar **argv)
                case IMAGEX_RPFIX_OPTION:
                        extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX;
                        break;
+               case IMAGEX_FORCE_ALL_FILES_OPTION:
+                       extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES;
+                       extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS;
+                       break;
                default:
                        usage(APPLY);
                        return -1;
@@ -2259,6 +2268,10 @@ imagex_extract(int argc, tchar **argv)
                        extract_flags |= WIMLIB_EXTRACT_FLAG_TO_STDOUT;
                        imagex_be_quiet = true;
                        break;
+               case IMAGEX_FORCE_ALL_FILES_OPTION:
+                       extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES;
+                       extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS;
+                       break;
                default:
                        goto out_usage;
                }
index d93d8ac..abe5095 100644 (file)
@@ -338,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root,
 
 /* Calculate the full path of @dentry.  The full path of its parent must have
  * already been calculated, or it must be the root dentry. */
-static int
+int
 calculate_dentry_full_path(struct wim_dentry *dentry)
 {
        tchar *full_path;
@@ -589,8 +589,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
                        node = node->rb_left;
                else if (result > 0)
                        node = node->rb_right;
-               else
+               else {
+               #ifdef __WIN32__
+                       if (!list_empty(&child->case_insensitive_conflict_list))
+                       {
+                               WARNING("Result of case-insensitive lookup is ambiguous "
+                                       "(returning \"%ls\" instead of \"%ls\")",
+                                       child->file_name,
+                                       container_of(child->case_insensitive_conflict_list.next,
+                                                    struct wim_dentry,
+                                                    case_insensitive_conflict_list)->file_name);
+                       }
+               #endif
                        return child;
+               }
        }
        return NULL;
 }
@@ -1088,23 +1100,27 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
  * @child: The dentry to link.
  *
  * Returns NULL if successful.  If @parent already contains a dentry with the
- * same name as @child (see compare_utf16le_names() for what names are
- * considered the "same"), the pointer to this duplicate dentry is returned.
+ * same case-sensitive name as @child, the pointer to this duplicate dentry is
+ * returned.
  */
 struct wim_dentry *
 dentry_add_child(struct wim_dentry * restrict parent,
                 struct wim_dentry * restrict child)
 {
+       struct rb_root *root;
+       struct rb_node **new;
+       struct rb_node *rb_parent;
+
        wimlib_assert(dentry_is_directory(parent));
        wimlib_assert(parent != child);
 
-       struct rb_root *root = &parent->d_inode->i_children;
-       struct rb_node **new = &(root->rb_node);
-       struct rb_node *rb_parent = NULL;
-
+       /* Case sensitive child dentry index */
+       root = &parent->d_inode->i_children;
+       new = &root->rb_node;
+       rb_parent = NULL;
        while (*new) {
                struct wim_dentry *this = rbnode_dentry(*new);
-               int result = dentry_compare_names(child, this);
+               int result = dentry_compare_names_case_sensitive(child, this);
 
                rb_parent = *new;
 
@@ -1118,6 +1134,34 @@ dentry_add_child(struct wim_dentry * restrict parent,
        child->parent = parent;
        rb_link_node(&child->rb_node, rb_parent, new);
        rb_insert_color(&child->rb_node, root);
+
+#ifdef __WIN32__
+       /* Case insensitive child dentry index */
+       root = &parent->d_inode->i_children_case_insensitive;
+       new = &root->rb_node;
+       rb_parent = NULL;
+       while (*new) {
+               struct wim_dentry *this = container_of(*new, struct wim_dentry,
+                                                      rb_node_case_insensitive);
+               int result = dentry_compare_names_case_insensitive(child, this);
+
+               rb_parent = *new;
+
+               if (result < 0)
+                       new = &((*new)->rb_left);
+               else if (result > 0)
+                       new = &((*new)->rb_right);
+               else {
+                       list_add(&child->case_insensitive_conflict_list,
+                                &this->case_insensitive_conflict_list);
+                       return NULL;
+
+               }
+       }
+       rb_link_node(&child->rb_node_case_insensitive, rb_parent, new);
+       rb_insert_color(&child->rb_node_case_insensitive, root);
+       INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
+#endif
        return NULL;
 }
 
@@ -1125,8 +1169,14 @@ dentry_add_child(struct wim_dentry * restrict parent,
 void
 unlink_dentry(struct wim_dentry *dentry)
 {
-       if (!dentry_is_root(dentry))
+       if (!dentry_is_root(dentry)) {
                rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children);
+       #ifdef __WIN32__
+               /* NOTE(review): dentry_add_child() only links
+                * rb_node_case_insensitive into the tree (and only calls
+                * INIT_LIST_HEAD() on the conflict list) when no dentry with
+                * the same case-insensitive name is already present; otherwise
+                * the child is merely added to the existing dentry's conflict
+                * list.  For such a child the rb_erase() below looks like it
+                * operates on a node that was never inserted, and erasing the
+                * tree-resident dentry appears to drop its conflicting
+                * siblings from the case-insensitive index -- TODO confirm. */
+               rb_erase(&dentry->rb_node_case_insensitive,
+                        &dentry->parent->d_inode->i_children_case_insensitive);
+               list_del(&dentry->case_insensitive_conflict_list);
+       #endif
+       }
 }
 
 /*
@@ -1400,39 +1450,6 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode,
 }
 #endif /* !__WIN32__ */
 
-/* Replace weird characters in filenames and alternate data stream names.
- *
- * In particular we do not want the path separator to appear in any names, as
- * that would make it possible for a "malicious" WIM to extract itself to any
- * location it wanted to. */
-static void
-replace_forbidden_characters(utf16lechar *name)
-{
-       utf16lechar *p;
-
-       for (p = name; *p; p++) {
-       #ifdef __WIN32__
-               if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p))
-       #else
-               if (*p == cpu_to_le16('/'))
-       #endif
-               {
-                       if (name) {
-                               WARNING("File, directory, or stream name \"%"WS"\"\n"
-                                       "          contains forbidden characters; "
-                                       "substituting replacement characters.",
-                                       name);
-                               name = NULL;
-                       }
-               #ifdef __WIN32__
-                       *p = cpu_to_le16(0xfffd);
-               #else
-                       *p = cpu_to_le16('?');
-               #endif
-               }
-       }
-}
-
 /*
  * Reads the alternate data stream entries of a WIM dentry.
  *
@@ -1526,7 +1543,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
                               disk_entry->stream_name,
                               cur_entry->stream_name_nbytes);
                        cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
-                       replace_forbidden_characters(cur_entry->stream_name);
                }
 
                /* It's expected that the size of every ADS entry is a multiple
@@ -1732,7 +1748,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(file_name, p, file_name_nbytes);
                p += file_name_nbytes + 2;
                file_name[file_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(file_name);
        } else {
                file_name = NULL;
        }
@@ -1751,7 +1766,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(short_name, p, short_name_nbytes);
                p += short_name_nbytes + 2;
                short_name[short_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(short_name);
        } else {
                short_name = NULL;
        }
@@ -1880,30 +1894,11 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
                        const tchar *child_type, *duplicate_type;
                        child_type = dentry_get_file_type_string(child);
                        duplicate_type = dentry_get_file_type_string(duplicate);
-                       /* On UNIX, duplicates are exact.  On Windows,
-                        * duplicates may differ by case and we wish to provide
-                        * a different warning message in this case. */
-               #ifdef __WIN32__
-                       if (dentry_compare_names_case_sensitive(child, duplicate))
-                       {
-                               child->parent = dentry;
-                               WARNING("Ignoring %ls \"%ls\", which differs "
-                                       "only in case from %ls \"%ls\"",
-                                       child_type,
-                                       dentry_full_path(child),
-                                       duplicate_type,
-                                       dentry_full_path(duplicate));
-                       }
-                       else
-               #endif
-                       {
-                               WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
-                                       "(the WIM image already contains a %"TS" "
-                                       "at that path with the exact same name)",
-                                       child_type, dentry_full_path(duplicate),
-                                       duplicate_type);
-                       }
-                       free_dentry(child);
+                       WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
+                               "(the WIM image already contains a %"TS" "
+                               "at that path with the exact same name)",
+                               child_type, dentry_full_path(duplicate),
+                               duplicate_type);
                } else {
                        inode_add_dentry(child, child->d_inode);
                        /* If there are children of this child, call this
index f141563..5c6c38a 100644 (file)
@@ -30,6 +30,8 @@
 
 #include "wimlib/apply.h"
 #include "wimlib/dentry.h"
+#include "wimlib/encoding.h"
+#include "wimlib/endianness.h"
 #include "wimlib/error.h"
 #include "wimlib/lookup_table.h"
 #include "wimlib/paths.h"
@@ -55,27 +57,28 @@ do_apply_op(struct wim_dentry *dentry, struct apply_args *args,
                                     struct wim_dentry *, struct apply_args *))
 {
        tchar *p;
-       const tchar *full_path;
-       size_t full_path_nchars;
+       size_t extraction_path_nchars;
+       struct wim_dentry *d;
+       LIST_HEAD(ancestor_list);
 
-       wimlib_assert(dentry->_full_path != NULL);
-       full_path = dentry->_full_path + 1;
-       full_path_nchars = dentry->full_path_nbytes / sizeof(tchar) - 1;
-       tchar output_path[args->target_nchars + 1 +
-                        (full_path_nchars - args->wim_source_path_nchars) + 1];
-       p = output_path;
+       extraction_path_nchars = args->target_nchars;
 
-       tmemcpy(p, args->target, args->target_nchars);
-       p += args->target_nchars;
+       for (d = dentry; d != args->extract_root; d = d->parent) {
+               if (d->not_extracted)
+                       return 0;
+               extraction_path_nchars += d->extraction_name_nchars + 1;
+               list_add(&d->tmp_list, &ancestor_list);
+       }
+
+       tchar extraction_path[extraction_path_nchars + 1];
+       p = tmempcpy(extraction_path, args->target, args->target_nchars);
 
-       if (dentry != args->extract_root) {
-               *p++ = T('/');
-               tmemcpy(p, full_path + args->wim_source_path_nchars,
-                       full_path_nchars - args->wim_source_path_nchars);
-               p += full_path_nchars - args->wim_source_path_nchars;
+       list_for_each_entry(d, &ancestor_list, tmp_list) {
+               *p++ = OS_PREFERRED_PATH_SEPARATOR;
+               p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars);
        }
        *p = T('\0');
-       return (*apply_dentry_func)(output_path, p - output_path,
+       return (*apply_dentry_func)(extraction_path, extraction_path_nchars,
                                    dentry, args);
 }
 
@@ -103,6 +106,17 @@ apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
 #endif
 }
 
+/* Return true if and only if the dentry's long name is exactly "." or "..".
+ * A dentry without a long name (file_name == NULL) is neither. */
+static bool
+dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
+{
+       const utf16lechar *name = dentry->file_name;
+       const utf16lechar dot = cpu_to_le16('.');
+       const utf16lechar nul = cpu_to_le16('\0');
+
+       /* No name, or the first character is not a dot: cannot match. */
+       if (name == NULL || name[0] != dot)
+               return false;
+
+       /* "." */
+       if (name[1] == nul)
+               return true;
+
+       /* ".." */
+       return name[1] == dot && name[2] == nul;
+}
+
 /* Extract a dentry if it hasn't already been extracted and either
  * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory
  * and/or has no unnamed stream. */
@@ -122,6 +136,9 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
 
        if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
             args->progress_func) {
+               ret = calculate_dentry_full_path(dentry);
+               if (ret)
+                       return ret;
                args->progress.extract.cur_path = dentry->_full_path;
                args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
                                    &args->progress);
@@ -189,13 +206,14 @@ dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx)
        struct list_head *stream_list = &ctx->stream_list;
        int extract_flags = ctx->extract_flags;
 
-       dentry->needs_extraction = 1;
+       if (!dentry->needs_extraction)
+               return 0;
 
        lte = inode_unnamed_lte_resolved(inode);
        if (lte) {
                if (!inode->i_visited)
                        maybe_add_stream_for_extraction(lte, stream_list);
-               list_add_tail(&dentry->tmp_list, &lte->lte_dentry_list);
+               list_add_tail(&dentry->extraction_stream_list, &lte->lte_dentry_list);
                dentry_added = true;
        }
 
@@ -222,7 +240,7 @@ dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx)
                                                                                stream_list);
                                        }
                                        if (!dentry_added) {
-                                               list_add_tail(&dentry->tmp_list,
+                                               list_add_tail(&dentry->extraction_stream_list,
                                                              &lte->lte_dentry_list);
                                                dentry_added = true;
                                        }
@@ -265,14 +283,6 @@ find_streams_for_extraction(struct wim_dentry *root,
        list_transfer(&ctx.stream_list, stream_list);
 }
 
-static int
-dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore)
-{
-       dentry->needs_extraction = 0;
-       dentry->d_inode->i_visited = 0;
-       return 0;
-}
-
 struct apply_operations {
        int (*apply_dentry)(struct wim_dentry *dentry, void *arg);
        int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg);
@@ -314,7 +324,7 @@ apply_stream_list(struct list_head *stream_list,
        list_for_each_entry(lte, stream_list, extraction_list) {
                /* For each dentry to be extracted that is a name for an inode
                 * containing the stream */
-               list_for_each_entry(dentry, &lte->lte_dentry_list, tmp_list) {
+               list_for_each_entry(dentry, &lte->lte_dentry_list, extraction_stream_list) {
                        /* Extract the dentry if it was not already
                         * extracted */
                        ret = maybe_apply_dentry(dentry, args);
@@ -402,6 +412,194 @@ extract_dentry_to_stdout(struct wim_dentry *dentry)
        return ret;
 }
 
+/* Character substituted for each invalid character when a name is repaired:
+ * U+FFFD on Windows, '?' elsewhere. */
+#ifdef __WIN32__
+static const utf16lechar replacement_char = cpu_to_le16(0xfffd);
+#else
+static const utf16lechar replacement_char = cpu_to_le16('?');
+#endif
+
+/*
+ * Check whether a UTF-16LE file name can be used on the current platform,
+ * optionally repairing it in place.
+ *
+ * @name:      The name to check; @num_chars UTF-16LE code units.  Modified
+ *             only if @fix is true.
+ * @num_chars: Number of code units in @name.  An empty name is always
+ *             reported as valid.
+ * @fix:       If false, return false as soon as a problem is found.  If true,
+ *             overwrite every offending code unit with replacement_char and
+ *             return true.
+ *
+ * On all platforms '/' and the null character are rejected.  On Windows the
+ * characters \ : * ? " < > | are rejected as well, and so is a name whose last
+ * character is a space or a dot.  The trailing space/dot rule is a Windows
+ * restriction only, so it is not applied elsewhere; such names are ordinary,
+ * valid file names on UNIX.
+ */
+static bool
+file_name_valid(utf16lechar *name, size_t num_chars, bool fix)
+{
+       size_t i;
+
+       if (num_chars == 0)
+               return true;
+       for (i = 0; i < num_chars; i++) {
+               switch (name[i]) {
+       #ifdef __WIN32__
+               case cpu_to_le16('\\'):
+               case cpu_to_le16(':'):
+               case cpu_to_le16('*'):
+               case cpu_to_le16('?'):
+               case cpu_to_le16('"'):
+               case cpu_to_le16('<'):
+               case cpu_to_le16('>'):
+               case cpu_to_le16('|'):
+       #endif
+               case cpu_to_le16('/'):
+               case cpu_to_le16('\0'):
+                       if (!fix)
+                               return false;
+                       name[i] = replacement_char;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+#ifdef __WIN32__
+       /* Windows does not support names ending in a space or a dot. */
+       if (name[num_chars - 1] == cpu_to_le16(' ') ||
+           name[num_chars - 1] == cpu_to_le16('.'))
+       {
+               if (!fix)
+                       return false;
+               name[num_chars - 1] = replacement_char;
+       }
+#endif
+       return true;
+}
+
+/*
+ * dentry_calculate_extraction_path-
+ *
+ * Calculate the actual filename component at which a WIM dentry will be
+ * extracted, handling invalid filenames "properly".
+ *
+ * dentry->extraction_name usually will be set the same as dentry->file_name (on
+ * UNIX, converted into the platform's multibyte encoding).  However, if the
+ * file name contains characters that are not valid on the current platform or
+ * has some other format that is not valid, leave dentry->extraction_name as
+ * NULL and clear dentry->needs_extraction to indicate that this dentry should
+ * not be extracted, unless the appropriate flag
+ * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in
+ * which case a substitute filename will be created and set instead.
+ *
+ * Conflicts with case-insensitive names on Windows are handled similarly; see
+ * below.
+ */
+static int
+dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args)
+{
+       struct apply_args *args = _args;
+       int ret;
+
+       dentry->needs_extraction = 1;
+
+       if (dentry == args->extract_root)
+               return 0;
+
+       if (dentry_is_dot_or_dotdot(dentry)) {
+               /* WIM files shouldn't contain . or .. entries.  But if they are
+                * there, don't attempt to extract them. */
+               WARNING("Skipping extraction of unexpected . or .. file \"%"TS"\"",
+                       dentry_full_path(dentry));
+               goto skip_dentry;
+       }
+
+#ifdef __WIN32__
+       struct wim_dentry *other;
+       list_for_each_entry(other, &dentry->case_insensitive_conflict_list,
+                           case_insensitive_conflict_list)
+       {
+               if (other->needs_extraction) {
+                       if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS)
+                       {
+                               WARNING("\"%"TS"\" has the same case-insensitive "
+                                       "name as \"%"TS"\"; extracting dummy name instead",
+                                       dentry_full_path(dentry),
+                                       dentry_full_path(other));
+                               goto out_replace;
+                       } else {
+                               WARNING("Not extracting \"%"TS"\": has same case-insensitive "
+                                       "name as \"%"TS"\"",
+                                       dentry_full_path(dentry),
+                                       dentry_full_path(other));
+                               goto skip_dentry;
+                       }
+               }
+       }
+#endif
+
+       if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) {
+#ifdef __WIN32__
+               dentry->extraction_name = dentry->file_name;
+               dentry->extraction_name_nchars = dentry->file_name_nbytes / 2;
+               return 0;
+#else
+               return utf16le_to_tstr(dentry->file_name,
+                                      dentry->file_name_nbytes,
+                                      &dentry->extraction_name,
+                                      &dentry->extraction_name_nchars);
+#endif
+       } else {
+               if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES)
+               {
+                       WARNING("\"%"TS"\" has an invalid filename "
+                               "that is not supported on this platform; "
+                               "extracting dummy name instead",
+                               dentry_full_path(dentry));
+                       goto out_replace;
+               } else {
+                       WARNING("Not extracting \"%"TS"\": has an invalid filename "
+                               "that is not supported on this platform",
+                               dentry_full_path(dentry));
+                       goto skip_dentry;
+               }
+       }
+
+out_replace:
+       {
+               utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2];
+
+               memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes);
+               file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true);
+
+               tchar *tchar_name;
+               size_t tchar_nchars;
+       #ifdef __WIN32__
+               tchar_name = utf16_name_copy;
+               tchar_nchars = dentry->file_name_nbytes / 2;
+       #else
+               ret = utf16le_to_tstr(utf16_name_copy,
+                                     dentry->file_name_nbytes,
+                                     &tchar_name, &tchar_nchars);
+               if (ret)
+                       return ret;
+       #endif
+               size_t fixed_name_num_chars = tchar_nchars;
+               tchar fixed_name[tchar_nchars + 50];
+               size_t extraction_name_nbytes;
+
+               tmemcpy(fixed_name, tchar_name, tchar_nchars);
+               fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars,
+                                                T(" (invalid filename #%lu)"),
+                                                ++args->invalid_sequence);
+               dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2);
+               if (!dentry->extraction_name)
+                       return WIMLIB_ERR_NOMEM;
+               dentry->extraction_name_nchars = fixed_name_num_chars;
+       }
+       return 0;
+skip_dentry:
+       dentry->needs_extraction = 0;
+       dentry->not_extracted = 1;
+       return 0;
+}
+
+/* Tree-walk callback that clears the per-extraction state of a dentry and
+ * its inode, and releases the extraction name unless it merely aliases
+ * dentry->file_name.  Always returns 0; the second argument is unused. */
+static int
+dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore)
+{
+       struct wim_inode *inode = dentry->d_inode;
+
+       /* Flags kept on the dentry itself */
+       dentry->needs_extraction = 0;
+       dentry->not_extracted = 0;
+       dentry->is_win32_name = 0;
+
+       /* State kept on the inode */
+       inode->i_visited = 0;
+       inode->i_dos_name_extracted = 0;
+       FREE(inode->i_extracted_file);
+       inode->i_extracted_file = NULL;
+
+       /* The extraction name is a separate allocation only when it does not
+        * point at file_name. */
+       if ((void*)dentry->extraction_name != (void*)dentry->file_name)
+               FREE(dentry->extraction_name);
+       dentry->extraction_name = NULL;
+       return 0;
+}
+
 /*
  * extract_tree - Extract a file or directory tree from the currently selected
  *               WIM image.
@@ -454,7 +652,6 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
        args.extract_flags          = extract_flags;
        args.progress_func          = progress_func;
        args.target_nchars          = tstrlen(target);
-       args.wim_source_path_nchars = tstrlen(wim_source_path);
 
        if (progress_func) {
                args.progress.extract.wimfile_name = wim->filename;
@@ -490,9 +687,12 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target,
        }
        args.extract_root = root;
 
-       ret = calculate_dentry_tree_full_paths(root);
+       /* Calculate the actual filename component of each extracted dentry, and
+        * in the process set the dentry->needs_extraction flag on dentries that
+        * will be extracted. */
+       ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args);
        if (ret)
-               goto out_ntfs_umount;
+               goto out_dentry_reset_needs_extraction;
 
        /* Build a list of the streams that need to be extracted */
        find_streams_for_extraction(root,
index 0bf7989..ffe3a8b 100644 (file)
@@ -538,6 +538,10 @@ apply_dentry_ntfs(struct wim_dentry *dentry, void *arg)
        struct wim_dentry *other;
        int ret;
 
+       ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
+
        /* Treat the root dentry specially. */
        if (dentry_is_root(dentry))
                return apply_root_dentry_ntfs(dentry, vol, w,
index 6b3ede1..b1d9148 100644 (file)
@@ -95,8 +95,13 @@ unix_extract_regular_file_linked(struct wim_dentry *dentry,
                char *p;
                const char *p2;
                size_t i;
+               const struct wim_dentry *d;
 
-               num_path_components = get_num_path_components(dentry->_full_path) - 1;
+               num_path_components = 0;
+               for (d = dentry; d != args->extract_root; d = d->parent)
+                       num_path_components++;
+               wimlib_assert(num_path_components > 0);
+               num_path_components--;
                num_output_dir_path_components = get_num_path_components(args->target);
 
                if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
@@ -339,7 +344,7 @@ unix_extract_symlink(struct wim_dentry *dentry,
 
        if (ret <= 0) {
                ERROR("Could not read the symbolic link from dentry `%s'",
-                     dentry->_full_path);
+                     dentry_full_path(dentry));
                return WIMLIB_ERR_INVALID_DENTRY;
        }
        target[args->target_realpath_len + ret] = '\0';
@@ -380,35 +385,33 @@ unix_extract_symlink(struct wim_dentry *dentry,
 }
 
 static int
-unix_extract_directory(struct wim_dentry *dentry, const tchar *output_path,
+unix_extract_directory(struct wim_dentry *dentry, const char *output_path,
                       int extract_flags)
 {
        int ret;
        struct stat stbuf;
 
-       ret = tstat(output_path, &stbuf);
+       ret = stat(output_path, &stbuf);
        if (ret == 0) {
                if (S_ISDIR(stbuf.st_mode)) {
                        goto dir_exists;
                } else {
-                       ERROR("`%"TS"' is not a directory", output_path);
+                       ERROR("\"%s\" is not a directory", output_path);
                        return WIMLIB_ERR_MKDIR;
                }
        } else {
                if (errno != ENOENT) {
-                       ERROR_WITH_ERRNO("Failed to stat `%"TS"'", output_path);
+                       ERROR_WITH_ERRNO("Failed to stat \"%s\"", output_path);
                        return WIMLIB_ERR_STAT;
                }
        }
 
-       if (tmkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))
-       {
-               ERROR_WITH_ERRNO("Cannot create directory `%"TS"'", output_path);
+       if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) {
+               ERROR_WITH_ERRNO("Cannot create directory \"%s\"", output_path);
                return WIMLIB_ERR_MKDIR;
        }
 dir_exists:
        ret = 0;
-#ifndef __WIN32__
        if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
                struct wimlib_unix_data unix_data;
                ret = inode_get_unix_data(dentry->d_inode, &unix_data, NULL);
@@ -420,7 +423,6 @@ dir_exists:
                        ret = dir_apply_unix_data(output_path, &unix_data,
                                                  extract_flags);
        }
-#endif
        return ret;
 }
 
index 120ec21..ffee267 100644 (file)
@@ -1235,6 +1235,7 @@ win32_do_apply_dentry(const wchar_t *output_path,
            !(args->vol_flags & FILE_SUPPORTS_REPARSE_POINTS))
        {
                WARNING("Not extracting reparse point \"%ls\"", output_path);
+               dentry->not_extracted = 1;
        } else {
                /* Create the file, directory, or reparse point, and extract the
                 * data streams. */
@@ -1277,13 +1278,6 @@ win32_do_apply_dentry_timestamps(const wchar_t *path,
        HANDLE h;
        const struct wim_inode *inode = dentry->d_inode;
 
-       if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT &&
-           !(args->vol_flags & FILE_SUPPORTS_REPARSE_POINTS))
-       {
-               /* Skip reparse points not extracted */
-               return 0;
-       }
-
        /* Windows doesn't let you change the timestamps of the root directory
         * (at least on FAT, which is dumb but expected since FAT doesn't store
         * any metadata about the root directory...) */