From d55cda59032e0abe5f71cd6f16ade943d2713fee Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 20 May 2013 11:12:24 -0500 Subject: [PATCH 1/1] Improve handling of invalid filenames --- include/wimlib.h | 12 ++ include/wimlib/apply.h | 2 +- include/wimlib/dentry.h | 32 ++++- include/wimlib/list.h | 10 ++ include/wimlib/util.h | 6 + include/wimlib_tchar.h | 2 + programs/imagex.c | 13 ++ src/dentry.c | 135 ++++++++++----------- src/extract.c | 262 +++++++++++++++++++++++++++++++++++----- src/ntfs-3g_apply.c | 4 + src/unix_apply.c | 24 ++-- src/win32_apply.c | 8 +- 12 files changed, 388 insertions(+), 122 deletions(-) diff --git a/include/wimlib.h b/include/wimlib.h index 906a15c7..02f74732 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -722,6 +722,18 @@ struct wimlib_capture_config { /** Extract files to standard output rather than to the filesystem. */ #define WIMLIB_EXTRACT_FLAG_TO_STDOUT 0x00000400 +/** Instead of ignoring files and directories with names that cannot be + * represented on the current platform (note: Windows has more restrictions on + * filenames than UNIX), try to replace characters or append junk to the names + * so that they can be extracted in some form. */ +#define WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES 0x00000800 + +/** On Windows, when there exist two or more files with the same case + * insensitive name (but different case sensitive names), try to extract them + * all by appending junk to the end of them, rather than extracting an + * arbitrarily only one. */ +#define WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS 0x00001000 + /****************************** * WIMLIB_MOUNT_FLAG_* ******************************/ diff --git a/include/wimlib/apply.h b/include/wimlib/apply.h index 44773bd1..ed2f07a9 100644 --- a/include/wimlib/apply.h +++ b/include/wimlib/apply.h @@ -12,8 +12,8 @@ struct apply_args { WIMStruct *w; const tchar *target; unsigned target_nchars; - unsigned wim_source_path_nchars; struct wim_dentry *extract_root; + unsigned long invalid_sequence; tchar *target_realpath; unsigned target_realpath_len; int extract_flags; diff --git a/include/wimlib/dentry.h b/include/wimlib/dentry.h index 1e08ae3f..e515abec 100644 --- a/include/wimlib/dentry.h +++ b/include/wimlib/dentry.h @@ -128,9 +128,20 @@ struct wim_dentry { /* The inode for this dentry */ struct wim_inode *d_inode; - /* Red-black tree of sibling dentries */ + /* Node for the parent's red-black tree of child dentries, sorted by + * case sensitive long name. */ struct rb_node rb_node; +#ifdef __WIN32__ + /* Node for the parent's red-black tree of child dentries, sorted by + * case insensitive long name. */ + struct rb_node rb_node_case_insensitive; + + /* List of dentries in a directory that have different case sensitive + * long names but share the same case insensitive long name */ + struct list_head case_insensitive_conflict_list; +#endif + /* Length of UTF-16LE encoded short filename, in bytes, not including * the terminating zero wide-character. */ u16 short_name_nbytes; @@ -149,6 +160,8 @@ struct wim_dentry { /* Only used during NTFS capture */ u8 is_win32_name : 1; + u8 not_extracted : 1; + /* Temporary list */ struct list_head tmp_list; @@ -193,8 +206,16 @@ struct wim_dentry { /* Pointer to the UTF-16LE filename (malloc()ed buffer). */ utf16lechar *file_name; - /* Full path of this dentry */ + /* Full path of this dentry in the WIM */ tchar *_full_path; + + /* Actual name to extract this dentry as. */ + tchar *extraction_name; + size_t extraction_name_nchars; + + /* List head for building a list of dentries that contain a certain + * stream. */ + struct list_head extraction_stream_list; }; #define rbnode_dentry(node) container_of(node, struct wim_dentry, rb_node) @@ -299,6 +320,10 @@ struct wim_inode { * noted in the @attributes field.) */ struct rb_root i_children; +#ifdef __WIN32__ + struct rb_root i_children_case_insensitive; +#endif + /* Next alternate data stream ID to be assigned */ u32 i_next_stream_id; @@ -387,6 +412,9 @@ print_dentry(struct wim_dentry *dentry, void *lookup_table); extern int print_dentry_full_path(struct wim_dentry *entry, void *ignore); +extern int +calculate_dentry_full_path(struct wim_dentry *dentry); + extern int calculate_dentry_tree_full_paths(struct wim_dentry *root); diff --git a/include/wimlib/list.h b/include/wimlib/list.h index ff407428..8541d45d 100644 --- a/include/wimlib/list.h +++ b/include/wimlib/list.h @@ -110,6 +110,16 @@ static inline void list_del(struct list_head *entry) __list_del(entry->prev, entry->next); } +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. + */ +static inline void list_del_init(struct list_head *entry) +{ + list_del(entry); + INIT_LIST_HEAD(entry); +} + /** * list_empty - tests whether a list is empty * @head: the list to test. diff --git a/include/wimlib/util.h b/include/wimlib/util.h index eedb8b0d..889a3f56 100644 --- a/include/wimlib/util.h +++ b/include/wimlib/util.h @@ -122,4 +122,10 @@ hash_u64(u64 n) return n * 0x9e37fffffffc0001ULL; } +#ifdef __WIN32__ +# define OS_PREFERRED_PATH_SEPARATOR L'\\' +#else +# define OS_PREFERRED_PATH_SEPARATOR '/' +#endif + #endif /* _WIMLIB_UTIL_H */ diff --git a/include/wimlib_tchar.h b/include/wimlib_tchar.h index e902fab3..6d81f022 100644 --- a/include/wimlib_tchar.h +++ b/include/wimlib_tchar.h @@ -21,6 +21,7 @@ typedef wchar_t tchar; * with the "wide-character" functions. */ # define tmemchr wmemchr # define tmemcpy wmemcpy +# define tmempcpy wmempcpy # define tstrcpy wcscpy # define tprintf wprintf # define tsprintf swprintf @@ -77,6 +78,7 @@ typedef char tchar; * string functions. */ # define tmemchr memchr # define tmemcpy memcpy +# define tmempcpy mempcpy # define tstrcpy strcpy # define tprintf printf # define tsprintf sprintf diff --git a/programs/imagex.c b/programs/imagex.c index ceae9ea6..9ee6df34 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -104,6 +104,7 @@ IMAGEX_PROGNAME" apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n" " (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n" " [--symlink] [--verbose] [--ref=\"GLOB\"] [--unix-data]\n" " [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n" +" [--force-all-files]\n" ), [CAPTURE] = T( @@ -134,6 +135,7 @@ T( IMAGEX_PROGNAME" extract WIMFILE (IMAGE_NUM | IMAGE_NAME) [PATH...]\n" " [--check] [--ref=\"GLOB\"] [--verbose] [--unix-data]\n" " [--no-acls] [--strict-acls] [--to-stdout] [--dest-dir=DIR]\n" +" [--force-all-files]\n" ), [INFO] = T( @@ -206,6 +208,7 @@ enum { IMAGEX_EXTRACT_XML_OPTION, IMAGEX_FLAGS_OPTION, IMAGEX_FORCE_OPTION, + IMAGEX_FORCE_ALL_FILES_OPTION, IMAGEX_HARDLINK_OPTION, IMAGEX_HEADER_OPTION, IMAGEX_LAZY_OPTION, @@ -243,6 +246,7 @@ static const struct option apply_options[] = { {T("strict-acls"), no_argument, NULL, IMAGEX_STRICT_ACLS_OPTION}, {T("rpfix"), no_argument, NULL, IMAGEX_RPFIX_OPTION}, {T("norpfix"), no_argument, NULL, IMAGEX_NORPFIX_OPTION}, + {T("force-all-files"), no_argument, NULL, IMAGEX_FORCE_ALL_FILES_OPTION}, {NULL, 0, NULL, 0}, }; static const struct option capture_or_append_options[] = { @@ -290,6 +294,7 @@ static const struct option extract_options[] = { {T("strict-acls"), no_argument, NULL, IMAGEX_STRICT_ACLS_OPTION}, {T("dest-dir"), required_argument, NULL, IMAGEX_DEST_DIR_OPTION}, {T("to-stdout"), no_argument, NULL, IMAGEX_TO_STDOUT_OPTION}, + {T("force-all-files"), no_argument, NULL, IMAGEX_FORCE_ALL_FILES_OPTION}, {NULL, 0, NULL, 0}, }; @@ -1528,6 +1533,10 @@ imagex_apply(int argc, tchar **argv) case IMAGEX_RPFIX_OPTION: extract_flags |= WIMLIB_EXTRACT_FLAG_RPFIX; break; + case IMAGEX_FORCE_ALL_FILES_OPTION: + extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES; + extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS; + break; default: usage(APPLY); return -1; @@ -2259,6 +2268,10 @@ imagex_extract(int argc, tchar **argv) extract_flags |= WIMLIB_EXTRACT_FLAG_TO_STDOUT; imagex_be_quiet = true; break; + case IMAGEX_FORCE_ALL_FILES_OPTION: + extract_flags |= WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES; + extract_flags |= WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS; + break; default: goto out_usage; } diff --git a/src/dentry.c b/src/dentry.c index d93d8ac7..abe5095c 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -338,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root, /* Calculate the full path of @dentry. The full path of its parent must have * already been calculated, or it must be the root dentry. */ -static int +int calculate_dentry_full_path(struct wim_dentry *dentry) { tchar *full_path; @@ -589,8 +589,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, node = node->rb_left; else if (result > 0) node = node->rb_right; - else + else { + #ifdef __WIN32__ + if (!list_empty(&child->case_insensitive_conflict_list)) + { + WARNING("Result of case-insensitive lookup is ambiguous " + "(returning \"%ls\" instead of \"%ls\")", + child->file_name, + container_of(child->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list)->file_name); + } + #endif return child; + } } return NULL; } @@ -1088,23 +1100,27 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) * @child: The dentry to link. * * Returns NULL if successful. If @parent already contains a dentry with the - * same name as @child (see compare_utf16le_names() for what names are - * considered the "same"), the pointer to this duplicate dentry is returned. + * same case-sensitive name as @child, the pointer to this duplicate dentry is + * returned. */ struct wim_dentry * dentry_add_child(struct wim_dentry * restrict parent, struct wim_dentry * restrict child) { + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; + wimlib_assert(dentry_is_directory(parent)); wimlib_assert(parent != child); - struct rb_root *root = &parent->d_inode->i_children; - struct rb_node **new = &(root->rb_node); - struct rb_node *rb_parent = NULL; - + /* Case sensitive child dentry index */ + root = &parent->d_inode->i_children; + new = &root->rb_node; + rb_parent = NULL; while (*new) { struct wim_dentry *this = rbnode_dentry(*new); - int result = dentry_compare_names(child, this); + int result = dentry_compare_names_case_sensitive(child, this); rb_parent = *new; @@ -1118,6 +1134,34 @@ dentry_add_child(struct wim_dentry * restrict parent, child->parent = parent; rb_link_node(&child->rb_node, rb_parent, new); rb_insert_color(&child->rb_node, root); + +#ifdef __WIN32__ + /* Case insensitive child dentry index */ + root = &parent->d_inode->i_children_case_insensitive; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = container_of(*new, struct wim_dentry, + rb_node_case_insensitive); + int result = dentry_compare_names_case_insensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + list_add(&child->case_insensitive_conflict_list, + &this->case_insensitive_conflict_list); + return NULL; + + } + } + rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); + rb_insert_color(&child->rb_node_case_insensitive, root); + INIT_LIST_HEAD(&child->case_insensitive_conflict_list); +#endif return NULL; } @@ -1125,8 +1169,14 @@ dentry_add_child(struct wim_dentry * restrict parent, void unlink_dentry(struct wim_dentry *dentry) { - if (!dentry_is_root(dentry)) + if (!dentry_is_root(dentry)) { rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children); + #ifdef __WIN32__ + rb_erase(&dentry->rb_node_case_insensitive, + &dentry->parent->d_inode->i_children_case_insensitive); + list_del(&dentry->case_insensitive_conflict_list); + #endif + } } /* @@ -1400,39 +1450,6 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode, } #endif /* !__WIN32__ */ -/* Replace weird characters in filenames and alternate data stream names. - * - * In particular we do not want the path separator to appear in any names, as - * that would make it possible for a "malicious" WIM to extract itself to any - * location it wanted to. */ -static void -replace_forbidden_characters(utf16lechar *name) -{ - utf16lechar *p; - - for (p = name; *p; p++) { - #ifdef __WIN32__ - if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p)) - #else - if (*p == cpu_to_le16('/')) - #endif - { - if (name) { - WARNING("File, directory, or stream name \"%"WS"\"\n" - " contains forbidden characters; " - "substituting replacement characters.", - name); - name = NULL; - } - #ifdef __WIN32__ - *p = cpu_to_le16(0xfffd); - #else - *p = cpu_to_le16('?'); - #endif - } - } -} - /* * Reads the alternate data stream entries of a WIM dentry. * @@ -1526,7 +1543,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, disk_entry->stream_name, cur_entry->stream_name_nbytes); cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(cur_entry->stream_name); } /* It's expected that the size of every ADS entry is a multiple @@ -1732,7 +1748,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(file_name, p, file_name_nbytes); p += file_name_nbytes + 2; file_name[file_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(file_name); } else { file_name = NULL; } @@ -1751,7 +1766,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(short_name, p, short_name_nbytes); p += short_name_nbytes + 2; short_name[short_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(short_name); } else { short_name = NULL; } @@ -1880,30 +1894,11 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, const tchar *child_type, *duplicate_type; child_type = dentry_get_file_type_string(child); duplicate_type = dentry_get_file_type_string(duplicate); - /* On UNIX, duplicates are exact. On Windows, - * duplicates may differ by case and we wish to provide - * a different warning message in this case. */ - #ifdef __WIN32__ - if (dentry_compare_names_case_sensitive(child, duplicate)) - { - child->parent = dentry; - WARNING("Ignoring %ls \"%ls\", which differs " - "only in case from %ls \"%ls\"", - child_type, - dentry_full_path(child), - duplicate_type, - dentry_full_path(duplicate)); - } - else - #endif - { - WARNING("Ignoring duplicate %"TS" \"%"TS"\" " - "(the WIM image already contains a %"TS" " - "at that path with the exact same name)", - child_type, dentry_full_path(duplicate), - duplicate_type); - } - free_dentry(child); + WARNING("Ignoring duplicate %"TS" \"%"TS"\" " + "(the WIM image already contains a %"TS" " + "at that path with the exact same name)", + child_type, dentry_full_path(duplicate), + duplicate_type); } else { inode_add_dentry(child, child->d_inode); /* If there are children of this child, call this diff --git a/src/extract.c b/src/extract.c index f1415632..5c6c38a0 100644 --- a/src/extract.c +++ b/src/extract.c @@ -30,6 +30,8 @@ #include "wimlib/apply.h" #include "wimlib/dentry.h" +#include "wimlib/encoding.h" +#include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/lookup_table.h" #include "wimlib/paths.h" @@ -55,27 +57,28 @@ do_apply_op(struct wim_dentry *dentry, struct apply_args *args, struct wim_dentry *, struct apply_args *)) { tchar *p; - const tchar *full_path; - size_t full_path_nchars; + size_t extraction_path_nchars; + struct wim_dentry *d; + LIST_HEAD(ancestor_list); - wimlib_assert(dentry->_full_path != NULL); - full_path = dentry->_full_path + 1; - full_path_nchars = dentry->full_path_nbytes / sizeof(tchar) - 1; - tchar output_path[args->target_nchars + 1 + - (full_path_nchars - args->wim_source_path_nchars) + 1]; - p = output_path; + extraction_path_nchars = args->target_nchars; - tmemcpy(p, args->target, args->target_nchars); - p += args->target_nchars; + for (d = dentry; d != args->extract_root; d = d->parent) { + if (d->not_extracted) + return 0; + extraction_path_nchars += d->extraction_name_nchars + 1; + list_add(&d->tmp_list, &ancestor_list); + } + + tchar extraction_path[extraction_path_nchars + 1]; + p = tmempcpy(extraction_path, args->target, args->target_nchars); - if (dentry != args->extract_root) { - *p++ = T('/'); - tmemcpy(p, full_path + args->wim_source_path_nchars, - full_path_nchars - args->wim_source_path_nchars); - p += full_path_nchars - args->wim_source_path_nchars; + list_for_each_entry(d, &ancestor_list, tmp_list) { + *p++ = OS_PREFERRED_PATH_SEPARATOR; + p = tmempcpy(p, d->extraction_name, d->extraction_name_nchars); } *p = T('\0'); - return (*apply_dentry_func)(output_path, p - output_path, + return (*apply_dentry_func)(extraction_path, extraction_path_nchars, dentry, args); } @@ -103,6 +106,17 @@ apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg) #endif } +static bool +dentry_is_dot_or_dotdot(const struct wim_dentry *dentry) +{ + const utf16lechar *file_name = dentry->file_name; + return file_name != NULL && + file_name[0] == cpu_to_le16('.') && + (file_name[1] == cpu_to_le16('\0') || + (file_name[1] == cpu_to_le16('.') && + file_name[2] == cpu_to_le16('\0'))); +} + /* Extract a dentry if it hasn't already been extracted and either * WIMLIB_EXTRACT_FLAG_NO_STREAMS is not specified, or the dentry is a directory * and/or has no unnamed stream. */ @@ -122,6 +136,9 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg) if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) && args->progress_func) { + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; args->progress.extract.cur_path = dentry->_full_path; args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY, &args->progress); @@ -189,13 +206,14 @@ dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx) struct list_head *stream_list = &ctx->stream_list; int extract_flags = ctx->extract_flags; - dentry->needs_extraction = 1; + if (!dentry->needs_extraction) + return 0; lte = inode_unnamed_lte_resolved(inode); if (lte) { if (!inode->i_visited) maybe_add_stream_for_extraction(lte, stream_list); - list_add_tail(&dentry->tmp_list, <e->lte_dentry_list); + list_add_tail(&dentry->extraction_stream_list, <e->lte_dentry_list); dentry_added = true; } @@ -222,7 +240,7 @@ dentry_find_streams_to_extract(struct wim_dentry *dentry, void *_ctx) stream_list); } if (!dentry_added) { - list_add_tail(&dentry->tmp_list, + list_add_tail(&dentry->extraction_stream_list, <e->lte_dentry_list); dentry_added = true; } @@ -265,14 +283,6 @@ find_streams_for_extraction(struct wim_dentry *root, list_transfer(&ctx.stream_list, stream_list); } -static int -dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore) -{ - dentry->needs_extraction = 0; - dentry->d_inode->i_visited = 0; - return 0; -} - struct apply_operations { int (*apply_dentry)(struct wim_dentry *dentry, void *arg); int (*apply_dentry_timestamps)(struct wim_dentry *dentry, void *arg); @@ -314,7 +324,7 @@ apply_stream_list(struct list_head *stream_list, list_for_each_entry(lte, stream_list, extraction_list) { /* For each dentry to be extracted that is a name for an inode * containing the stream */ - list_for_each_entry(dentry, <e->lte_dentry_list, tmp_list) { + list_for_each_entry(dentry, <e->lte_dentry_list, extraction_stream_list) { /* Extract the dentry if it was not already * extracted */ ret = maybe_apply_dentry(dentry, args); @@ -402,6 +412,194 @@ extract_dentry_to_stdout(struct wim_dentry *dentry) return ret; } +#ifdef __WIN32__ +static const utf16lechar replacement_char = cpu_to_le16(0xfffd); +#else +static const utf16lechar replacement_char = cpu_to_le16('?'); +#endif + +static bool +file_name_valid(utf16lechar *name, size_t num_chars, bool fix) +{ + size_t i; + + if (num_chars == 0) + return true; + for (i = 0; i < num_chars; i++) { + switch (name[i]) { + #ifdef __WIN32__ + case cpu_to_le16('\\'): + case cpu_to_le16(':'): + case cpu_to_le16('*'): + case cpu_to_le16('?'): + case cpu_to_le16('"'): + case cpu_to_le16('<'): + case cpu_to_le16('>'): + case cpu_to_le16('|'): + #endif + case cpu_to_le16('/'): + case cpu_to_le16('\0'): + if (fix) + name[i] = replacement_char; + else + return false; + } + } + + if (name[num_chars - 1] == cpu_to_le16(' ') || + name[num_chars - 1] == cpu_to_le16('.')) + { + if (fix) + name[num_chars - 1] = replacement_char; + else + return false; + } + return true; +} + +/* + * dentry_calculate_extraction_path- + * + * Calculate the actual filename component at which a WIM dentry will be + * extracted, handling invalid filenames "properly". + * + * dentry->extraction_name usually will be set the same as dentry->file_name (on + * UNIX, converted into the platform's multibyte encoding). However, if the + * file name contains characters that are not valid on the current platform or + * has some other format that is not valid, leave dentry->extraction_name as + * NULL and clear dentry->needs_extraction to indicate that this dentry should + * not be extracted, unless the appropriate flag + * WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES is set in the extract flags, in + * which case a substitute filename will be created and set instead. + * + * Conflicts with case-insensitive names on Windows are handled similarly; see + * below. + */ +static int +dentry_calculate_extraction_path(struct wim_dentry *dentry, void *_args) +{ + struct apply_args *args = _args; + int ret; + + dentry->needs_extraction = 1; + + if (dentry == args->extract_root) + return 0; + + if (dentry_is_dot_or_dotdot(dentry)) { + /* WIM files shouldn't contain . or .. entries. But if they are + * there, don't attempt to extract them. */ + WARNING("Skipping extraction of unexpected . or .. file \"%"TS"\"", + dentry_full_path(dentry)); + goto skip_dentry; + } + +#ifdef __WIN32__ + struct wim_dentry *other; + list_for_each_entry(other, &dentry->case_insensitive_conflict_list, + case_insensitive_conflict_list) + { + if (other->needs_extraction) { + if (args->extract_flags & WIMLIB_EXTRACT_FLAG_ALL_CASE_CONFLICTS) + { + WARNING("\"%"TS"\" has the same case-insensitive " + "name as \"%"TS"\"; extracting dummy name instead", + dentry_full_path(dentry), + dentry_full_path(other)); + goto out_replace; + } else { + WARNING("Not extracting \"%"TS"\": has same case-insensitive " + "name as \"%"TS"\"", + dentry_full_path(dentry), + dentry_full_path(other)); + goto skip_dentry; + } + } + } +#endif + + if (file_name_valid(dentry->file_name, dentry->file_name_nbytes / 2, false)) { +#ifdef __WIN32__ + dentry->extraction_name = dentry->file_name; + dentry->extraction_name_nchars = dentry->file_name_nbytes / 2; + return 0; +#else + return utf16le_to_tstr(dentry->file_name, + dentry->file_name_nbytes, + &dentry->extraction_name, + &dentry->extraction_name_nchars); +#endif + } else { + if (args->extract_flags & WIMLIB_EXTRACT_FLAG_REPLACE_INVALID_FILENAMES) + { + WARNING("\"%"TS"\" has an invalid filename " + "that is not supported on this platform; " + "extracting dummy name instead", + dentry_full_path(dentry)); + goto out_replace; + } else { + WARNING("Not extracting \"%"TS"\": has an invalid filename " + "that is not supported on this platform", + dentry_full_path(dentry)); + goto skip_dentry; + } + } + +out_replace: + { + utf16lechar utf16_name_copy[dentry->file_name_nbytes / 2]; + + memcpy(utf16_name_copy, dentry->file_name, dentry->file_name_nbytes); + file_name_valid(utf16_name_copy, dentry->file_name_nbytes / 2, true); + + tchar *tchar_name; + size_t tchar_nchars; + #ifdef __WIN32__ + tchar_name = utf16_name_copy; + tchar_nchars = dentry->file_name_nbytes / 2; + #else + ret = utf16le_to_tstr(utf16_name_copy, + dentry->file_name_nbytes, + &tchar_name, &tchar_nchars); + if (ret) + return ret; + #endif + size_t fixed_name_num_chars = tchar_nchars; + tchar fixed_name[tchar_nchars + 50]; + size_t extraction_name_nbytes; + + tmemcpy(fixed_name, tchar_name, tchar_nchars); + fixed_name_num_chars += tsprintf(fixed_name + tchar_nchars, + T(" (invalid filename #%lu)"), + ++args->invalid_sequence); + dentry->extraction_name = memdup(fixed_name, 2 * fixed_name_num_chars + 2); + if (!dentry->extraction_name) + return WIMLIB_ERR_NOMEM; + dentry->extraction_name_nchars = fixed_name_num_chars; + } + return 0; +skip_dentry: + dentry->needs_extraction = 0; + dentry->not_extracted = 1; + return 0; +} + +static int +dentry_reset_needs_extraction(struct wim_dentry *dentry, void *_ignore) +{ + dentry->needs_extraction = 0; + dentry->not_extracted = 0; + dentry->is_win32_name = 0; + dentry->d_inode->i_visited = 0; + dentry->d_inode->i_dos_name_extracted = 0; + FREE(dentry->d_inode->i_extracted_file); + dentry->d_inode->i_extracted_file = NULL; + if ((void*)dentry->extraction_name != (void*)dentry->file_name) + FREE(dentry->extraction_name); + dentry->extraction_name = NULL; + return 0; +} + /* * extract_tree - Extract a file or directory tree from the currently selected * WIM image. @@ -454,7 +652,6 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target, args.extract_flags = extract_flags; args.progress_func = progress_func; args.target_nchars = tstrlen(target); - args.wim_source_path_nchars = tstrlen(wim_source_path); if (progress_func) { args.progress.extract.wimfile_name = wim->filename; @@ -490,9 +687,12 @@ extract_tree(WIMStruct *wim, const tchar *wim_source_path, const tchar *target, } args.extract_root = root; - ret = calculate_dentry_tree_full_paths(root); + /* Calculate the actual filename component of each extracted dentry, and + * in the process set the dentry->needs_extraction flag on dentries that + * will be extracted. */ + ret = for_dentry_in_tree(root, dentry_calculate_extraction_path, &args); if (ret) - goto out_ntfs_umount; + goto out_dentry_reset_needs_extraction; /* Build a list of the streams that need to be extracted */ find_streams_for_extraction(root, diff --git a/src/ntfs-3g_apply.c b/src/ntfs-3g_apply.c index 0bf7989b..ffe3a8b0 100644 --- a/src/ntfs-3g_apply.c +++ b/src/ntfs-3g_apply.c @@ -538,6 +538,10 @@ apply_dentry_ntfs(struct wim_dentry *dentry, void *arg) struct wim_dentry *other; int ret; + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + /* Treat the root dentry specially. */ if (dentry_is_root(dentry)) return apply_root_dentry_ntfs(dentry, vol, w, diff --git a/src/unix_apply.c b/src/unix_apply.c index 6b3ede19..b1d91482 100644 --- a/src/unix_apply.c +++ b/src/unix_apply.c @@ -95,8 +95,13 @@ unix_extract_regular_file_linked(struct wim_dentry *dentry, char *p; const char *p2; size_t i; + const struct wim_dentry *d; - num_path_components = get_num_path_components(dentry->_full_path) - 1; + num_path_components = 0; + for (d = dentry; d != args->extract_root; d = d->parent) + num_path_components++; + wimlib_assert(num_path_components > 0); + num_path_components--; num_output_dir_path_components = get_num_path_components(args->target); if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) { @@ -339,7 +344,7 @@ unix_extract_symlink(struct wim_dentry *dentry, if (ret <= 0) { ERROR("Could not read the symbolic link from dentry `%s'", - dentry->_full_path); + dentry_full_path(dentry)); return WIMLIB_ERR_INVALID_DENTRY; } target[args->target_realpath_len + ret] = '\0'; @@ -380,35 +385,33 @@ unix_extract_symlink(struct wim_dentry *dentry, } static int -unix_extract_directory(struct wim_dentry *dentry, const tchar *output_path, +unix_extract_directory(struct wim_dentry *dentry, const char *output_path, int extract_flags) { int ret; struct stat stbuf; - ret = tstat(output_path, &stbuf); + ret = stat(output_path, &stbuf); if (ret == 0) { if (S_ISDIR(stbuf.st_mode)) { goto dir_exists; } else { - ERROR("`%"TS"' is not a directory", output_path); + ERROR("\"%s\" is not a directory", output_path); return WIMLIB_ERR_MKDIR; } } else { if (errno != ENOENT) { - ERROR_WITH_ERRNO("Failed to stat `%"TS"'", output_path); + ERROR_WITH_ERRNO("Failed to stat \"%s\"", output_path); return WIMLIB_ERR_STAT; } } - if (tmkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) - { - ERROR_WITH_ERRNO("Cannot create directory `%"TS"'", output_path); + if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) { + ERROR_WITH_ERRNO("Cannot create directory \"%s\"", output_path); return WIMLIB_ERR_MKDIR; } dir_exists: ret = 0; -#ifndef __WIN32__ if (extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) { struct wimlib_unix_data unix_data; ret = inode_get_unix_data(dentry->d_inode, &unix_data, NULL); @@ -420,7 +423,6 @@ dir_exists: ret = dir_apply_unix_data(output_path, &unix_data, extract_flags); } -#endif return ret; } diff --git a/src/win32_apply.c b/src/win32_apply.c index 120ec210..ffee267c 100644 --- a/src/win32_apply.c +++ b/src/win32_apply.c @@ -1235,6 +1235,7 @@ win32_do_apply_dentry(const wchar_t *output_path, !(args->vol_flags & FILE_SUPPORTS_REPARSE_POINTS)) { WARNING("Not extracting reparse point \"%ls\"", output_path); + dentry->not_extracted = 1; } else { /* Create the file, directory, or reparse point, and extract the * data streams. */ @@ -1277,13 +1278,6 @@ win32_do_apply_dentry_timestamps(const wchar_t *path, HANDLE h; const struct wim_inode *inode = dentry->d_inode; - if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT && - !(args->vol_flags & FILE_SUPPORTS_REPARSE_POINTS)) - { - /* Skip reparse points not extracted */ - return 0; - } - /* Windows doesn't let you change the timestamps of the root directory * (at least on FAT, which is dumb but expected since FAT doesn't store * any metadata about the root directory...) */ -- 2.43.0