wimlib.net Git - wimlib/blobdiff - src/extract_image.c
implement WIMLIB_INIT_FLAG_ASSUME_UTF8
[wimlib] / src / extract_image.c
index d637a10633410452e7b2372eb89bc90bb86bea57..fc24d9bd6df04e8c34ac1e892be210a189aeb19a 100644 (file)
 #endif
 
 #ifndef __WIN32__
+
+/*
+ * Counts the components of @path, where a component is a maximal run of
+ * characters other than '/'.  Leading, trailing and repeated slashes add
+ * nothing to the count, so "" and "/" both give 0 and "/a//b/" gives 2.
+ */
+static unsigned
+get_num_path_components(const char *path)
+{
+       unsigned count = 0;
+       int in_component = 0;
+       const char *s;
+
+       for (s = path; *s != '\0'; s++) {
+               if (*s == '/') {
+                       /* A separator ends whatever component was open. */
+                       in_component = 0;
+               } else if (!in_component) {
+                       /* First character of a new component. */
+                       in_component = 1;
+                       count++;
+               }
+       }
+       return count;
+}
+
+/*
+ * Returns a pointer just past the leading part of @path: every character up
+ * to the first '/' is skipped, and then the whole run of '/' characters that
+ * follows.  When nothing remains, the result points at the terminating null
+ * character of @path.
+ */
+static const char *
+path_next_part(const char *path)
+{
+       const char *s;
+
+       /* Step over the current component. */
+       for (s = path; *s != '\0' && *s != '/'; s++)
+               ;
+       /* Step over the separators behind it. */
+       for (; *s == '/'; s++)
+               ;
+       return s;
+}
+
 static int
 extract_regular_file_linked(struct wim_dentry *dentry,
-                           const mbchar *output_path,
+                           const char *output_path,
                            struct apply_args *args,
                            struct wim_lookup_table_entry *lte)
 {
@@ -80,14 +107,12 @@ extract_regular_file_linked(struct wim_dentry *dentry,
                int num_path_components;
                int num_output_dir_path_components;
                size_t extracted_file_len;
-               mbchar *p;
-               const mbchar *p2;
+               char *p;
+               const char *p2;
                size_t i;
 
-               num_path_components =
-                       get_num_path_components(dentry->full_path) - 1;
-               num_output_dir_path_components =
-                       get_num_path_components(args->target);
+               num_path_components = get_num_path_components(output_path) - 1;
+               num_output_dir_path_components = get_num_path_components(args->target);
 
                if (args->extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
                        num_path_components++;
@@ -95,7 +120,7 @@ extract_regular_file_linked(struct wim_dentry *dentry,
                }
                extracted_file_len = strlen(lte->extracted_file);
 
-               mbchar buf[extracted_file_len + 3 * num_path_components + 1];
+               char buf[extracted_file_len + 3 * num_path_components + 1];
                p = &buf[0];
 
                for (i = 0; i < num_path_components; i++) {
@@ -106,8 +131,10 @@ extract_regular_file_linked(struct wim_dentry *dentry,
                p2 = lte->extracted_file;
                while (*p2 == '/')
                        p2++;
-               while (num_output_dir_path_components--)
-                       p2 = path_next_part(p2, NULL);
+               while (num_output_dir_path_components > 0) {
+                       p2 = path_next_part(p2);
+                       num_output_dir_path_components--;
+               }
                strcpy(p, p2);
                if (symlink(buf, output_path) != 0) {
                        ERROR_WITH_ERRNO("Failed to symlink `%s' to `%s'",
@@ -119,7 +146,7 @@ extract_regular_file_linked(struct wim_dentry *dentry,
 }
 
 static int
-symlink_apply_unix_data(const mbchar *link,
+symlink_apply_unix_data(const char *link,
                        const struct wimlib_unix_data *unix_data)
 {
        if (lchown(link, unix_data->uid, unix_data->gid)) {
@@ -160,7 +187,7 @@ fd_apply_unix_data(int fd, const struct wimlib_unix_data *unix_data)
 }
 
 static int
-dir_apply_unix_data(const mbchar *dir, const struct wimlib_unix_data *unix_data)
+dir_apply_unix_data(const char *dir, const struct wimlib_unix_data *unix_data)
 {
        int dfd = open(dir, O_RDONLY);
        int ret;
@@ -180,7 +207,7 @@ dir_apply_unix_data(const mbchar *dir, const struct wimlib_unix_data *unix_data)
 static int
 extract_regular_file_unlinked(struct wim_dentry *dentry,
                              struct apply_args *args,
-                             const mbchar *output_path,
+                             const char *output_path,
                              struct wim_lookup_table_entry *lte)
 {
        /* Normal mode of extraction.  Regular files and hard links are
@@ -239,7 +266,7 @@ extract_regular_file_unlinked(struct wim_dentry *dentry,
        }
 
        ret = extract_wim_resource_to_fd(lte, out_fd, wim_resource_size(lte));
-       if (ret != 0) {
+       if (ret) {
                ERROR("Failed to extract resource to `%s'", output_path);
                goto out;
        }
@@ -271,7 +298,7 @@ out:
 static int
 extract_regular_file(struct wim_dentry *dentry,
                     struct apply_args *args,
-                    const mbchar *output_path)
+                    const char *output_path)
 {
        struct wim_lookup_table_entry *lte;
        const struct wim_inode *inode = dentry->d_inode;
@@ -295,16 +322,16 @@ extract_regular_file(struct wim_dentry *dentry,
 static int
 extract_symlink(struct wim_dentry *dentry,
                struct apply_args *args,
-               const mbchar *output_path)
+               const char *output_path)
 {
-       mbchar target[4096];
+       char target[4096];
        ssize_t ret = inode_readlink(dentry->d_inode, target,
-                                    sizeof(target), args->w, 0);
+                                    sizeof(target), args->w, false);
        struct wim_lookup_table_entry *lte;
 
        if (ret <= 0) {
                ERROR("Could not read the symbolic link from dentry `%s'",
-                     dentry->full_path);
+                     dentry_full_path(dentry));
                return WIMLIB_ERR_INVALID_DENTRY;
        }
        ret = symlink(target, output_path);
@@ -335,31 +362,31 @@ extract_symlink(struct wim_dentry *dentry,
 
 static int
 extract_directory(struct wim_dentry *dentry,
-                 const mbchar *output_path, bool is_root)
+                 const tchar *output_path, bool is_root)
 {
        int ret;
        struct stat stbuf;
 
-       ret = stat(output_path, &stbuf);
+       ret = tstat(output_path, &stbuf);
        if (ret == 0) {
                if (S_ISDIR(stbuf.st_mode)) {
                        /*if (!is_root)*/
                                /*WARNING("`%s' already exists", output_path);*/
                        goto dir_exists;
                } else {
-                       ERROR("`%s' is not a directory", output_path);
+                       ERROR("`%"TS"' is not a directory", output_path);
                        return WIMLIB_ERR_MKDIR;
                }
        } else {
                if (errno != ENOENT) {
-                       ERROR_WITH_ERRNO("Failed to stat `%s'", output_path);
+                       ERROR_WITH_ERRNO("Failed to stat `%"TS"'", output_path);
                        return WIMLIB_ERR_STAT;
                }
        }
 
-       if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))
+       if (tmkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH))
        {
-               ERROR_WITH_ERRNO("Cannot create directory `%s'", output_path);
+               ERROR_WITH_ERRNO("Cannot create directory `%"TS"'", output_path);
                return WIMLIB_ERR_MKDIR;
        }
 dir_exists:
@@ -380,7 +407,7 @@ dir_exists:
 }
 
 #ifndef __WIN32__
-static int unix_do_apply_dentry(const mbchar *output_path,
+static int unix_do_apply_dentry(const char *output_path,
                                size_t output_path_len,
                                struct wim_dentry *dentry,
                                struct apply_args *args)
@@ -398,7 +425,7 @@ static int unix_do_apply_dentry(const mbchar *output_path,
 }
 
 static int
-unix_do_apply_dentry_timestamps(const mbchar *output_path,
+unix_do_apply_dentry_timestamps(const char *output_path,
                                size_t output_path_len,
                                const struct wim_dentry *dentry,
                                struct apply_args *args)
@@ -463,18 +490,20 @@ static int
 apply_dentry_normal(struct wim_dentry *dentry, void *arg)
 {
        struct apply_args *args = arg;
+       tchar *output_path;
        size_t len;
-       mbchar *output_path;
+       int ret;
 
-       len = strlen(args->target);
+       len = tstrlen(args->target);
        if (dentry_is_root(dentry)) {
-               output_path = (mbchar*)args->target;
+               output_path = (tchar*)args->target;
        } else {
-               output_path = alloca(len + dentry->full_path_nbytes + 1);
-               memcpy(output_path, args->target, len);
-               memcpy(output_path + len, dentry->full_path, dentry->full_path_nbytes);
-               output_path[len + dentry->full_path_nbytes] = '\0';
-               len += dentry->full_path_nbytes;
+               output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
+                                    sizeof(tchar));
+               memcpy(output_path, args->target, len * sizeof(tchar));
+               memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
+               len += dentry->full_path_nbytes / sizeof(tchar);
+               output_path[len] = T('\0');
        }
 #ifdef __WIN32__
        return win32_do_apply_dentry(output_path, len, dentry, args);
@@ -490,19 +519,23 @@ apply_dentry_timestamps_normal(struct wim_dentry *dentry, void *arg)
 {
        struct apply_args *args = arg;
        size_t len;
-       mbchar *output_path;
+       tchar *output_path;
 
-       len = strlen(args->target);
+       len = tstrlen(args->target);
        if (dentry_is_root(dentry)) {
-               output_path = (mbchar*)args->target;
+               output_path = (tchar*)args->target;
        } else {
-               output_path = alloca(len + dentry->full_path_nbytes + 1);
-               memcpy(output_path, args->target, len);
-               memcpy(output_path + len, dentry->full_path, dentry->full_path_nbytes);
-               output_path[len + dentry->full_path_nbytes] = '\0';
-               len += dentry->full_path_nbytes;
+               if (!dentry_full_path(dentry))
+                       return WIMLIB_ERR_NOMEM;
+               output_path = alloca(len * sizeof(tchar) + dentry->full_path_nbytes +
+                                    sizeof(tchar));
+               memcpy(output_path, args->target, len * sizeof(tchar));
+               memcpy(output_path + len, dentry->_full_path, dentry->full_path_nbytes);
+               len += dentry->full_path_nbytes / sizeof(tchar);
+               output_path[len] = T('\0');
        }
 
+
 #ifdef __WIN32__
        return win32_do_apply_dentry_timestamps(output_path, len, dentry, args);
 #else
@@ -521,13 +554,17 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
        if (dentry->is_extracted)
                return 0;
 
+       ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
+
        if (args->extract_flags & WIMLIB_EXTRACT_FLAG_NO_STREAMS)
                if (inode_unnamed_lte_resolved(dentry->d_inode))
                        return 0;
 
        if ((args->extract_flags & WIMLIB_EXTRACT_FLAG_VERBOSE) &&
             args->progress_func) {
-               args->progress.extract.cur_path = dentry->full_path;
+               args->progress.extract.cur_path = dentry->_full_path;
                args->progress_func(WIMLIB_PROGRESS_MSG_EXTRACT_DENTRY,
                                    &args->progress);
        }
@@ -537,54 +574,6 @@ maybe_apply_dentry(struct wim_dentry *dentry, void *arg)
        return ret;
 }
 
-static int
-cmp_streams_by_wim_position(const void *p1, const void *p2)
-{
-       const struct wim_lookup_table_entry *lte1, *lte2;
-       lte1 = *(const struct wim_lookup_table_entry**)p1;
-       lte2 = *(const struct wim_lookup_table_entry**)p2;
-       if (lte1->resource_entry.offset < lte2->resource_entry.offset)
-               return -1;
-       else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
-               return 1;
-       else
-               return 0;
-}
-
-static int
-sort_stream_list_by_wim_position(struct list_head *stream_list)
-{
-       struct list_head *cur;
-       size_t num_streams;
-       struct wim_lookup_table_entry **array;
-       size_t i;
-       size_t array_size;
-
-       num_streams = 0;
-       list_for_each(cur, stream_list)
-               num_streams++;
-       array_size = num_streams * sizeof(array[0]);
-       array = MALLOC(array_size);
-       if (!array) {
-               ERROR("Failed to allocate %zu bytes to sort stream entries",
-                     array_size);
-               return WIMLIB_ERR_NOMEM;
-       }
-       cur = stream_list->next;
-       for (i = 0; i < num_streams; i++) {
-               array[i] = container_of(cur, struct wim_lookup_table_entry, staging_list);
-               cur = cur->next;
-       }
-
-       qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
-
-       INIT_LIST_HEAD(stream_list);
-       for (i = 0; i < num_streams; i++)
-               list_add_tail(&array[i]->staging_list, stream_list);
-       FREE(array);
-       return 0;
-}
-
 static void
 calculate_bytes_to_extract(struct list_head *stream_list,
                           int extract_flags,
@@ -595,7 +584,7 @@ calculate_bytes_to_extract(struct list_head *stream_list,
        u64 num_streams = 0;
 
        /* For each stream to be extracted... */
-       list_for_each_entry(lte, stream_list, staging_list) {
+       list_for_each_entry(lte, stream_list, extraction_list) {
                if (extract_flags &
                    (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
                {
@@ -623,7 +612,7 @@ maybe_add_stream_for_extraction(struct wim_lookup_table_entry *lte,
 {
        if (++lte->out_refcnt == 1) {
                INIT_LIST_HEAD(&lte->inode_list);
-               list_add_tail(&lte->staging_list, stream_list);
+               list_add_tail(&lte->extraction_list, stream_list);
        }
 }
 
@@ -662,18 +651,17 @@ inode_find_streams_for_extraction(struct wim_inode *inode,
 }
 
 static void
-find_streams_for_extraction(struct hlist_head *inode_list,
+find_streams_for_extraction(struct wim_image_metadata *imd,
                            struct list_head *stream_list,
                            struct wim_lookup_table *lookup_table,
                            int extract_flags)
 {
        struct wim_inode *inode;
-       struct hlist_node *cur;
        struct wim_dentry *dentry;
 
        for_lookup_table_entry(lookup_table, lte_zero_out_refcnt, NULL);
        INIT_LIST_HEAD(stream_list);
-       hlist_for_each_entry(inode, cur, inode_list, i_hlist) {
+       image_for_each_inode(inode, imd) {
                if (!inode->i_resolved)
                        inode_resolve_ltes(inode, lookup_table);
                inode_for_each_dentry(dentry, inode)
@@ -722,7 +710,7 @@ apply_stream_list(struct list_head *stream_list,
         * sequential reading of the WIM can be implemented. */
 
        /* For each distinct stream to be extracted */
-       list_for_each_entry(lte, stream_list, staging_list) {
+       list_for_each_entry(lte, stream_list, extraction_list) {
                /* For each inode that contains the stream */
                list_for_each_entry(inode, &lte->inode_list, i_lte_inode_list) {
                        /* For each dentry that points to the inode */
@@ -754,25 +742,59 @@ apply_stream_list(struct list_head *stream_list,
        return 0;
 }
 
+/*
+ * Reorders the lookup table entries linked into @stream_list (through their
+ * extraction_list members), using cmp_streams_by_wim_position() as the
+ * comparison function.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if the temporary array used for
+ * sorting cannot be allocated; in that case the list is left unmodified.
+ */
+static int
+sort_stream_list_by_wim_position(struct list_head *stream_list)
+{
+       struct list_head *cur;
+       struct wim_lookup_table_entry **array;
+       size_t num_streams = 0;
+       size_t array_size;
+       size_t i;
+
+       list_for_each(cur, stream_list)
+               num_streams++;
+
+       /* A list with zero or one entries is already sorted.  Returning here
+        * also keeps an empty list from requesting a 0-byte allocation, which
+        * a malloc()-style allocator may answer with NULL and which would then
+        * be misreported as an out-of-memory error. */
+       if (num_streams <= 1)
+               return 0;
+
+       array_size = num_streams * sizeof(array[0]);
+       array = MALLOC(array_size);
+       if (!array) {
+               ERROR("Failed to allocate %zu bytes to sort stream entries",
+                     array_size);
+               return WIMLIB_ERR_NOMEM;
+       }
+
+       /* Collect the entries into a flat array so qsort() can be used. */
+       cur = stream_list->next;
+       for (i = 0; i < num_streams; i++) {
+               array[i] = container_of(cur, struct wim_lookup_table_entry, extraction_list);
+               cur = cur->next;
+       }
+
+       qsort(array, num_streams, sizeof(array[0]), cmp_streams_by_wim_position);
+
+       /* Rebuild the list in sorted order. */
+       INIT_LIST_HEAD(stream_list);
+       for (i = 0; i < num_streams; i++)
+               list_add_tail(&array[i]->extraction_list, stream_list);
+       FREE(array);
+       return 0;
+}
+
+
 /* Extracts the image @image from the WIM @w to the directory or NTFS volume
  * @target. */
 static int
 extract_single_image(WIMStruct *w, int image,
-                    const mbchar *target, int extract_flags,
+                    const tchar *target, int extract_flags,
                     wimlib_progress_func_t progress_func)
 {
        int ret;
        struct list_head stream_list;
-       struct hlist_head *inode_list;
 
        struct apply_args args;
        const struct apply_operations *ops;
 
+       memset(&args, 0, sizeof(args));
+
        args.w                  = w;
        args.target             = target;
        args.extract_flags      = extract_flags;
-       args.num_utime_warnings = 0;
-       args.stream_list        = &stream_list;
        args.progress_func      = progress_func;
 
        if (progress_func) {
@@ -788,7 +810,8 @@ extract_single_image(WIMStruct *w, int image,
        if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
                args.vol = ntfs_mount(target, 0);
                if (!args.vol) {
-                       ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s'", target);
+                       ERROR_WITH_ERRNO("Failed to mount NTFS volume `%"TS"'",
+                                        target);
                        return WIMLIB_ERR_NTFS_3G;
                }
                ops = &ntfs_apply_operations;
@@ -797,13 +820,12 @@ extract_single_image(WIMStruct *w, int image,
                ops = &normal_apply_operations;
 
        ret = select_wim_image(w, image);
-       if (ret != 0)
+       if (ret)
                goto out;
 
-       inode_list = &w->image_metadata[image - 1].inode_list;
-
        /* Build a list of the streams that need to be extracted */
-       find_streams_for_extraction(inode_list, &stream_list,
+       find_streams_for_extraction(wim_get_current_image_metadata(w),
+                                   &stream_list,
                                    w->lookup_table, extract_flags);
 
        /* Calculate the number of bytes of data that will be extracted */
@@ -869,7 +891,8 @@ out:
        /* Unmount the NTFS volume */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
                if (ntfs_umount(args.vol, FALSE) != 0) {
-                       ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'", args.target);
+                       ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%"TS"'",
+                                        args.target);
                        if (ret == 0)
                                ret = WIMLIB_ERR_NTFS_3G;
                }
@@ -878,39 +901,53 @@ out:
        return ret;
 }
 
+/* Characters that image_name_ok_as_dir() refuses in an image name that is to
+ * be used as a directory name.  The platform conditional is kept outside the
+ * T() invocation: preprocessing directives inside the argument list of a
+ * function-like macro have undefined behavior (C11 6.10.3p11).  */
+#ifdef __WIN32__
+static const tchar *filename_forbidden_chars = T("<>:\"/\\|?*");
+#else
+static const tchar *filename_forbidden_chars = T("/");
+#endif
+
+/* This function checks if it is okay to use a WIM image's name as a directory
+ * name.  A usable name is non-NULL, non-empty, contains none of the
+ * characters in filename_forbidden_chars, and is neither "." nor "..": those
+ * two would name the extraction target itself or its parent rather than a new
+ * subdirectory.  */
+static bool
+image_name_ok_as_dir(const tchar *image_name)
+{
+       if (!image_name || !*image_name)
+               return false;
+       if (tstrpbrk(image_name, filename_forbidden_chars))
+               return false;
+       /* Reject the special directory entries "." and "..". */
+       if (image_name[0] == T('.') &&
+           (image_name[1] == T('\0') ||
+            (image_name[1] == T('.') && image_name[2] == T('\0'))))
+               return false;
+       return true;
+}
 
 /* Extracts all images from the WIM to the directory @target, with the images
  * placed in subdirectories named by their image names. */
 static int
-extract_all_images(WIMStruct *w, const mbchar *target,
+extract_all_images(WIMStruct *w,
+                  const tchar *target,
                   int extract_flags,
                   wimlib_progress_func_t progress_func)
 {
        size_t image_name_max_len = max(xml_get_max_image_name_len(w), 20);
-       size_t output_path_len = strlen(target);
-       mbchar buf[output_path_len + 1 + image_name_max_len + 1];
+       size_t output_path_len = tstrlen(target);
+       tchar buf[output_path_len + 1 + image_name_max_len + 1];
        int ret;
        int image;
-       const utf8char *image_name;
+       const tchar *image_name;
 
        ret = extract_directory(NULL, target, true);
-       if (ret != 0)
+       if (ret)
                return ret;
 
-       memcpy(buf, target, output_path_len);
-       buf[output_path_len] = '/';
+       tmemcpy(buf, target, output_path_len);
+       buf[output_path_len] = T('/');
        for (image = 1; image <= w->hdr.image_count; image++) {
                image_name = wimlib_get_image_name(w, image);
-               if (image_name && *image_name &&
-                   (wimlib_mbs_is_utf8 || !utf8_str_contains_nonascii_chars(image_name))
-                   && strchr(image_name, '/') == NULL)
-               {
-                       strcpy(buf + output_path_len + 1, image_name);
+               if (image_name_ok_as_dir(image_name)) {
+                       tstrcpy(buf + output_path_len + 1, image_name);
                } else {
-                       /* Image name is empty, or may not be representable in
-                        * the current locale, or contains path separators.  Use
-                        * the image number instead. */
-                       sprintf(buf + output_path_len + 1, "%d", image);
+                       /* Image name is empty, or contains forbidden
+                        * characters. */
+                       tsprintf(buf + output_path_len + 1, T("%d"), image);
                }
                ret = extract_single_image(w, image, buf, extract_flags,
                                           progress_func);
@@ -922,13 +959,14 @@ extract_all_images(WIMStruct *w, const mbchar *target,
 
 /* Extracts a single image or all images from a WIM file to a directory or NTFS
  * volume. */
-WIMLIBAPI int wimlib_extract_image(WIMStruct *w,
-                                  int image,
-                                  const char *target,
-                                  int extract_flags,
-                                  WIMStruct **additional_swms,
-                                  unsigned num_additional_swms,
-                                  wimlib_progress_func_t progress_func)
+WIMLIBAPI int
+wimlib_extract_image(WIMStruct *w,
+                    int image,
+                    const tchar *target,
+                    int extract_flags,
+                    WIMStruct **additional_swms,
+                    unsigned num_additional_swms,
+                    wimlib_progress_func_t progress_func)
 {
        struct wim_lookup_table *joined_tab, *w_tab_save;
        int ret;
@@ -977,13 +1015,17 @@ WIMLIBAPI int wimlib_extract_image(WIMStruct *w,
        }
 
        ret = verify_swm_set(w, additional_swms, num_additional_swms);
-       if (ret != 0)
+       if (ret)
+               return ret;
+
+       ret = wim_checksum_unhashed_streams(w);
+       if (ret)
                return ret;
 
        if (num_additional_swms) {
                ret = new_joined_lookup_table(w, additional_swms,
                                              num_additional_swms, &joined_tab);
-               if (ret != 0)
+               if (ret)
                        return ret;
                w_tab_save = w->lookup_table;
                w->lookup_table = joined_tab;