]> wimlib.net Git - wimlib/blobdiff - src/extract.c
Remove unused 'wim' argument to read_metadata_resource()
[wimlib] / src / extract.c
index c312429cc7e9555e48ee43e514d9269219e5b24e..39899c4094e2c5eb80b296bc93d6e09161123de0 100644 (file)
@@ -8,20 +8,18 @@
 /*
  * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 /*
         WIMLIB_EXTRACT_FLAG_NO_PRESERVE_DIR_STRUCTURE  |       \
         WIMLIB_EXTRACT_FLAG_WIMBOOT)
 
+/* Send WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE or
+ * WIMLIB_PROGRESS_MSG_EXTRACT_METADATA.
+ *
+ * Resets ctx->count_until_file_progress to 500 and then passes @msg to
+ * extract_progress(), whose return value is returned unchanged.  */
+int
+do_file_extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
+{
+       ctx->count_until_file_progress = 500;  /* Arbitrary value to limit calls  */
+       return extract_progress(ctx, msg);
+}
+
+/* Begin a file-counting progress phase: zero the current file count, record
+ * @end_file_count as the phase's end count, and send @msg through
+ * do_file_extract_progress().  Returns that call's result.  */
+static int
+start_file_phase(struct apply_ctx *ctx, uint64_t end_file_count, enum wimlib_progress_msg msg)
+{
+       ctx->progress.extract.current_file_count = 0;
+       ctx->progress.extract.end_file_count = end_file_count;
+       return do_file_extract_progress(ctx, msg);
+}
+
+/* Begin the file structure phase: calls start_file_phase() with
+ * WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE and returns its result.  */
+int
+start_file_structure_phase(struct apply_ctx *ctx, uint64_t end_file_count)
+{
+       return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE);
+}
+
+/* Begin the metadata phase: calls start_file_phase() with
+ * WIMLIB_PROGRESS_MSG_EXTRACT_METADATA and returns its result.  */
+int
+start_file_metadata_phase(struct apply_ctx *ctx, uint64_t end_file_count)
+{
+       return start_file_phase(ctx, end_file_count, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
+}
+
+/* Finish a file-counting progress phase: set the current file count equal to
+ * the recorded end file count and send @msg through
+ * do_file_extract_progress().  Returns that call's result.  */
+static int
+end_file_phase(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
+{
+       ctx->progress.extract.current_file_count = ctx->progress.extract.end_file_count;
+       return do_file_extract_progress(ctx, msg);
+}
+
+/* Finish the file structure phase: calls end_file_phase() with
+ * WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE and returns its result.  */
+int
+end_file_structure_phase(struct apply_ctx *ctx)
+{
+       return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE);
+}
+
+/* Finish the metadata phase: calls end_file_phase() with
+ * WIMLIB_PROGRESS_MSG_EXTRACT_METADATA and returns its result.  */
+int
+end_file_metadata_phase(struct apply_ctx *ctx)
+{
+       return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
+}
+
 /* Check whether the extraction of a dentry should be skipped completely.  */
 static bool
 dentry_is_supported(struct wim_dentry *dentry,
@@ -217,7 +263,7 @@ load_streams_from_pipe(struct apply_ctx *ctx,
                        lte_unbind_wim_resource_spec(found_lte);
                        lte_bind_wim_resource_spec(needed_lte, rspec);
 
-                       ret = (*cbs->begin_stream)(needed_lte, 0,
+                       ret = (*cbs->begin_stream)(needed_lte,
                                                   cbs->begin_stream_ctx);
                        if (ret) {
                                lte_unbind_wim_resource_spec(needed_lte);
@@ -260,34 +306,79 @@ load_streams_from_pipe(struct apply_ctx *ctx,
        }
        ret = 0;
 out:
-       if (found_lte->resource_location != RESOURCE_IN_WIM)
+       if (found_lte && found_lte->resource_location != RESOURCE_IN_WIM)
                FREE(rspec);
        free_lookup_table_entry(found_lte);
        return ret;
 }
 
+/* Creates a temporary file opened for writing.  The open file descriptor is
+ * returned in @fd_ret and its name is returned in @name_ret (dynamically
+ * allocated).
+ *
+ * Returns 0 on success.  Returns WIMLIB_ERR_NOMEM if ttempnam() yields no name,
+ * or WIMLIB_ERR_OPEN if the file cannot be opened for a reason other than
+ * EEXIST; in both failure cases @fd_ret and @name_ret are left untouched.  */
+static int
+create_temporary_file(struct filedes *fd_ret, tchar **name_ret)
+{
+       tchar *name;
+       int open_flags;
+       int raw_fd;
+
+retry:
+       name = ttempnam(NULL, T("wimlib"));
+       if (!name) {
+               ERROR_WITH_ERRNO("Failed to create temporary filename");
+               return WIMLIB_ERR_NOMEM;
+       }
+
+       /* O_CREAT | O_EXCL: the open is expected to fail with EEXIST, rather
+        * than reuse the file, if the generated name already exists.  */
+       open_flags = O_WRONLY | O_CREAT | O_EXCL | O_BINARY;
+#ifdef __WIN32__
+       open_flags |= _O_SHORT_LIVED;
+#endif
+       raw_fd = topen(name, open_flags, 0600);
+
+       if (raw_fd < 0) {
+               if (errno == EEXIST) {
+                       /* Name collision: discard this name and try again
+                        * with a freshly generated one.  */
+                       FREE(name);
+                       goto retry;
+               }
+               ERROR_WITH_ERRNO("Failed to create temporary file "
+                                "\"%"TS"\"", name);
+               FREE(name);
+               return WIMLIB_ERR_OPEN;
+       }
+
+       /* Success: hand the descriptor and the name to the caller.  */
+       filedes_init(fd_ret, raw_fd);
+       *name_ret = name;
+       return 0;
+}
+
+/* begin_stream callback wrapper: records @lte as the current stream and resets
+ * the current stream offset to 0.  If @lte has more than MAX_OPEN_STREAMS
+ * output references, a temporary file is created instead of calling the saved
+ * begin_stream callback; otherwise the saved callback is invoked.  Returns the
+ * result of whichever call was made.  */
 static int
-begin_extract_stream_with_progress(struct wim_lookup_table_entry *lte,
-                                  u32 flags, void *_ctx)
+begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
 
        ctx->cur_stream = lte;
+       ctx->cur_stream_offset = 0;
 
-       return (*ctx->saved_cbs->begin_stream)(lte, flags,
-                                              ctx->saved_cbs->begin_stream_ctx);
+       if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS))
+               return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name);
+       else
+               return (*ctx->saved_cbs->begin_stream)(lte, ctx->saved_cbs->begin_stream_ctx);
 }
 
 static int
-consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
+extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
        union wimlib_progress_info *progress = &ctx->progress;
        int ret;
 
+       ctx->cur_stream_offset += size;
+
        if (likely(ctx->supported_features.hard_links)) {
                progress->extract.completed_bytes +=
                        (u64)size * ctx->cur_stream->out_refcnt;
+               if (ctx->cur_stream_offset == ctx->cur_stream->size)
+                       progress->extract.completed_streams += ctx->cur_stream->out_refcnt;
        } else {
                const struct stream_owner *owners = stream_owners(ctx->cur_stream);
                for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) {
@@ -299,6 +390,8 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
                                            d_extraction_alias_node)
                        {
                                progress->extract.completed_bytes += size;
+                               if (ctx->cur_stream_offset == ctx->cur_stream->size)
+                                       progress->extract.completed_streams++;
                        }
                }
        }
@@ -313,13 +406,117 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
                {
                        ctx->next_progress = UINT64_MAX;
                } else {
-                       ctx->next_progress += progress->extract.total_bytes / 128;
-                       if (ctx->next_progress > progress->extract.total_bytes)
+                       /* Send new message as soon as another 1/128 of the
+                        * total has been extracted.  (Arbitrary number.)  */
+                       ctx->next_progress =
+                               progress->extract.completed_bytes +
+                                       progress->extract.total_bytes / 128;
+
+                       /* ... Unless that would be more than 5000000 bytes, in
+                        * which case send the next after the next 5000000
+                        * bytes.  (Another arbitrary number.)  */
+                       if (progress->extract.completed_bytes + 5000000 <
+                           ctx->next_progress)
+                               ctx->next_progress =
+                                       progress->extract.completed_bytes + 5000000;
+
+                       /* ... But always send a message as soon as we're
+                        * completely done.  */
+                       if (progress->extract.total_bytes < ctx->next_progress)
                                ctx->next_progress = progress->extract.total_bytes;
                }
        }
-       return (*ctx->saved_cbs->consume_chunk)(chunk, size,
-                                               ctx->saved_cbs->consume_chunk_ctx);
+
+       if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
+               /* Just extracting to temporary file for now.  */
+               ret = full_write(&ctx->tmpfile_fd, chunk, size);
+               if (ret) {
+                       ERROR_WITH_ERRNO("Error writing data to "
+                                        "temporary file \"%"TS"\"",
+                                        ctx->tmpfile_name);
+               }
+               return ret;
+       } else {
+               return (*ctx->saved_cbs->consume_chunk)(chunk, size,
+                                                       ctx->saved_cbs->consume_chunk_ctx);
+       }
+}
+
+/* Copy the data saved in the temporary file to every destination of the
+ * current stream (ctx->cur_stream), one destination at a time.
+ *
+ * For each of the stream's original out_refcnt owners, the stream entry is
+ * rewritten to have exactly that one owner, and the saved begin_stream /
+ * consume_chunk / end_stream callbacks are run over the temporary file's
+ * contents.
+ *
+ * Returns 0 on success; otherwise the first nonzero value returned by the
+ * saved begin_stream or end_stream callback.  In all cases the owners array is
+ * freed and the stream's out_refcnt is left at 0.
+ *
+ * NOTE(review): @tmpfile_name is never read; the body uses ctx->tmpfile_name
+ * instead.  */
+static int
+extract_from_tmpfile(const tchar *tmpfile_name, struct apply_ctx *ctx)
+{
+       struct wim_lookup_table_entry tmpfile_lte;
+       struct wim_lookup_table_entry *orig_lte = ctx->cur_stream;
+       const struct read_stream_list_callbacks *cbs = ctx->saved_cbs;
+       int ret;
+       const u32 orig_refcnt = orig_lte->out_refcnt;
+
+       BUILD_BUG_ON(MAX_OPEN_STREAMS < ARRAY_LEN(orig_lte->inline_stream_owners));
+
+       /* Grab the owners array now: the loop below overwrites
+        * inline_stream_owners[0], and the array is freed at the end.
+        * NOTE(review): presumably stream_owners shares storage with
+        * inline_stream_owners -- confirm against the struct definition.  */
+       struct stream_owner *owners = orig_lte->stream_owners;
+
+       /* Copy the stream's data from the temporary file to each of its
+        * destinations.
+        *
+        * This is executed only in the very uncommon case that a
+        * single-instance stream is being extracted to more than
+        * MAX_OPEN_STREAMS locations!  */
+
+       /* Build a copy of the stream entry whose data location is the
+        * temporary file on disk.  */
+       memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry));
+       tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK;
+       tmpfile_lte.file_on_disk = ctx->tmpfile_name;
+       ret = 0;
+       for (u32 i = 0; i < orig_refcnt; i++) {
+
+               /* Note: it usually doesn't matter whether we pass the original
+                * stream entry to callbacks provided by the extraction backend
+                * as opposed to the tmpfile stream entry, since they shouldn't
+                * actually read data from the stream other than through the
+                * read_stream_prefix() call below.  But for
+                * WIMLIB_EXTRACT_FLAG_WIMBOOT mode on Windows it does matter
+                * because it needs the original stream location in order to
+                * create the external backing reference.  */
+
+               /* Present the stream to the backend as having only owner i.  */
+               orig_lte->out_refcnt = 1;
+               orig_lte->inline_stream_owners[0] = owners[i];
+
+               ret = (*cbs->begin_stream)(orig_lte, cbs->begin_stream_ctx);
+               if (ret)
+                       break;
+
+               /* Extra SHA-1 isn't necessary here, but it shouldn't hurt as
+                * this case is very rare anyway.  */
+               ret = extract_stream(&tmpfile_lte, tmpfile_lte.size,
+                                    cbs->consume_chunk,
+                                    cbs->consume_chunk_ctx);
+
+               /* end_stream is always called once begin_stream has succeeded;
+                * it receives the extraction status, and its return value
+                * becomes the new status.  */
+               ret = (*cbs->end_stream)(orig_lte, ret, cbs->end_stream_ctx);
+               if (ret)
+                       break;
+       }
+       FREE(owners);
+       orig_lte->out_refcnt = 0;
+       return ret;
+}
+
+/* end_stream callback wrapper.
+ *
+ * If a temporary file is open (ctx->tmpfile_fd is valid), the file is closed
+ * and, when @status is 0, its contents are extracted to the real destinations
+ * with extract_from_tmpfile().  The temporary file is then unlinked, its name
+ * freed, and the resulting status returned.  The saved end_stream callback is
+ * not called directly on this path.
+ *
+ * Otherwise, @stream and @status are forwarded to the saved end_stream
+ * callback and its result is returned.
+ *
+ * NOTE(review): the results of filedes_close() and tunlink() are not
+ * checked.  */
+static int
+end_extract_stream_wrapper(struct wim_lookup_table_entry *stream,
+                          int status, void *_ctx)
+{
+       struct apply_ctx *ctx = _ctx;
+
+       if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
+               filedes_close(&ctx->tmpfile_fd);
+               if (!status)
+                       status = extract_from_tmpfile(ctx->tmpfile_name, ctx);
+               /* Mark the descriptor invalid so that later streams are not
+                * treated as being in temporary-file mode.  */
+               filedes_invalidate(&ctx->tmpfile_fd);
+               tunlink(ctx->tmpfile_name);
+               FREE(ctx->tmpfile_name);
+               return status;
+       } else {
+               return (*ctx->saved_cbs->end_stream)(stream, status,
+                                                    ctx->saved_cbs->end_stream_ctx);
+       }
 }
 
 /*
@@ -332,30 +529,34 @@ consume_chunk_with_progress(const void *chunk, size_t size, void *_ctx)
  *
  * This also works if the WIM is being read from a pipe, whereas attempting to
  * read streams directly (e.g. with read_full_stream_into_buf()) will not.
+ *
+ * This also will split up streams that will need to be extracted to more than
+ * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream.
+ * Therefore, the apply_operations implementation need not worry about running
+ * out of file descriptors, unless it might open more than one file descriptor
+ * per nominal destination (e.g. Win32 currently might because the destination
+ * file system might not support hard links).
  */
 int
 extract_stream_list(struct apply_ctx *ctx,
                    const struct read_stream_list_callbacks *cbs)
 {
+       /* Every callback is routed through the wrappers in this file, each of
+        * which receives @ctx as its context pointer.  */
        struct read_stream_list_callbacks wrapper_cbs = {
-               .begin_stream      = begin_extract_stream_with_progress,
+               .begin_stream      = begin_extract_stream_wrapper,
                .begin_stream_ctx  = ctx,
-               .consume_chunk     = consume_chunk_with_progress,
+               .consume_chunk     = extract_chunk_wrapper,
                .consume_chunk_ctx = ctx,
-               .end_stream        = cbs->end_stream,
-               .end_stream_ctx    = cbs->end_stream_ctx,
+               .end_stream        = end_extract_stream_wrapper,
+               .end_stream_ctx    = ctx,
        };
-       if (ctx->progfunc) {
-               ctx->saved_cbs = cbs;
-               cbs = &wrapper_cbs;
-       }
+       /* The wrappers find the caller's callbacks through ctx->saved_cbs.  */
+       ctx->saved_cbs = cbs;
        if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
-               return load_streams_from_pipe(ctx, cbs);
+               return load_streams_from_pipe(ctx, &wrapper_cbs);
        } else {
                return read_stream_list(&ctx->stream_list,
                                        offsetof(struct wim_lookup_table_entry,
                                                 extraction_list),
-                                       cbs, VERIFY_STREAM_HASHES);
+                                       &wrapper_cbs, VERIFY_STREAM_HASHES);
        }
 }
 
@@ -536,6 +737,7 @@ destroy_dentry_list(struct list_head *dentry_list)
                inode = dentry->d_inode;
                dentry_reset_extraction_list_node(dentry);
                inode->i_visited = 0;
+               inode->i_can_externally_back = 0;
                if ((void *)dentry->d_extraction_name != (void *)dentry->file_name)
                        FREE(dentry->d_extraction_name);
                dentry->d_extraction_name = NULL;
@@ -612,12 +814,14 @@ dentry_calculate_extraction_name(struct wim_dentry *dentry,
        if (dentry_is_root(dentry))
                return 0;
 
+#ifdef WITH_NTFS_3G
        if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_NTFS) {
                dentry->d_extraction_name = dentry->file_name;
                dentry->d_extraction_name_nchars = dentry->file_name_nbytes /
                                                   sizeof(utf16lechar);
                return 0;
        }
+#endif
 
        if (!ctx->supported_features.case_sensitive_filenames) {
                struct wim_dentry *other;
@@ -819,7 +1023,7 @@ ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
                return 0;
 
        ctx->progress.extract.total_bytes += lte->size;
-       ctx->progress.extract.num_streams++;
+       ctx->progress.extract.total_streams++;
 
        if (inode->i_visited)
                return 0;
@@ -875,23 +1079,43 @@ ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
 }
 
+/* Reference the unnamed data stream of @dentry's inode for extraction, unless
+ * it is to be skipped.
+ *
+ * Returns 0 without referencing anything if the inode is an encrypted
+ * directory, or if the apply_operations' will_externally_back callback exists
+ * and returns 0.  A positive return value from that callback is returned as
+ * an error.  Otherwise (no callback, or a negative return value from it) the
+ * result of ref_stream() on the resolved unnamed stream is returned.  */
 static int
-dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
+ref_unnamed_stream(struct wim_dentry *dentry, struct apply_ctx *ctx)
 {
        struct wim_inode *inode = dentry->d_inode;
        int ret;
+       u16 stream_idx;
+       struct wim_lookup_table_entry *stream;
 
-       /* The unnamed data stream will always be extracted, except in an
-        * unlikely case.  */
-       if (!inode_is_encrypted_directory(inode)) {
-               u16 stream_idx;
-               struct wim_lookup_table_entry *stream;
+       if (unlikely(inode_is_encrypted_directory(inode)))
+               return 0;
 
-               stream = inode_unnamed_stream_resolved(inode, &stream_idx);
-               ret = ref_stream(stream, stream_idx, dentry, ctx);
-               if (ret)
-                       return ret;
+       if (unlikely(ctx->apply_ops->will_externally_back)) {
+               ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx);
+               if (ret >= 0) {
+                       if (ret) /* Positive: error code */
+                               return ret;
+                       /* Zero: will externally back, so no stream data needs
+                        * to be referenced */
+                       return 0;
+               }
+               /* Negative: won't externally back; fall through and reference
+                * the stream normally */
        }
 
+       stream = inode_unnamed_stream_resolved(inode, &stream_idx);
+       return ref_stream(stream, stream_idx, dentry, ctx);
+}
+
+static int
+dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
+{
+       struct wim_inode *inode = dentry->d_inode;
+       int ret;
+
+       /* The unnamed data stream will almost always be extracted, but there
+        * exist cases in which it won't be.  */
+       ret = ref_unnamed_stream(dentry, ctx);
+       if (ret)
+               return ret;
+
        /* Named data streams will be extracted only if supported in the current
         * extraction mode and volume, and to avoid complications, if not doing
         * a linked extraction.  */
@@ -1227,6 +1451,8 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
                ctx->progress.extract.target = target;
        }
        INIT_LIST_HEAD(&ctx->stream_list);
+       filedes_invalidate(&ctx->tmpfile_fd);
+       ctx->apply_ops = ops;
 
        ret = (*ops->get_supported_features)(target, &ctx->supported_features);
        if (ret)
@@ -1251,12 +1477,12 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
        if (ret)
                goto out_cleanup;
 
+       dentry_list_build_inode_alias_lists(&dentry_list);
+
        ret = dentry_list_ref_streams(&dentry_list, ctx);
        if (ret)
                goto out_cleanup;
 
-       dentry_list_build_inode_alias_lists(&dentry_list);
-
        if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
                /* When extracting from a pipe, the number of bytes of data to
                 * extract can't be determined in the normal way (examining the
@@ -1314,23 +1540,20 @@ out:
+/* Create the directory @target unless it already exists.
+ *
+ * Returns 0 if tmkdir() succeeds or fails with EEXIST (or, on Windows, with
+ * EACCES); otherwise logs the error and returns WIMLIB_ERR_MKDIR.
+ *
+ * NOTE(review): unlike the stat-based code this replaces, EEXIST is accepted
+ * without checking that the existing path is actually a directory -- confirm
+ * that callers tolerate that.  */
 static int
 mkdir_if_needed(const tchar *target)
 {
-       struct stat stbuf;
-       if (tstat(target, &stbuf)) {
-               if (errno == ENOENT) {
-                       if (tmkdir(target, 0755)) {
-                               ERROR_WITH_ERRNO("Failed to create directory "
-                                                "\"%"TS"\"", target);
-                               return WIMLIB_ERR_MKDIR;
-                       }
-               } else {
-                       ERROR_WITH_ERRNO("Failed to stat \"%"TS"\"", target);
-                       return WIMLIB_ERR_STAT;
-               }
-       } else if (!S_ISDIR(stbuf.st_mode)) {
-               ERROR("\"%"TS"\" is not a directory", target);
-               return WIMLIB_ERR_NOTDIR;
-       }
-       return 0;
+       if (!tmkdir(target, 0755))
+               return 0;
+
+       /* Something already exists at @target: treated as success.  */
+       if (errno == EEXIST)
+               return 0;
+
+#ifdef __WIN32__
+       /* _wmkdir() fails with EACCES if called on a drive root directory.  */
+       if (errno == EACCES)
+               return 0;
+#endif
+
+       ERROR_WITH_ERRNO("Failed to create directory \"%"TS"\"", target);
+       return WIMLIB_ERR_MKDIR;
 }
 
 /* Make sure the extraction flags make sense, and update them if needed.  */
@@ -1361,12 +1584,16 @@ check_extract_flags(const WIMStruct *wim, int *extract_flags_p)
        }
 #endif
 
-#ifndef __WIN32__
        if (extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT) {
+#ifdef __WIN32__
+               if (!wim->filename)
+                       return WIMLIB_ERR_NO_FILENAME;
+#else
                ERROR("WIMBoot extraction is only supported on Windows!");
                return WIMLIB_ERR_UNSUPPORTED;
-       }
 #endif
+       }
+
 
        if ((extract_flags & (WIMLIB_EXTRACT_FLAG_RPFIX |
                              WIMLIB_EXTRACT_FLAG_NORPFIX |
@@ -1790,7 +2017,7 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
                if (i == image) {
                        /* Metadata resource is for the image being extracted.
                         * Parse it and save the metadata in memory.  */
-                       ret = read_metadata_resource(pwm, imd);
+                       ret = read_metadata_resource(imd);
                        if (ret)
                                goto out_wimlib_free;
                        imd->modified = 1;