Make generic extraction code aware of external backing and optimize on Win32 side
author Eric Biggers <ebiggers3@gmail.com>
Wed, 27 Aug 2014 03:48:00 +0000 (22:48 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Wed, 27 Aug 2014 03:48:03 +0000 (22:48 -0500)
include/wimlib/apply.h
src/extract.c
src/win32_apply.c

index 8a1bf87..ed86481 100644 (file)
@@ -33,6 +33,8 @@ struct wim_features {
 
 struct wim_lookup_table_entry;
 struct read_stream_list_callbacks;
+struct apply_operations;
+struct wim_dentry;
 
 struct apply_ctx {
        /* The WIMStruct from which files are being extracted from the currently
@@ -62,6 +64,7 @@ struct apply_ctx {
        struct wim_features supported_features;
 
        /* The members below should not be used outside of extract.c  */
+       const struct apply_operations *apply_ops;
        u64 next_progress;
        unsigned long invalid_sequence;
        unsigned long num_streams_remaining;
@@ -127,14 +130,105 @@ extern int
 extract_stream_list(struct apply_ctx *ctx,
                    const struct read_stream_list_callbacks *cbs);
 
+/*
+ * Represents an extraction backend.
+ */
 struct apply_operations {
+
+       /* Name of the extraction backend.  */
        const char *name;
+
+       /*
+        * Query the features supported by the extraction backend.
+        *
+        * @target
+        *      The target string that was provided by the user.  (Often a
+        *      directory, but extraction backends are free to interpret this
+        *      differently.)
+        *
+        * @supported_features
+        *      A structure, each of whose members represents a feature that may
+        *      be supported by the extraction backend.  For each feature that
+        *      the extraction backend supports, this routine must set the
+        *      corresponding member to a nonzero value.
+        *
+        * Return 0 if successful; otherwise a positive wimlib error code.
+        */
        int (*get_supported_features)(const tchar *target,
                                      struct wim_features *supported_features);
 
+       /*
+        * Main extraction routine.
+        *
+        * The extraction backend is provided a list of dentries that have been
+        * prepared for extraction.  It is free to extract them in any way that
+        * it chooses.  Ideally, it should choose a method that maximizes
+        * performance.
+        *
+        * The target string will be provided in ctx->common.target.  This might
+        * be a directory, although extraction backends are free to interpret it
+        * as they wish.  TODO: in some cases, the common extraction code also
+        * interprets the target string.  This should be completely isolated to
+        * extraction backends.
+        *
+        * The extraction flags will be provided in ctx->common.extract_flags.
+        * Extraction backends should examine them and implement the behaviors
+        * for as many flags as possible.  Some flags are already handled by the
+        * common extraction code.  TODO: this needs to be better formalized.
+        *
+        * @dentry_list, the list of dentries, will be ordered such that the
+        * ancestor of any dentry always precedes any descendants.  Unless
+        * @single_tree_only is set, it's possible that the dentries consist of
+        * multiple disconnected trees.
+        *
+        * 'd_extraction_name' and 'd_extraction_name_nchars' of each dentry
+        * will be set to indicate the actual name with which the dentry should
+        * be extracted.  This may or may not be the same as 'file_name'.
+        * TODO: really, the extraction backends should be responsible for
+        * generating 'd_extraction_name'.
+        *
+        * Each dentry will refer to a valid inode in 'd_inode'.
+        * 'd_inode->i_extraction_aliases' will contain a list of just the
+        * dentries of that inode being extracted.  This will be a (not
+        * necessarily proper) subset of the 'd_inode->i_dentry' list.
+        *
+        * The streams required to be extracted will already be prepared in
+        * 'apply_ctx'.  The extraction backend should call
+        * extract_stream_list() to extract them.
+        *
+        * The will_extract_dentry() utility function, given an arbitrary dentry
+        * in the WIM image (which may not be in the extraction list), can be
+        * used to determine if that dentry is in the extraction list.
+        *
+        * Return 0 if successful; otherwise a positive wimlib error code.
+        */
        int (*extract)(struct list_head *dentry_list, struct apply_ctx *ctx);
 
+       /*
+        * Query whether the unnamed data stream of the specified file will be
+        * extracted as "externally backed".  If so, the extraction backend is
+        * assumed to handle this separately, and the common extraction code
+        * will not register a usage of that stream.
+        *
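+        * This routine is optional.  If it is left NULL, the common extraction
+        * code registers a usage of the unnamed data stream as usual.
+        *
+        * Return:
+        *      < 0 if the file will *not* be externally backed.
+        *      = 0 if the file will be externally backed.
+        *      > 0 (wimlib error code) if an error occurred.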
+        */
+       int (*will_externally_back)(struct wim_dentry *dentry, struct apply_ctx *ctx);
+
+       /*
+        * Size of the backend-specific extraction context.  It must contain
+        * 'struct apply_ctx' as its first member.
+        */
        size_t context_size;
+
+       /*
+        * Set this if the extraction backend only supports extracting dentries
+        * that form a single tree, not multiple trees.
+        */
        bool single_tree_only;
 };
 
index 3b04704..caf56b4 100644 (file)
@@ -1041,23 +1041,43 @@ ref_stream(struct wim_lookup_table_entry *lte, u32 stream_idx,
 }
 
 static int
-dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
+ref_unnamed_stream(struct wim_dentry *dentry, struct apply_ctx *ctx)
 {
        struct wim_inode *inode = dentry->d_inode;
        int ret;
+       u16 stream_idx;
+       struct wim_lookup_table_entry *stream;
 
-       /* The unnamed data stream will always be extracted, except in an
-        * unlikely case.  */
-       if (!inode_is_encrypted_directory(inode)) {
-               u16 stream_idx;
-               struct wim_lookup_table_entry *stream;
+       if (unlikely(inode_is_encrypted_directory(inode)))
+               return 0;
 
-               stream = inode_unnamed_stream_resolved(inode, &stream_idx);
-               ret = ref_stream(stream, stream_idx, dentry, ctx);
-               if (ret)
-                       return ret;
+       if (unlikely(ctx->apply_ops->will_externally_back)) {
+               ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx);
+               if (ret >= 0) {
+                       if (ret) /* Error */
+                               return ret;
+                       /* Will externally back */
+                       return 0;
+               }
+               /* Won't externally back */
        }
 
+       stream = inode_unnamed_stream_resolved(inode, &stream_idx);
+       return ref_stream(stream, stream_idx, dentry, ctx);
+}
+
+static int
+dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
+{
+       struct wim_inode *inode = dentry->d_inode;
+       int ret;
+
+       /* The unnamed data stream will almost always be extracted, but there
+        * exist cases in which it won't be.  */
+       ret = ref_unnamed_stream(dentry, ctx);
+       if (ret)
+               return ret;
+
        /* Named data streams will be extracted only if supported in the current
         * extraction mode and volume, and to avoid complications, if not doing
         * a linked extraction.  */
@@ -1394,6 +1414,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
        }
        INIT_LIST_HEAD(&ctx->stream_list);
        filedes_invalidate(&ctx->tmpfile_fd);
+       ctx->apply_ops = ops;
 
        ret = (*ops->get_supported_features)(target, &ctx->supported_features);
        if (ret)
index a6b37c4..03f2449 100644 (file)
@@ -312,6 +312,89 @@ in_prepopulate_list(struct wim_dentry *dentry,
                                  wcslen(dentry->_full_path), pats);
 }
 
+static const wchar_t *
+current_path(struct win32_apply_ctx *ctx);
+
+static void
+build_extraction_path(const struct wim_dentry *dentry,
+                     struct win32_apply_ctx *ctx);
+
+#define WIM_BACKING_NOT_ENABLED                -1
+#define WIM_BACKING_NOT_POSSIBLE       -2
+#define WIM_BACKING_EXCLUDED           -3
+
+/*
+ * Determines if the unnamed data stream of a file will be created as an
+ * external backing, as opposed to a standard extraction.
+ *
+ * Return:
+ *     WIM_BACKING_NOT_ENABLED (< 0) if WIMLIB_EXTRACT_FLAG_WIMBOOT is not set
+ *             in ctx->common.extract_flags.
+ *     WIM_BACKING_NOT_POSSIBLE (< 0) if the inode has the directory, reparse
+ *             point, or encrypted attribute set, or if its unnamed stream is
+ *             absent, is not located in a WIM (RESOURCE_IN_WIM), is located in
+ *             a WIM other than ctx->common.wim, or has a size different from
+ *             stream->rspec->uncompressed_size.
+ *     WIM_BACKING_EXCLUDED (< 0) if in_prepopulate_list() matches the dentry.
+ *     0 if none of the above applies, i.e. the file will be externally backed.
+ *     Otherwise, the nonzero value returned by calculate_dentry_full_path()
+ *             (expected to be a positive wimlib error code).
+ *
+ * The checks are made in the order listed.  dentry->_full_path, once computed
+ * by calculate_dentry_full_path(), is not freed here; set_external_backing()
+ * reads it when reporting an exclusion.
+ */
+static int
+win32_will_externally_back(struct wim_dentry *dentry, struct apply_ctx *_ctx)
+{
+       struct win32_apply_ctx *ctx = (struct win32_apply_ctx *)_ctx;
+       struct wim_lookup_table_entry *stream;
+       int ret;
+
+       if (!(ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT))
+               return WIM_BACKING_NOT_ENABLED;
+
+       if (dentry->d_inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
+                                            FILE_ATTRIBUTE_REPARSE_POINT |
+                                            FILE_ATTRIBUTE_ENCRYPTED))
+               return WIM_BACKING_NOT_POSSIBLE;
+
+       stream = inode_unnamed_lte_resolved(dentry->d_inode);
+
+       if (!stream ||
+           stream->resource_location != RESOURCE_IN_WIM ||
+           stream->rspec->wim != ctx->common.wim ||
+           stream->size != stream->rspec->uncompressed_size)
+               return WIM_BACKING_NOT_POSSIBLE;
+
+       ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
+
+       if (in_prepopulate_list(dentry, ctx))
+               return WIM_BACKING_EXCLUDED;
+
+       return 0;
+}
+
+static int
+set_external_backing(struct wim_dentry *dentry, struct win32_apply_ctx *ctx)
+{
+       int ret;
+
+       ret = win32_will_externally_back(dentry, &ctx->common); /* < 0: no, 0: yes, > 0: error */
+       if (ret > 0) /* Error: pass the code up to the caller unchanged.  */
+               return ret;
+
+       if (ret < 0 && ret != WIM_BACKING_EXCLUDED)
+               return 0; /* Backing not enabled or not possible: nothing to do here.  */
+
+       build_extraction_path(dentry, ctx); /* NOTE(review): presumably what current_path(ctx) below refers to -- confirm.  */
+
+       if (ret == WIM_BACKING_EXCLUDED) {
+               /* Not externally backing due to exclusion: report the path in
+                * the WIM and the extraction path with a
+                * WIMLIB_PROGRESS_MSG_WIMBOOT_EXCLUDE message.  The result of
+                * call_progress() becomes the return value.  */
+               union wimlib_progress_info info;
+
+               info.wimboot_exclude.path_in_wim = dentry->_full_path;
+               info.wimboot_exclude.extraction_path = current_path(ctx);
+
+               return call_progress(ctx->common.progfunc,
+                                    WIMLIB_PROGRESS_MSG_WIMBOOT_EXCLUDE,
+                                    &info, ctx->common.progctx);
+       } else {
+               /* ret == 0: externally backing.  wimboot_set_pointer() is
+                * called on the extraction path with the inode's unnamed
+                * stream; its result becomes the return value.  */
+               return wimboot_set_pointer(&ctx->attr,
+                                          current_path(ctx),
+                                          inode_unnamed_lte_resolved(dentry->d_inode),
+                                          ctx->wimboot.data_source_id,
+                                          ctx->wimboot.wim_lookup_table_hash,
+                                          ctx->wimboot.wof_running);
+       }
+}
+
 /* Calculates the SHA-1 message digest of the WIM's lookup table.  */
 static int
 hash_lookup_table(WIMStruct *wim, u8 hash[SHA1_HASH_SIZE])
@@ -650,11 +733,11 @@ prepare_target(struct list_head *dentry_list, struct win32_apply_ctx *ctx)
 /* When creating an inode that will have a short (DOS) name, we create it using
  * the long name associated with the short name.  This ensures that the short
  * name gets associated with the correct long name.  */
-static const struct wim_dentry *
+static struct wim_dentry *
 first_extraction_alias(const struct wim_inode *inode)
 {
-       const struct list_head *next = inode->i_extraction_aliases.next;
-       const struct wim_dentry *dentry;
+       struct list_head *next = inode->i_extraction_aliases.next;
+       struct wim_dentry *dentry;
 
        do {
                dentry = list_entry(next, struct wim_dentry,
@@ -1323,7 +1406,7 @@ create_links(HANDLE h, const struct wim_dentry *first_dentry,
 static int
 create_nondirectory(const struct wim_inode *inode, struct win32_apply_ctx *ctx)
 {
-       const struct wim_dentry *first_dentry;
+       struct wim_dentry *first_dentry;
        HANDLE h;
        int ret;
 
@@ -1342,6 +1425,10 @@ create_nondirectory(const struct wim_inode *inode, struct win32_apply_ctx *ctx)
        if (!ret)
                ret = create_links(h, first_dentry, ctx);
 
+       /* "WIMBoot" extraction: set external backing by the WIM file if needed.  */
+       if (!ret && unlikely(ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT))
+               ret = set_external_backing(first_dentry, ctx);
+
        (*func_NtClose)(h);
        return ret;
 }
@@ -1461,42 +1548,6 @@ begin_extract_stream_instance(const struct wim_lookup_table_entry *stream,
                return prepare_data_buffer(ctx, stream->size);
        }
 
-       /* Extracting unnamed data stream in WIMBoot mode?  */
-       if (unlikely(ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_WIMBOOT)
-           && (stream_name_nchars == 0)
-           && (stream->resource_location == RESOURCE_IN_WIM)
-           && (stream->rspec->wim == ctx->common.wim)
-           && (stream->size == stream->rspec->uncompressed_size))
-       {
-               int ret = calculate_dentry_full_path(dentry);
-               if (ret)
-                       return ret;
-               if (in_prepopulate_list(dentry, ctx)) {
-                       union wimlib_progress_info info;
-
-                       info.wimboot_exclude.path_in_wim = dentry->_full_path;
-                       info.wimboot_exclude.extraction_path = current_path(ctx);
-
-                       ret = call_progress(ctx->common.progfunc,
-                                           WIMLIB_PROGRESS_MSG_WIMBOOT_EXCLUDE,
-                                           &info, ctx->common.progctx);
-                       FREE(dentry->_full_path);
-                       dentry->_full_path = NULL;
-                       if (ret)
-                               return ret;
-                       /* Go on and open the file for normal extraction.  */
-               } else {
-                       FREE(dentry->_full_path);
-                       dentry->_full_path = NULL;
-                       return wimboot_set_pointer(&ctx->attr,
-                                                  current_path(ctx),
-                                                  stream,
-                                                  ctx->wimboot.data_source_id,
-                                                  ctx->wimboot.wim_lookup_table_hash,
-                                                  ctx->wimboot.wof_running);
-               }
-       }
-
        if (ctx->num_open_handles == MAX_OPEN_STREAMS) {
                /* XXX: Fix this.  But because of the checks in
                 * extract_stream_list(), this can now only happen on a
@@ -2243,6 +2294,7 @@ const struct apply_operations win32_apply_ops = {
        .name                   = "Windows",
        .get_supported_features = win32_get_supported_features,
        .extract                = win32_extract,
+       .will_externally_back   = win32_will_externally_back,
        .context_size           = sizeof(struct win32_apply_ctx),
 };