wimlib-imagex, wimlib: Optimize appending new image as update of existing
authorEric Biggers <ebiggers3@gmail.com>
Mon, 19 Aug 2013 04:07:14 +0000 (23:07 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Mon, 19 Aug 2013 04:08:04 +0000 (23:08 -0500)
debian/wimlib9.install [moved from debian/wimlib7.install with 100% similarity]
doc/imagex-capture.1.in
include/wimlib.h
include/wimlib/dentry.h
programs/imagex.c
src/dentry.c
src/extract.c

index 14522e7..e12c8bf 100644 (file)
@@ -393,6 +393,22 @@ Ensure the resulting WIM is in the normal, non-pipable WIM format.  This is the
 default for \fB@IMAGEX_PROGNAME@ capture\fR, except when writing to standard
 output (\fIWIMFILE\fR specified as "-"), and also for \fB@IMAGEX_PROGNAME@
 append\fR, except when appending to a WIM that is already pipable.
+.TP
+\fB--delta-from\fR=\fIIMAGE\fR
+Only for \fB@IMAGEX_PROGNAME@ append\fR: declares that the new image being added
+from \fISOURCE\fR is mostly the same as the existing image \fIIMAGE\fR in
+\fIWIMFILE\fR, but captured at a later point in time, possibly with some
+modifications in the intervening time.  This is designed to be used in
+incremental backups of the same filesystem or directory tree.  \fIIMAGE\fR can
+be a 1-based index or name of an existing image in \fIWIMFILE\fR.  It can also
+be a negative integer to index backwards into the images (e.g.  -1 means the
+last existing image).
+.IP ""
+When this option is provided, the append of the new image will be optimized by
+not reading files that, based on metadata such as timestamps, appear not to have
+been modified since they were archived in the existing \fIIMAGE\fR.  Barring
+manipulation of timestamps, this option only affects performance and does not
+change the resulting WIM file.
 .SH NOTES
 \fB@IMAGEX_PROGNAME@ append\fR does not support appending an image to a split WIM.
 .PP
index f938add..3d8b317 100644 (file)
@@ -2625,6 +2625,70 @@ extern int
 wimlib_print_metadata(WIMStruct *wim, int image) _wimlib_deprecated;
 
 /**
+ * Declares that a newly added image is mostly the same as a prior image, but
+ * captured at a later point in time, possibly with some modifications in the
+ * intervening time.  This is designed to be used in incremental backups of the
+ * same filesystem or directory tree.
+ *
+ * This function compares the directory tree of the newly added image against
+ * that of the old image.  Any files that are present in both the newly added
+ * image and the old image and have timestamps that indicate they haven't been
+ * modified are deemed not to have been modified.  Such files will not be read
+ * from the filesystem when the WIM is being written or overwritten.  Note that
+ * these unchanged files will still be "archived" and will be logically present
+ * in the new image; the optimization is that they don't need to actually be
+ * read from the filesystem because the WIM already contains them.
+ *
+ * This function is provided to optimize incremental backups.  The resulting WIM
+ * file will still be the same regardless of whether this function is called.
+ * (This assumes, however, that timestamps have not been manipulated or left
+ * unmaintained in a way that tricks this function into thinking a file has not
+ * been modified when it really has.  To partly guard against such cases, other
+ * metadata such as file sizes will be checked as well.)
+ *
+ * This function must be called after adding the new image (e.g. with
+ * wimlib_add_image()), but before writing the updated WIM file (e.g. with
+ * wimlib_overwrite()).
+ *
+ * @p wim
+ *     Pointer to the ::WIMStruct for a WIM.
+ * @p new_image
+ *     1-based index in the WIM of the newly added image.  This image can have
+ *     been added with wimlib_add_image() or wimlib_add_image_multisource(), or
+ *     wimlib_add_empty_image() followed by wimlib_update_image().
+ * @p template_image
+ *     1-based index in the WIM of a template image that reflects a prior state
+ *     of the directory tree being captured.
+ * @p flags
+ *     Reserved; must be 0.
+ * @p progress_func
+ *     Currently ignored, but reserved for a function that will be called with
+ *     information about the operation.  Use NULL if no additional information
+ *     is desired.
+ *
+ * @return 0 on success; nonzero on error.
+ *
+ * @retval ::WIMLIB_ERR_INVALID_IMAGE
+ *     @p new_image and/or @p template_image were not valid image indices in
+ *     the WIM.
+ * @retval ::WIMLIB_ERR_NOMEM
+ *     Failed to allocate needed memory.
+ * @retval ::WIMLIB_ERR_INVALID_PARAM
+ *     @p new_image was equal to @p template_image, or @p new_image specified
+ *     an image that had not been modified since opening the WIM.
+ *
+ * This function can additionally return ::WIMLIB_ERR_SPLIT_UNSUPPORTED if the
+ * part number of @p wim is not 1, or ::WIMLIB_ERR_DECOMPRESSION,
+ * ::WIMLIB_ERR_INVALID_METADATA_RESOURCE, ::WIMLIB_ERR_NOMEM,
+ * ::WIMLIB_ERR_READ, or ::WIMLIB_ERR_UNEXPECTED_END_OF_FILE on failure (for
+ * different reasons) to read the metadata resource for the template image.
+ */
+extern int
+wimlib_reference_template_image(WIMStruct *wim, int new_image,
+                               int template_image, int flags,
+                               wimlib_progress_func_t progress_func);
+
+/**
  * Translates a string specifying the name or number of an image in the WIM into
  * the number of the image.  The images are numbered starting at 1.
  *
index 05b7e49..2a40ef4 100644 (file)
@@ -492,6 +492,9 @@ new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret);
 extern int
 new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret);
 
+extern void
+dentry_tree_clear_inode_visited(struct wim_dentry *root);
+
 extern int
 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret);
 
index d6479b4..be4b910 100644 (file)
@@ -125,6 +125,7 @@ enum {
        IMAGEX_COMPRESS_OPTION,
        IMAGEX_CONFIG_OPTION,
        IMAGEX_DEBUG_OPTION,
+       IMAGEX_DELTA_FROM_OPTION,
        IMAGEX_DEREFERENCE_OPTION,
        IMAGEX_DEST_DIR_OPTION,
        IMAGEX_EXTRACT_XML_OPTION,
@@ -201,6 +202,7 @@ static const struct option capture_or_append_options[] = {
        {T("norpfix"),     no_argument,       NULL, IMAGEX_NORPFIX_OPTION},
        {T("pipable"),     no_argument,       NULL, IMAGEX_PIPABLE_OPTION},
        {T("not-pipable"), no_argument,       NULL, IMAGEX_NOT_PIPABLE_OPTION},
+       {T("delta-from"),  required_argument, NULL, IMAGEX_DELTA_FROM_OPTION},
        {NULL, 0, NULL, 0},
 };
 
@@ -1625,6 +1627,8 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
        const tchar *name;
        const tchar *desc;
        const tchar *flags_element = NULL;
+       const tchar *template_image_name_or_num = NULL;
+       int template_image;
        WIMStruct *wim;
        int ret;
        unsigned num_threads = 0;
@@ -1706,6 +1710,14 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                case IMAGEX_NOT_PIPABLE_OPTION:
                        write_flags |= WIMLIB_WRITE_FLAG_NOT_PIPABLE;
                        break;
+               case IMAGEX_DELTA_FROM_OPTION:
+                       if (cmd == CMD_CAPTURE) {
+                               imagex_error(T("--delta-from=IMAGE is only "
+                                              "valid for append."));
+                               goto out_usage;
+                       }
+                       template_image_name_or_num = optarg;
+                       break;
                default:
                        goto out_usage;
                }
@@ -1854,6 +1866,30 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                        tsprintf(name_end, T(" (%lu)"), conflict_idx);
                }
        }
+
+       if (template_image_name_or_num) {
+               template_image = wimlib_resolve_image(wim, template_image_name_or_num);
+               if (template_image_name_or_num[0] == T('-')) {
+                       tchar *tmp;
+                       unsigned long n;
+                       struct wimlib_wim_info info;
+
+                       wimlib_get_wim_info(wim, &info);
+                       n = tstrtoul(template_image_name_or_num + 1, &tmp, 10);
+                       if (n >= 1 && n <= info.image_count &&
+                           *tmp == T('\0') &&
+                           tmp != template_image_name_or_num + 1)
+                       {
+                               template_image = info.image_count - (n - 1);
+                       }
+               }
+               ret = verify_image_exists_and_is_single(template_image,
+                                                       template_image_name_or_num,
+                                                       wimfile);
+               if (ret)
+                       goto out_wimlib_free;
+       }
+
        ret = wimlib_add_image_multisource(wim,
                                           capture_sources,
                                           num_sources,
@@ -1864,10 +1900,11 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
        if (ret)
                goto out_wimlib_free;
 
-       if (desc || flags_element) {
-               /* User provided <DESCRIPTION> or <FLAGS> element.  Get the
-                * index of the image we just added, then use it to call the
-                * appropriate functions.  */
+       if (desc || flags_element || template_image_name_or_num) {
+               /* User provided <DESCRIPTION> or <FLAGS> element, or an image
+                * on which the added one is to be based has been specified with
+                * --delta-from=IMAGE.  Get the index of the image we just
+                *  added, then use it to call the appropriate functions.  */
                struct wimlib_wim_info info;
 
                wimlib_get_wim_info(wim, &info);
@@ -1886,6 +1923,15 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                        if (ret)
                                goto out_wimlib_free;
                }
+
+               if (template_image_name_or_num) {
+                       ret = wimlib_reference_template_image(wim,
+                                                             info.image_count,
+                                                             template_image,
+                                                             0, NULL);
+                       if (ret)
+                               goto out_wimlib_free;
+               }
        }
 
        /* Write the new WIM or overwrite the existing WIM with the new image
@@ -3443,7 +3489,7 @@ T(
 "                    [--dereference] [--config=FILE] [--threads=NUM_THREADS]\n"
 "                    [--rebuild] [--unix-data] [--source-list] [--no-acls]\n"
 "                    [--strict-acls] [--rpfix] [--norpfix] [--pipable]\n"
-"                    [--not-pipable]\n"
+"                    [--not-pipable] [--delta-from=IMAGE]\n"
 ),
 [CMD_APPLY] =
 T(
@@ -3460,8 +3506,7 @@ T(
 "                    [--nocheck] [--compress=TYPE] [--flags EDITION_ID]\n"
 "                    [--verbose] [--dereference] [--config=FILE]\n"
 "                    [--threads=NUM_THREADS] [--unix-data] [--source-list]\n"
-"                    [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n"
-"                    [--pipable] [--not-pipable]\n"
+"                    [--no-acls] [--strict-acls] [--norpfix] [--pipable]\n"
 ),
 [CMD_DELETE] =
 T(
@@ -3536,24 +3581,25 @@ T(
 };
 
 static const tchar *invocation_name;
-static bool using_cmd_from_invocation_name = false;
+static int invocation_cmd = CMD_NONE;
 
 static const tchar *get_cmd_string(int cmd, bool nospace)
 {
-
-       if (using_cmd_from_invocation_name || cmd == CMD_NONE) {
-               return invocation_name;
+       static tchar buf[50];
+       if (cmd == CMD_NONE) {
+               tsprintf(buf, T("%"TS), T(IMAGEX_PROGNAME));
+       } else if (invocation_cmd != CMD_NONE) {
+               tsprintf(buf, T("wim%"TS), imagex_commands[cmd].name);
        } else {
                const tchar *format;
-               static tchar buf[50];
 
                if (nospace)
                        format = T("%"TS"-%"TS"");
                else
                        format = T("%"TS" %"TS"");
                tsprintf(buf, format, invocation_name, imagex_commands[cmd].name);
-               return buf;
        }
+       return buf;
 }
 
 static void
@@ -3694,7 +3740,7 @@ main(int argc, char **argv)
                        if (!tstrcmp(invocation_name + 3,
                                     imagex_commands[i].name))
                        {
-                               using_cmd_from_invocation_name = true;
+                               invocation_cmd = i;
                                cmd = i;
                                break;
                        }
index c9a84d7..ca9fe0b 100644 (file)
@@ -1087,6 +1087,19 @@ new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
 }
 
 static int
+dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
+{
+       dentry->d_inode->i_visited = 0;
+       return 0;
+}
+
+void
+dentry_tree_clear_inode_visited(struct wim_dentry *root)
+{
+       for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
+}
+
+static int
 init_ads_entry(struct wim_ads_entry *ads_entry, const void *name,
               size_t name_nbytes, bool is_utf16le)
 {
@@ -2486,3 +2499,125 @@ wimlib_iterate_dir_tree(WIMStruct *wim, int image, const tchar *path,
        wim->private = &ctx;
        return for_image(wim, image, image_do_iterate_dir_tree);
 }
+
+static bool
+inode_stream_sizes_consistent(const struct wim_inode *inode_1,
+                             const struct wim_inode *inode_2,
+                             const struct wim_lookup_table *lookup_table)
+{
+       if (inode_1->i_num_ads != inode_2->i_num_ads)
+               return false;
+       for (unsigned i = 0; i <= inode_1->i_num_ads; i++) {
+               const struct wim_lookup_table_entry *lte_1, *lte_2;
+
+               lte_1 = inode_stream_lte(inode_1, i, lookup_table);
+               lte_2 = inode_stream_lte(inode_2, i, lookup_table);
+               if (lte_1 && lte_2) {
+                       if (wim_resource_size(lte_1) != wim_resource_size(lte_2))
+                               return false;
+               } else if (lte_1 && wim_resource_size(lte_1)) {
+                       return false;
+               } else if (lte_2 && wim_resource_size(lte_2)) {
+                       return false;
+               }
+       }
+       return true;
+}
+
+static void
+inode_replace_ltes(struct wim_inode *inode,
+                  struct wim_inode *template_inode,
+                  struct wim_lookup_table *lookup_table)
+{
+       for (unsigned i = 0; i <= inode->i_num_ads; i++) {
+               struct wim_lookup_table_entry *lte, *lte_template;
+
+               lte = inode_stream_lte(inode, i, lookup_table);
+               if (lte) {
+                       for (unsigned j = 0; j < inode->i_nlink; j++)
+                               lte_decrement_refcnt(lte, lookup_table);
+                       lte_template = inode_stream_lte(template_inode, i,
+                                                       lookup_table);
+                       if (i == 0)
+                               inode->i_lte = lte_template;
+                       else
+                               inode->i_ads_entries[i - 1].lte = lte_template;
+                       if (lte_template)
+                               lte_template->refcnt += inode->i_nlink;
+               }
+       }
+       inode->i_resolved = 1;
+}
+
+static int
+dentry_reference_template(struct wim_dentry *dentry, void *_wim)
+{
+       int ret;
+       struct wim_dentry *template_dentry;
+       struct wim_inode *inode, *template_inode;
+       WIMStruct *wim = _wim;
+
+       if (dentry->d_inode->i_visited)
+               return 0;
+
+       ret = calculate_dentry_full_path(dentry);
+       if (ret)
+               return ret;
+
+       template_dentry = get_dentry(wim, dentry->_full_path);
+       if (!template_dentry) {
+               DEBUG("\"%"TS"\": newly added file", dentry->_full_path);
+               return 0;
+       }
+
+       inode = dentry->d_inode;
+       template_inode = template_dentry->d_inode;
+
+       if (inode->i_last_write_time == template_inode->i_last_write_time
+           && inode->i_creation_time == template_inode->i_creation_time
+           && inode->i_last_access_time >= template_inode->i_last_access_time
+           && inode_stream_sizes_consistent(inode, template_inode,
+                                            wim->lookup_table))
+       {
+               /*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/
+               inode_replace_ltes(inode, template_inode, wim->lookup_table);
+               inode->i_visited = 1;
+       } else {
+               DEBUG("\"%"TS"\": change detected!", dentry->_full_path);
+       }
+       return 0;
+}
+
+/* API function documented in wimlib.h  */
+WIMLIBAPI int
+wimlib_reference_template_image(WIMStruct *wim, int new_image, int template_image,
+                               int flags, wimlib_progress_func_t progress_func)
+{
+       int ret;
+       struct wim_image_metadata *new_imd;
+
+       if (wim->hdr.part_number != 1)
+               return WIMLIB_ERR_SPLIT_UNSUPPORTED;
+
+       if (new_image < 1 || new_image > wim->hdr.image_count)
+               return WIMLIB_ERR_INVALID_IMAGE;
+
+       if (template_image < 1 || template_image > wim->hdr.image_count)
+               return WIMLIB_ERR_INVALID_IMAGE;
+
+       if (new_image == template_image)
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       new_imd = wim->image_metadata[new_image - 1];
+       if (!new_imd->modified)
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       ret = select_wim_image(wim, template_image);
+       if (ret)
+               return ret;
+
+       ret = for_dentry_in_tree(new_imd->root_dentry,
+                                dentry_reference_template, wim);
+       dentry_tree_clear_inode_visited(new_imd->root_dentry);
+       return ret;
+}
index 1784cdf..c04f11b 100644 (file)
@@ -1802,20 +1802,13 @@ dentry_tally_features(struct wim_dentry *dentry, void *_features)
        return 0;
 }
 
-static int
-dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
-{
-       dentry->d_inode->i_visited = 0;
-       return 0;
-}
-
 /* Tally the features necessary to extract a dentry tree.  */
 static void
 dentry_tree_get_features(struct wim_dentry *root, struct wim_features *features)
 {
        memset(features, 0, sizeof(struct wim_features));
        for_dentry_in_tree(root, dentry_tally_features, features);
-       for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
+       dentry_tree_clear_inode_visited(root);
 }
 
 static u32