From: Eric Biggers Date: Mon, 19 Aug 2013 04:07:14 +0000 (-0500) Subject: wimlib-imagex, wimlib: Optimize appending new image as update of existing X-Git-Tag: v1.5.0~31 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=41ae41d4dbe9a5c8413304b5e9ae02eec76d69b1 wimlib-imagex, wimlib: Optimize appending new image as update of existing --- diff --git a/debian/wimlib7.install b/debian/wimlib9.install similarity index 100% rename from debian/wimlib7.install rename to debian/wimlib9.install diff --git a/doc/imagex-capture.1.in b/doc/imagex-capture.1.in index 14522e7c..e12c8bfe 100644 --- a/doc/imagex-capture.1.in +++ b/doc/imagex-capture.1.in @@ -393,6 +393,22 @@ Ensure the resulting WIM is in the normal, non-pipable WIM format. This is the default for \fB@IMAGEX_PROGNAME@ capture\fR, except when writing to standard output (\fIWIMFILE\fR specified as "-"), and also for \fB@IMAGEX_PROGNAME@ append\fR, except when appending to a WIM that is already pipable. +.TP +\fB--delta-from\fR=\fIIMAGE\fR +Only for \fB@IMAGEX_PROGNAME@ append\fR: declares that the new image being added +from \fISOURCE\fR is mostly the same as the existing image \fIIMAGE\fR in +\fIWIMFILE\fR, but captured at a later point in time, possibly with some +modifications in the intervening time. This is designed to be used in +incremental backups of the same filesystem or directory tree. \fIIMAGE\fR can +be a 1-based index or name of an existing image in \fIWIMFILE\fR. It can also +be a negative integer to index backwards into the images (e.g. -1 means the +last existing image). +.IP "" +When this option is provided, the append of the new image will be optimized by +not reading files that, based on metadata such as timestamps, appear not to have +been modified since they were archived in the existing \fIIMAGE\fR. Barring +manipulation of timestamps, this option only affects performance and does not +change the resulting WIM file. 
.SH NOTES \fB@IMAGEX_PROGNAME@ append\fR does not support appending an image to a split WIM. .PP diff --git a/include/wimlib.h b/include/wimlib.h index f938add0..3d8b3177 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -2624,6 +2624,70 @@ wimlib_print_header(const WIMStruct *wim) _wimlib_deprecated; extern int wimlib_print_metadata(WIMStruct *wim, int image) _wimlib_deprecated; +/** + * Declares that a newly added image is mostly the same as a prior image, but + * captured at a later point in time, possibly with some modifications in the + * intervening time. This is designed to be used in incremental backups of the + * same filesystem or directory tree. + * + * This function compares the directory tree of the newly added image against + * that of the old image. Any files that are present in both the newly added + * image and the old image and have timestamps that indicate they haven't been + * modified are deemed not to have been modified. Such files will not be read + * from the filesystem when the WIM is being written or overwritten. Note that + * these unchanged files will still be "archived" and will be logically present + * in the new image; the optimization is that they don't need to actually be + * read from the filesystem because the WIM already contains them. + * + * This function is provided to optimize incremental backups. The resulting WIM + * file will still be the same regardless of whether this function is called. + * (This is, however, assuming that timestamps have not been manipulated or + * left unmaintained so as to trick this function into thinking a file has not been + * modified when really it has. To partly guard against such cases, other + * metadata such as file sizes will be checked as well.) + * + * This function must be called after adding the new image (e.g. with + * wimlib_add_image()), but before writing the updated WIM file (e.g. with + * wimlib_overwrite()). + * + * @p wim + * Pointer to the ::WIMStruct for a WIM. 
+ * @p new_image + * 1-based index in the WIM of the newly added image. This image can have + * been added with wimlib_add_image() or wimlib_add_image_multisource(), or + * wimlib_add_empty_image() followed by wimlib_update_image(). + * @p template_image + * 1-based index in the WIM of a template image that reflects a prior state + * of the directory tree being captured. + * @p flags + * Reserved; must be 0. + * @p progress_func + * Currently ignored, but reserved for a function that will be called with + * information about the operation. Use NULL if no additional information + * is desired. + * + * @return 0 on success; nonzero on error. + * + * @retval ::WIMLIB_ERR_INVALID_IMAGE + * @p new_image and/or @p template_image were not valid image indices in + * the WIM. + * @retval ::WIMLIB_ERR_NOMEM + * Failed to allocate needed memory. + * @retval ::WIMLIB_ERR_INVALID_PARAM + * @p new_image was equal to @p template_image, or @p new_image specified + * an image that had not been modified since opening the WIM. + * + * This function can additionally return ::WIMLIB_ERR_DECOMPRESSION, + * ::WIMLIB_ERR_INVALID_METADATA_RESOURCE, ::WIMLIB_ERR_NOMEM, + * ::WIMLIB_ERR_READ, or ::WIMLIB_ERR_UNEXPECTED_END_OF_FILE, all of which + * indicate failure (for different reasons) to read the metadata resource for + * the template image. + */ +extern int +wimlib_reference_template_image(WIMStruct *wim, int new_image, + int template_image, int flags, + wimlib_progress_func_t progress_func); + /** * Translates a string specifying the name or number of an image in the WIM into * the number of the image. The images are numbered starting at 1. 
diff --git a/include/wimlib/dentry.h b/include/wimlib/dentry.h index 05b7e493..2a40ef42 100644 --- a/include/wimlib/dentry.h +++ b/include/wimlib/dentry.h @@ -492,6 +492,9 @@ new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret); extern int new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret); +extern void +dentry_tree_clear_inode_visited(struct wim_dentry *root); + extern int new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret); diff --git a/programs/imagex.c b/programs/imagex.c index d6479b4f..be4b910e 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -125,6 +125,7 @@ enum { IMAGEX_COMPRESS_OPTION, IMAGEX_CONFIG_OPTION, IMAGEX_DEBUG_OPTION, + IMAGEX_DELTA_FROM_OPTION, IMAGEX_DEREFERENCE_OPTION, IMAGEX_DEST_DIR_OPTION, IMAGEX_EXTRACT_XML_OPTION, @@ -201,6 +202,7 @@ static const struct option capture_or_append_options[] = { {T("norpfix"), no_argument, NULL, IMAGEX_NORPFIX_OPTION}, {T("pipable"), no_argument, NULL, IMAGEX_PIPABLE_OPTION}, {T("not-pipable"), no_argument, NULL, IMAGEX_NOT_PIPABLE_OPTION}, + {T("delta-from"), required_argument, NULL, IMAGEX_DELTA_FROM_OPTION}, {NULL, 0, NULL, 0}, }; @@ -1625,6 +1627,8 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) const tchar *name; const tchar *desc; const tchar *flags_element = NULL; + const tchar *template_image_name_or_num = NULL; + int template_image; WIMStruct *wim; int ret; unsigned num_threads = 0; @@ -1706,6 +1710,14 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) case IMAGEX_NOT_PIPABLE_OPTION: write_flags |= WIMLIB_WRITE_FLAG_NOT_PIPABLE; break; + case IMAGEX_DELTA_FROM_OPTION: + if (cmd == CMD_CAPTURE) { + imagex_error(T("--delta-from=IMAGE is only " + "valid for append.")); + goto out_usage; + } + template_image_name_or_num = optarg; + break; default: goto out_usage; } @@ -1854,6 +1866,30 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) tsprintf(name_end, T(" (%lu)"), conflict_idx); } } + + if 
(template_image_name_or_num) { + template_image = wimlib_resolve_image(wim, template_image_name_or_num); + if (template_image_name_or_num[0] == T('-')) { + tchar *tmp; + unsigned long n; + struct wimlib_wim_info info; + + wimlib_get_wim_info(wim, &info); + n = tstrtoul(template_image_name_or_num + 1, &tmp, 10); + if (n >= 1 && n <= info.image_count && + *tmp == T('\0') && + tmp != template_image_name_or_num + 1) + { + template_image = info.image_count - (n - 1); + } + } + ret = verify_image_exists_and_is_single(template_image, + template_image_name_or_num, + wimfile); + if (ret) + goto out_wimlib_free; + } + ret = wimlib_add_image_multisource(wim, capture_sources, num_sources, @@ -1864,10 +1900,11 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) if (ret) goto out_wimlib_free; - if (desc || flags_element) { - /* User provided <DESCRIPTION> or <FLAGS> element. Get the - * index of the image we just added, then use it to call the - * appropriate functions. */ + if (desc || flags_element || template_image_name_or_num) { + /* User provided <DESCRIPTION> or <FLAGS> element, or an image + * on which the added one is to be based has been specified with + * --delta-from=IMAGE. Get the index of the image we just + * added, then use it to call the appropriate functions. 
*/ struct wimlib_wim_info info; wimlib_get_wim_info(wim, &info); @@ -1886,6 +1923,15 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) if (ret) goto out_wimlib_free; } + + if (template_image_name_or_num) { + ret = wimlib_reference_template_image(wim, + info.image_count, + template_image, + 0, NULL); + if (ret) + goto out_wimlib_free; + } } /* Write the new WIM or overwrite the existing WIM with the new image @@ -3443,7 +3489,7 @@ T( " [--dereference] [--config=FILE] [--threads=NUM_THREADS]\n" " [--rebuild] [--unix-data] [--source-list] [--no-acls]\n" " [--strict-acls] [--rpfix] [--norpfix] [--pipable]\n" -" [--not-pipable]\n" +" [--not-pipable] [--delta-from=IMAGE]\n" ), [CMD_APPLY] = T( @@ -3460,8 +3506,7 @@ T( " [--nocheck] [--compress=TYPE] [--flags EDITION_ID]\n" " [--verbose] [--dereference] [--config=FILE]\n" " [--threads=NUM_THREADS] [--unix-data] [--source-list]\n" -" [--no-acls] [--strict-acls] [--rpfix] [--norpfix]\n" -" [--pipable] [--not-pipable]\n" +" [--no-acls] [--strict-acls] [--norpfix] [--pipable]\n" ), [CMD_DELETE] = T( @@ -3536,24 +3581,25 @@ T( }; static const tchar *invocation_name; -static bool using_cmd_from_invocation_name = false; +static int invocation_cmd = CMD_NONE; static const tchar *get_cmd_string(int cmd, bool nospace) { - - if (using_cmd_from_invocation_name || cmd == CMD_NONE) { - return invocation_name; + static tchar buf[50]; + if (cmd == CMD_NONE) { + tsprintf(buf, T("%"TS), T(IMAGEX_PROGNAME)); + } else if (invocation_cmd != CMD_NONE) { + tsprintf(buf, T("wim%"TS), imagex_commands[cmd].name); } else { const tchar *format; - static tchar buf[50]; if (nospace) format = T("%"TS"-%"TS""); else format = T("%"TS" %"TS""); tsprintf(buf, format, invocation_name, imagex_commands[cmd].name); - return buf; } + return buf; } static void @@ -3694,7 +3740,7 @@ main(int argc, char **argv) if (!tstrcmp(invocation_name + 3, imagex_commands[i].name)) { - using_cmd_from_invocation_name = true; + invocation_cmd = i; cmd = i; break; } 
diff --git a/src/dentry.c b/src/dentry.c index c9a84d71..ca9fe0b1 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -1086,6 +1086,19 @@ new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret) return 0; } +static int +dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) +{ + dentry->d_inode->i_visited = 0; + return 0; +} + +void +dentry_tree_clear_inode_visited(struct wim_dentry *root) +{ + for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); +} + static int init_ads_entry(struct wim_ads_entry *ads_entry, const void *name, size_t name_nbytes, bool is_utf16le) @@ -2486,3 +2499,125 @@ wimlib_iterate_dir_tree(WIMStruct *wim, int image, const tchar *path, wim->private = &ctx; return for_image(wim, image, image_do_iterate_dir_tree); } + +static bool +inode_stream_sizes_consistent(const struct wim_inode *inode_1, + const struct wim_inode *inode_2, + const struct wim_lookup_table *lookup_table) +{ + if (inode_1->i_num_ads != inode_2->i_num_ads) + return false; + for (unsigned i = 0; i <= inode_1->i_num_ads; i++) { + const struct wim_lookup_table_entry *lte_1, *lte_2; + + lte_1 = inode_stream_lte(inode_1, i, lookup_table); + lte_2 = inode_stream_lte(inode_2, i, lookup_table); + if (lte_1 && lte_2) { + if (wim_resource_size(lte_1) != wim_resource_size(lte_2)) + return false; + } else if (lte_1 && wim_resource_size(lte_1)) { + return false; + } else if (lte_2 && wim_resource_size(lte_2)) { + return false; + } + } + return true; +} + +static void +inode_replace_ltes(struct wim_inode *inode, + struct wim_inode *template_inode, + struct wim_lookup_table *lookup_table) +{ + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + struct wim_lookup_table_entry *lte, *lte_template; + + lte = inode_stream_lte(inode, i, lookup_table); + if (lte) { + for (unsigned j = 0; j < inode->i_nlink; j++) + lte_decrement_refcnt(lte, lookup_table); + lte_template = inode_stream_lte(template_inode, i, + lookup_table); + if (i == 0) + inode->i_lte = lte_template; + 
else + inode->i_ads_entries[i - 1].lte = lte_template; + if (lte_template) + lte_template->refcnt += inode->i_nlink; + } + } + inode->i_resolved = 1; +} + +static int +dentry_reference_template(struct wim_dentry *dentry, void *_wim) +{ + int ret; + struct wim_dentry *template_dentry; + struct wim_inode *inode, *template_inode; + WIMStruct *wim = _wim; + + if (dentry->d_inode->i_visited) + return 0; + + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + + template_dentry = get_dentry(wim, dentry->_full_path); + if (!template_dentry) { + DEBUG("\"%"TS"\": newly added file", dentry->_full_path); + return 0; + } + + inode = dentry->d_inode; + template_inode = template_dentry->d_inode; + + if (inode->i_last_write_time == template_inode->i_last_write_time + && inode->i_creation_time == template_inode->i_creation_time + && inode->i_last_access_time >= template_inode->i_last_access_time + && inode_stream_sizes_consistent(inode, template_inode, + wim->lookup_table)) + { + /*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/ + inode_replace_ltes(inode, template_inode, wim->lookup_table); + inode->i_visited = 1; + } else { + DEBUG("\"%"TS"\": change detected!", dentry->_full_path); + } + return 0; +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_reference_template_image(WIMStruct *wim, int new_image, int template_image, + int flags, wimlib_progress_func_t progress_func) +{ + int ret; + struct wim_image_metadata *new_imd; + + if (wim->hdr.part_number != 1) + return WIMLIB_ERR_SPLIT_UNSUPPORTED; + + if (new_image < 1 || new_image > wim->hdr.image_count) + return WIMLIB_ERR_INVALID_IMAGE; + + if (template_image < 1 || template_image > wim->hdr.image_count) + return WIMLIB_ERR_INVALID_IMAGE; + + if (new_image == template_image) + return WIMLIB_ERR_INVALID_PARAM; + + new_imd = wim->image_metadata[new_image - 1]; + if (!new_imd->modified) + return WIMLIB_ERR_INVALID_PARAM; + + ret = select_wim_image(wim, template_image); + if (ret) 
+ return ret; + + ret = for_dentry_in_tree(new_imd->root_dentry, + dentry_reference_template, wim); + dentry_tree_clear_inode_visited(new_imd->root_dentry); + return ret; +} diff --git a/src/extract.c b/src/extract.c index 1784cdf5..c04f11b6 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1802,20 +1802,13 @@ dentry_tally_features(struct wim_dentry *dentry, void *_features) return 0; } -static int -dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) -{ - dentry->d_inode->i_visited = 0; - return 0; -} - /* Tally the features necessary to extract a dentry tree. */ static void dentry_tree_get_features(struct wim_dentry *root, struct wim_features *features) { memset(features, 0, sizeof(struct wim_features)); for_dentry_in_tree(root, dentry_tally_features, features); - for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); + dentry_tree_clear_inode_visited(root); } static u32