From 465a630d28e93b09e55ca07b1a6cae8def3b42f5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 20 Aug 2013 16:26:16 -0500 Subject: [PATCH] "delta" WIM support --- NEWS | 11 +- doc/imagex-capture.1.in | 76 +++++- include/wimlib.h | 97 +++---- include/wimlib/lookup_table.h | 26 +- include/wimlib/wim.h | 8 +- include/wimlib/write.h | 5 +- programs/imagex.c | 169 ++++++++++--- src/dentry.c | 189 ++++++++++---- src/export_image.c | 4 +- src/extract.c | 2 +- src/join.c | 8 +- src/lookup_table.c | 173 ++++++------- src/mount_image.c | 35 +-- src/ntfs-3g_capture.c | 5 +- src/resource.c | 2 +- src/wim.c | 28 +- src/write.c | 463 +++++++++++++++++++--------------- 17 files changed, 800 insertions(+), 501 deletions(-) diff --git a/NEWS b/NEWS index 783e0256..13cf485c 100644 --- a/NEWS +++ b/NEWS @@ -8,13 +8,18 @@ Version 1.5.0: more information. To better support incremental backups, added support for declaring an - image as a modified form of a prior image. See the documentation for - the '--delta-from' option of `wimlib-imagex append'. + image as a modified form of another image. See the documentation for + the '--as-update-of' option of `wimlib-imagex append' and `wimlib-imagex + capture'. + + Added support for "delta" WIMs. See the documentation for the + '--as-delta-from' option of `wimlib-imagex capture'. The library support for managing split WIMs has been changed to support other arrangements, such as delta WIMs, and be easier to use. This change is visible in `wimlib-imagex', which also can now accept the - '--ref' option multiple times. + '--ref' option multiple times, and also now supports "delta" WIMs as + mentioned above. wimlib now preserves WIM integrity tables by default, even if WIMLIB_WRITE_FLAG_CHECK_INTEGRITY is not specified. 
This changes the diff --git a/doc/imagex-capture.1.in b/doc/imagex-capture.1.in index e12c8bfe..42719119 100644 --- a/doc/imagex-capture.1.in +++ b/doc/imagex-capture.1.in @@ -394,21 +394,69 @@ default for \fB@IMAGEX_PROGNAME@ capture\fR, except when writing to standard output (\fIWIMFILE\fR specified as "-"), and also for \fB@IMAGEX_PROGNAME@ append\fR, except when appending to a WIM that is already pipable. .TP -\fB--delta-from\fR=\fIIMAGE\fR -Only for \fB@IMAGEX_PROGNAME@ append\fR: declares that the new image being added -from \fISOURCE\fR is mostly the same as the existing image \fIIMAGE\fR in -\fIWIMFILE\fR, but captured at a later point in time, possibly with some -modifications in the intervening time. This is designed to be used in -incremental backups of the same filesystem or directory tree. \fIIMAGE\fR can -be a 1-based index or name of an existing image in \fIWIMFILE\fR. It can also -be a negative integer to index backwards into the images (e.g. -1 means the -last existing image). +\fB--as-update-of\fR=[\fIWIMFILE\fR]:\fIIMAGE\fR +Declares that the image being captured from \fISOURCE\fR is mostly the same as +the existing image \fIIMAGE\fR in \fIWIMFILE\fR, but captured at a later point +in time, possibly with some modifications in the intervening time. This is +designed to be used in incremental backups of the same filesystem or directory +tree. \fIIMAGE\fR can be a 1-based index or name of an existing image in +\fIWIMFILE\fR. It can also be a negative integer to index backwards into the +images (e.g. -1 means the last existing image). .IP "" -When this option is provided, the append of the new image will be optimized by -not reading files that, based on metadata such as timestamps, appear not to have -been modified since they were archived in the existing \fIIMAGE\fR. Barring -manipulation of timestamps, this option only affects performance and does not -change the resulting WIM file. 
+When this option is provided, the capture or append of the new image will be +optimized by not reading files that, based on metadata such as timestamps, +appear not to have been modified since they were archived in the existing +\fIIMAGE\fR. Barring manipulation of timestamps, this option only affects +performance and does not change the resulting WIM file. +.IP "" +As shown, the full syntax for the argument to this option is to specify the WIM +file, a colon, and the image; for example, "--as-update-of mywim.wim:1". +However, the WIM file may be omitted, in which case it will default to the WIM +file being appended to for append operations, or the WIM file from which a delta +is being taken (with \fB--as-delta-from\fR, if specified) for capture operations. +.TP +\fB--as-delta-from\fR=\fIWIMFILE\fR +For \fB@IMAGEX_PROGNAME@ capture\fR only: capture the new WIM as a "delta" from +\fIWIMFILE\fR. Any streams that would ordinarily need to be archived in the new +WIM are omitted if they are already present in the \fIWIMFILE\fR on which the +delta is being based. The new WIM will still contain a full copy of the image +metadata, but this is typically only a small fraction of a WIM's total size. +.IP "" +To operate on the resulting delta WIM using other commands such as +\fB@IMAGEX_PROGNAME@ apply\fR, you must specify the delta WIM as the WIM file to +operate on, but also reference the base WIM using the \fB--ref\fR option. +Beware: to retain the proper functioning of the delta WIM, you can only add, not +delete, files and images to the base WIM following the capture of a delta from +it. +.IP "" +\fB--as-delta-from\fR may be combined with \fB--as-update-of\fR to increase the +speed of capturing a delta WIM. 
+.IP "" +As an example, consider the following backup and restore sequence: +.IP "" +.RS +.nf +(initial backup) + +$ wimcapture /some/directory bkup-base.wim + +(some days later, create second backup as delta from first) + +$ wimcapture /some/directory bkup-2013-08-20.dwm \\ + --as-update-of=bkup-base.wim:-1 --as-delta-from=bkup-base.wim + +(restoring the second backup) + +$ wimapply bkup-2013-08-20.dwm --ref=bkup-base.wim 1 \\ + /some/directory +.RE +.fi +.IP "" +However, note that as an alternative to the above sequence that used a delta +WIM, the second backup could have simply been appended to the WIM as a new image +using \fB@IMAGEX_PROGNAME@ append\fR. Delta WIMs should be used only if it's +desired to base the backups or images on a separate, large file that is rarely +modified. .SH NOTES \fB@IMAGEX_PROGNAME@ append\fR does not support appending an image to a split WIM. .PP diff --git a/include/wimlib.h b/include/wimlib.h index 197f9b65..39538182 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -1319,6 +1319,22 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour * set the readonly flag on the on-disk WIM file. */ #define WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG 0x00000100 +/** Do not include non-metadata resources already present in other WIMs. This + * flag can be used to write a "delta" WIM after resources from the WIM on which + * the delta is to be based were referenced with + * wimlib_reference_resource_files() or wimlib_reference_resources(). */ +#define WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS 0x00000200 + +/** Asserts that for writes of all WIM images, all streams needed for the WIM + * are already present (not in external resource WIMs) and their reference + * counts are correct, so the code does not need to recalculate which streams + * are referenced. This is for optimization purposes only, since with this flag + * specified, the metadata resources may not need to be decompressed and parsed. 
+ * + * This flag can be passed to wimlib_write() and wimlib_write_to_fd(), but is + * already implied for wimlib_overwrite(). */ +#define WIMLIB_WRITE_FLAG_STREAMS_OK 0x00000400 + /** * @name Init flags * @@ -1787,9 +1803,10 @@ wimlib_delete_image(WIMStruct *wim, int image); * @retval ::WIMLIB_ERR_RESOURCE_NOT_FOUND * A resource that needed to be exported could not be found in either the * source or destination WIMs. This error can occur if, for example, @p - * src_wim is part of a split WIM but resources from the other split WIM - * parts were not referenced with wimlib_reference_resources() or - * wimlib_reference_resource_files(). + * src_wim is part of a split WIM but needed resources from the other split + * WIM parts were not referenced with wimlib_reference_resources() or + * wimlib_reference_resource_files() before the call to + * wimlib_export_image(). * @retval ::WIMLIB_ERR_WIM_IS_READONLY * @p dest_wim is considered read-only because of any of the reasons * mentioned in the documentation for the ::WIMLIB_OPEN_FLAG_WRITE_ACCESS @@ -2594,8 +2611,9 @@ wimlib_open_wim(const wimlib_tchar *wim_file, * If non-NULL, a function that will be called periodically with the * progress of the current operation. * - * @return 0 on success; nonzero on error. This function may return any value - * returned by wimlib_write() as well as the following error codes: + * @return 0 on success; nonzero on error. This function may return most error + * codes returned by wimlib_write() as well as the following error codes: + * * @retval ::WIMLIB_ERR_ALREADY_LOCKED * The WIM was going to be modified in-place (with no temporary file), but * an exclusive advisory lock on the on-disk WIM file could not be acquired @@ -2717,18 +2735,16 @@ wimlib_reference_resource_files(WIMStruct *wim, * @param ref_flags * Currently ignored (set to 0). * - * @return 0 on success; nonzero on error. 
On success, the ::WIMStruct's - * specified in @p resource_wims should be considered "don't touch" until either - * wimlib_free() is called on @p wim, or wimlib_unreference_resources() is - * called to unreference them. + * @return 0 on success; nonzero on error. On success, the ::WIMStruct's of the + * @p resource_wims are referenced internally by @p wim and must not be freed + * with wimlib_free() or overwritten with wimlib_overwrite() until @p wim has + * been freed with wimlib_free(), or immediately before freeing @p wim with + * wimlib_free(). * * @retval ::WIMLIB_ERR_INVALID_PARAM * @p wim was @c NULL, or @p num_resource_wims was nonzero but @p * resource_wims was @c NULL, or @p wim did not contain metadata resources, - * or an entry in @p resource_wims was @p NULL, or an entry in @p - * resource_wims was already referenced by a call to this function without - * a corresponding call to wimlib_free() on the metadata WIM, or - * wimlib_unreference_resources(). + * or an entry in @p resource_wims was @p NULL. */ extern int wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, @@ -2740,14 +2756,16 @@ wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, * intervening time. This is designed to be used in incremental backups of the * same filesystem or directory tree. * - * This function compares the directory tree of the newly added image against - * that of the old image. Any files that are present in both the newly added - * image and the old image and have timestamps that indicate they haven't been - * modified are deemed not to have been modified. Such files will not be read - * from the filesystem when the WIM is being written or overwritten. Note that - * these unchanged files will still be "archived" and will be logically present - * in the new image; the optimization is that they don't need to actually be - * read from the filesystem because the WIM already contains them. 
+ * This function compares the metadata of the directory tree of the newly added + * image against that of the old image. Any files that are present in both the + * newly added image and the old image and have timestamps that indicate they + * haven't been modified are deemed not to have been modified and have their + * SHA1 message digest copied from the old image. Because of this and because + * WIM uses single-instance streams, such files need not be read from the + * filesystem when the WIM is being written or overwritten. Note that these + * unchanged files will still be "archived" and will be logically present in the + * new image; the optimization is that they don't need to actually be read from + * the filesystem because the WIM already contains them. * * This function is provided to optimize incremental backups. The resulting WIM * file will still be the same regardless of whether this function is called. @@ -2766,6 +2784,9 @@ wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, * 1-based index in the WIM of the newly added image. This image can have * been added with wimlib_add_image() or wimlib_add_image_multisource(), or * wimlib_add_empty_image() followed by wimlib_update_image(). + * @param template_wim + * The ::WIMStruct for the WIM containing the template image. This can be + * the same as @p wim, or it can be a different ::WIMStruct. * @param template_image * 1-based index in the WIM of a template image that reflects a prior state * of the directory tree being captured. 
@@ -2799,8 +2820,8 @@ wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, */ extern int wimlib_reference_template_image(WIMStruct *wim, int new_image, - int template_image, int flags, - wimlib_progress_func_t progress_func); + WIMStruct *template_wim, int template_image, + int flags, wimlib_progress_func_t progress_func); /** * Translates a string specifying the name or number of an image in the WIM into @@ -3113,30 +3134,6 @@ wimlib_unmount_image(const wimlib_tchar *dir, int unmount_flags, wimlib_progress_func_t progress_func); -/** - * Unreferences resources previously referenced with - * wimlib_reference_resources(). - * - * Calling this is not necessary (or even possible) if the higher-level function - * wimlib_reference_resource_files() is used. - * - * @param wim - * See corresponding parameter to wimlib_reference_resources(). - * @param resource_wims - * See corresponding parameter to wimlib_reference_resources(). - * @param num_resource_wims - * See corresponding parameter to wimlib_reference_resources(). - * - * @return 0 on success; nonzero on error. - * - * @retval ::WIMLIB_ERR_INVALID_PARAM - * Not all entries in @p resource_wims specify valid ::WIMStruct's that are - * referenced by @p wim. - */ -extern int -wimlib_unreference_resources(WIMStruct *wim, WIMStruct **resource_wims, - unsigned num_resource_wims); - /** * Update a WIM image by adding, deleting, and/or renaming files or directories. * @@ -3302,6 +3299,12 @@ wimlib_update_image(WIMStruct *wim, * with @p wim, or some file resources in @p wim refer to files in the * outside filesystem, and a read error occurred when reading one of these * files. + * @retval ::WIMLIB_ERR_RESOURCE_NOT_FOUND + * A stream that needed to be written could not be found in the stream + * lookup table of @p wim. 
This error can occur if, for example, @p wim is + * part of a split WIM but needed resources from the other split WIM parts + * were not referenced with wimlib_reference_resources() or + * wimlib_reference_resource_files() before the call to wimlib_write(). * @retval ::WIMLIB_ERR_WRITE * An error occurred when trying to write data to the new WIM file. * diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h index 746d7449..93616348 100644 --- a/include/wimlib/lookup_table.h +++ b/include/wimlib/lookup_table.h @@ -163,6 +163,12 @@ struct wim_lookup_table_entry { * resource is read again. */ u16 dont_check_metadata_hash : 1; + /* Only used during WIM write. Normal value is 0 (resource not + * filtered). */ + u16 filtered : 2; +#define FILTERED_SAME_WIM 0x1 /* Resource already in same WIM */ +#define FILTERED_EXTERNAL_WIM 0x2 /* Resource already in external WIM */ + /* (On-disk field) * Number of times this lookup table entry is referenced by dentries. * Unfortunately, this field is not always set correctly in Microsoft's @@ -298,12 +304,6 @@ wim_resource_chunks(const struct wim_lookup_table_entry *lte) return DIV_ROUND_UP(wim_resource_size(lte), WIM_CHUNK_SIZE); } -static inline u64 -wim_resource_compressed_size(const struct wim_lookup_table_entry *lte) -{ - return lte->resource_entry.size; -} - static inline int wim_resource_compression_type(const struct wim_lookup_table_entry *lte) { @@ -385,12 +385,14 @@ for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, void *arg); extern struct wim_lookup_table_entry * -__lookup_resource(const struct wim_lookup_table *table, const u8 hash[]); +lookup_resource(const struct wim_lookup_table *table, const u8 hash[]); extern int -lookup_resource(WIMStruct *wim, const tchar *path, - int lookup_flags, struct wim_dentry **dentry_ret, - struct wim_lookup_table_entry **lte_ret, u16 *stream_idx_ret); +wim_pathname_to_stream(WIMStruct *wim, const tchar *path, + int lookup_flags, + struct wim_dentry 
**dentry_ret, + struct wim_lookup_table_entry **lte_ret, + u16 *stream_idx_ret); extern void lte_decrement_refcnt(struct wim_lookup_table_entry *lte, @@ -442,9 +444,9 @@ inode_stream_lte_unresolved(const struct wim_inode *inode, unsigned stream_idx, if (!table) return NULL; if (stream_idx == 0) - return __lookup_resource(table, inode->i_hash); + return lookup_resource(table, inode->i_hash); else - return __lookup_resource(table, + return lookup_resource(table, inode->i_ads_entries[ stream_idx - 1].hash); } diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h index fc36c519..5a04756b 100644 --- a/include/wimlib/wim.h +++ b/include/wimlib/wim.h @@ -42,11 +42,9 @@ struct WIMStruct { /* Temporary field */ void *private; - WIMStruct *master_wim; + struct list_head subwims; - struct list_head resource_wims; - - struct list_head resource_wim_node; + struct list_head subwim_node; /* The currently selected image, indexed starting at 1. If not 0, * subtract 1 from this to get the index of the current image in the @@ -67,7 +65,7 @@ struct WIMStruct { u8 being_unmerged : 1; - u8 is_owned_by_master : 1; + u8 guid_set_explicitly : 1; /* One of WIMLIB_COMPRESSION_TYPE_*, cached from the header flags. 
*/ u8 compression_type : 2; diff --git a/include/wimlib/write.h b/include/wimlib/write.h index 7a7bae61..eb73bed3 100644 --- a/include/wimlib/write.h +++ b/include/wimlib/write.h @@ -11,8 +11,9 @@ #define WIMLIB_WRITE_FLAG_HEADER_AT_END 0x10000000 #define WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR 0x08000000 #define WIMLIB_WRITE_FLAG_USE_EXISTING_TOTALBYTES 0x04000000 -#define WIMLIB_WRITE_FLAG_NO_METADATA 0x02000000 -#define WIMLIB_WRITE_MASK_PUBLIC 0x01ffffff +#define WIMLIB_WRITE_FLAG_NO_METADATA 0x02000000 +#define WIMLIB_WRITE_FLAG_OVERWRITE 0x01000000 +#define WIMLIB_WRITE_MASK_PUBLIC 0x00ffffff #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK) extern int diff --git a/programs/imagex.c b/programs/imagex.c index e3800ca3..031f555b 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -118,6 +118,8 @@ static FILE *imagex_info_file; enum { IMAGEX_ALLOW_OTHER_OPTION, + IMAGEX_AS_DELTA_FROM_OPTION, + IMAGEX_AS_UPDATE_OF_OPTION, IMAGEX_BOOT_OPTION, IMAGEX_CHECK_OPTION, IMAGEX_COMMAND_OPTION, @@ -125,7 +127,6 @@ enum { IMAGEX_COMPRESS_OPTION, IMAGEX_CONFIG_OPTION, IMAGEX_DEBUG_OPTION, - IMAGEX_DELTA_FROM_OPTION, IMAGEX_DEREFERENCE_OPTION, IMAGEX_DEST_DIR_OPTION, IMAGEX_EXTRACT_XML_OPTION, @@ -202,7 +203,9 @@ static const struct option capture_or_append_options[] = { {T("norpfix"), no_argument, NULL, IMAGEX_NORPFIX_OPTION}, {T("pipable"), no_argument, NULL, IMAGEX_PIPABLE_OPTION}, {T("not-pipable"), no_argument, NULL, IMAGEX_NOT_PIPABLE_OPTION}, - {T("delta-from"), required_argument, NULL, IMAGEX_DELTA_FROM_OPTION}, + {T("as-update-of"), required_argument, NULL, IMAGEX_AS_UPDATE_OF_OPTION}, + {T("as-update-from"), required_argument, NULL, IMAGEX_AS_UPDATE_OF_OPTION}, + {T("as-delta-from"), required_argument, NULL, IMAGEX_AS_DELTA_FROM_OPTION}, {NULL, 0, NULL, 0}, }; @@ -1451,13 +1454,13 @@ imagex_apply(int argc, tchar **argv, int cmd) { int c; int open_flags = WIMLIB_OPEN_FLAG_SPLIT_OK; - int image; + int image = WIMLIB_NO_IMAGE; WIMStruct *wim; struct wimlib_wim_info 
info; int ret; const tchar *wimfile; const tchar *target; - const tchar *image_num_or_name; + const tchar *image_num_or_name = NULL; int extract_flags = WIMLIB_EXTRACT_FLAG_SEQUENTIAL; REFGLOB_SET(refglobs); @@ -1639,9 +1642,15 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) const tchar *name; const tchar *desc; const tchar *flags_element = NULL; - const tchar *template_image_name_or_num = NULL; - int template_image; + WIMStruct *wim; + WIMStruct *base_wim; + const tchar *base_wimfile = NULL; + WIMStruct *template_wim; + const tchar *template_wimfile = NULL; + const tchar *template_image_name_or_num = NULL; + int template_image = WIMLIB_NO_IMAGE; + int ret; unsigned num_threads = 0; @@ -1653,7 +1662,7 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) struct wimlib_capture_config *config; bool source_list = false; - size_t source_list_nchars; + size_t source_list_nchars = 0; tchar *source_list_contents; bool capture_sources_malloced; struct wimlib_capture_source *capture_sources; @@ -1722,13 +1731,38 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) case IMAGEX_NOT_PIPABLE_OPTION: write_flags |= WIMLIB_WRITE_FLAG_NOT_PIPABLE; break; - case IMAGEX_DELTA_FROM_OPTION: - if (cmd == CMD_CAPTURE) { - imagex_error(T("--delta-from=IMAGE is only " - "valid for append.")); + case IMAGEX_AS_UPDATE_OF_OPTION: + if (template_image_name_or_num) { + imagex_error(T("'--as-update-of' can only be " + "specified one time!")); + goto out_err; + } else { + tchar *colon; + colon = tstrrchr(optarg, T(':')); + + if (colon) { + template_wimfile = optarg; + *colon = T('\0'); + template_image_name_or_num = colon + 1; + } else { + template_wimfile = NULL; + template_image_name_or_num = optarg; + } + } + break; + case IMAGEX_AS_DELTA_FROM_OPTION: + if (cmd != CMD_CAPTURE) { + imagex_error(T("'--as-delta-from' is only " + "valid for capture!")); goto out_usage; } - template_image_name_or_num = optarg; + if (base_wimfile) { + imagex_error(T("'--as-delta-from' 
can only be " + "specified one time!")); + goto out_err; + } + base_wimfile = optarg; + write_flags |= WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS; break; default: goto out_usage; @@ -1767,6 +1801,26 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) set_fd_to_binary_mode(wim_fd); } + /* If template image was specified using --as-update-of=IMAGE rather + * than --as-update-of=WIMFILE:IMAGE, set the default WIMFILE. */ + if (template_image_name_or_num && !template_wimfile) { + if (base_wimfile) { + /* Capturing delta WIM: default to base WIM. */ + template_wimfile = base_wimfile; + } else if (cmd == CMD_APPEND) { + /* Appending to WIM: default to WIM being appended to. + */ + template_wimfile = wimfile; + } else { + /* Capturing a normal (non-delta) WIM, so the WIM file + * *must* be explicitly specified. */ + imagex_error(T("For capture of non-delta WIM, " + "'--as-update-of' must specify " + "WIMFILE:IMAGE!")); + goto out_usage; + } + } + if (argc >= 3) { name = argv[2]; name_defaulted = false; @@ -1808,8 +1862,7 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) } capture_sources_malloced = true; } else { - /* Set up capture source in non-source-list mode (could be - * either "normal" mode or "NTFS mode"--- see the man page). */ + /* Set up capture source in non-source-list mode. */ capture_sources = alloca(sizeof(struct wimlib_capture_source)); capture_sources[0].fs_source_path = source; capture_sources[0].wim_target_path = NULL; @@ -1820,6 +1873,7 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) } if (config_file) { + /* Read and parse capture configuration file. */ size_t config_len; config_str = file_get_text_contents(config_file, &config_len); @@ -1833,9 +1887,13 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) if (ret) goto out_free_config; } else { + /* No capture configuration file specified; use default + * configuration for capturing Windows operating systems. 
*/ config = NULL; + add_image_flags |= WIMLIB_ADD_FLAG_WINCONFIG; } + /* Open the existing WIM, or create a new one. */ if (cmd == CMD_APPEND) ret = wimlib_open_wim(wimfile, open_flags, &wim, imagex_progress_func); @@ -1845,6 +1903,8 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) goto out_free_config; #ifndef __WIN32__ + /* Detect if source is regular file or block device and set NTFS volume + * capture mode. */ if (!source_list) { struct stat stbuf; @@ -1859,16 +1919,16 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) imagex_error_with_errno(T("Failed to stat " "\"%"TS"\""), source); ret = -1; - goto out_wimlib_free; + goto out_free_wim; } } } #endif + /* If the user did not specify an image name, and the basename of the + * source already exists as an image name in the WIM file, append a + * suffix to make it unique. */ if (cmd == CMD_APPEND && name_defaulted) { - /* If the user did not specify an image name, and the basename - * of the source already exists as an image name in the WIM - * file, append a suffix to make it unique. */ unsigned long conflict_idx; tchar *name_end = tstrchr(name, T('\0')); for (conflict_idx = 1; @@ -1879,8 +1939,43 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) } } + /* If capturing a delta WIM, reference resources from the base WIM + * before adding the new image. */ + if (base_wimfile) { + ret = wimlib_open_wim(base_wimfile, open_flags, + &base_wim, imagex_progress_func); + if (ret) + goto out_free_wim; + + imagex_printf(T("Capturing delta WIM based on \"%"TS"\"\n"), + base_wimfile); + + ret = wimlib_reference_resources(wim, &base_wim, 1, 0); + if (ret) + goto out_free_base_wim; + } else { + base_wim = NULL; + } + + /* If capturing or appending as an update of an existing (template) image, + * open the WIM if needed and parse the image index. 
*/ if (template_image_name_or_num) { - template_image = wimlib_resolve_image(wim, template_image_name_or_num); + + + if (template_wimfile == base_wimfile) { + template_wim = base_wim; + } else if (template_wimfile == wimfile) { + template_wim = wim; + } else { + ret = wimlib_open_wim(template_wimfile, open_flags, + &template_wim, imagex_progress_func); + if (ret) + goto out_free_base_wim; + } + + template_image = wimlib_resolve_image(template_wim, + template_image_name_or_num); + if (template_image_name_or_num[0] == T('-')) { tchar *tmp; unsigned long n; @@ -1897,9 +1992,11 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) } ret = verify_image_exists_and_is_single(template_image, template_image_name_or_num, - wimfile); + template_wimfile); if (ret) - goto out_wimlib_free; + goto out_free_template_wim; + } else { + template_wim = NULL; } ret = wimlib_add_image_multisource(wim, @@ -1910,12 +2007,12 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) add_image_flags, imagex_progress_func); if (ret) - goto out_wimlib_free; + goto out_free_template_wim; if (desc || flags_element || template_image_name_or_num) { /* User provided or element, or an image * on which the added one is to be based has been specified with - * --delta-from=IMAGE. Get the index of the image we just + * --as-update-of. Get the index of the image we just * added, then use it to call the appropriate functions. */ struct wimlib_wim_info info; @@ -1926,23 +2023,28 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) info.image_count, desc); if (ret) - goto out_wimlib_free; + goto out_free_template_wim; } if (flags_element) { ret = wimlib_set_image_flags(wim, info.image_count, flags_element); if (ret) - goto out_wimlib_free; + goto out_free_template_wim; } + /* Reference template image if the user provided one. 
*/ if (template_image_name_or_num) { + imagex_printf(T("Using image %d " + "from \"%"TS"\" as template\n"), + template_image, template_wimfile); ret = wimlib_reference_template_image(wim, info.image_count, + template_wim, template_image, 0, NULL); if (ret) - goto out_wimlib_free; + goto out_free_template_wim; } } @@ -1960,7 +2062,13 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd) write_flags, num_threads, imagex_progress_func); } -out_wimlib_free: +out_free_template_wim: + /* template_wim may alias base_wim or wim. */ + if (template_wim != base_wim && template_wim != wim) + wimlib_free(template_wim); +out_free_base_wim: + wimlib_free(base_wim); +out_free_wim: wimlib_free(wim); out_free_config: if (config) { @@ -3255,8 +3363,7 @@ imagex_update(int argc, tchar **argv, int cmd) int open_flags = WIMLIB_OPEN_FLAG_WRITE_ACCESS; int write_flags = 0; int update_flags = WIMLIB_UPDATE_FLAG_SEND_PROGRESS; - int default_add_flags = WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE | - WIMLIB_ADD_FLAG_WINCONFIG; + int default_add_flags = WIMLIB_ADD_FLAG_EXCLUDE_VERBOSE; int default_delete_flags = 0; unsigned num_threads = 0; int c; @@ -3382,6 +3489,7 @@ imagex_update(int argc, tchar **argv, int cmd) goto out_free_config; } else { config = NULL; + default_add_flags |= WIMLIB_ADD_FLAG_WINCONFIG; } /* Read update commands from standard input, or the command string if @@ -3495,7 +3603,7 @@ T( " [--dereference] [--config=FILE] [--threads=NUM_THREADS]\n" " [--rebuild] [--unix-data] [--source-list] [--no-acls]\n" " [--strict-acls] [--rpfix] [--norpfix] [--pipable]\n" -" [--not-pipable] [--delta-from=IMAGE]\n" +" [--not-pipable] [--as-update-of=[WIMFILE:]IMAGE]\n" ), [CMD_APPLY] = T( @@ -3513,6 +3621,7 @@ T( " [--verbose] [--dereference] [--config=FILE]\n" " [--threads=NUM_THREADS] [--unix-data] [--source-list]\n" " [--no-acls] [--strict-acls] [--norpfix] [--pipable]\n" +" [--as-update-of=[WIMFILE:]IMAGE] [--as-delta-from=WIMFILE]\n" ), [CMD_DELETE] = T( diff --git a/src/dentry.c 
b/src/dentry.c index a73828ba..7caab57f 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -1459,7 +1459,7 @@ add_stream_from_data_buffer(const void *buffer, size_t size, struct wim_lookup_table_entry *lte, *existing_lte; sha1_buffer(buffer, size, hash); - existing_lte = __lookup_resource(lookup_table, hash); + existing_lte = lookup_resource(lookup_table, hash); if (existing_lte) { wimlib_assert(wim_resource_size(existing_lte) == size); lte = existing_lte; @@ -2500,62 +2500,145 @@ wimlib_iterate_dir_tree(WIMStruct *wim, int image, const tchar *path, return for_image(wim, image, image_do_iterate_dir_tree); } +/* Returns %true iff the metadata of @inode and @template_inode are reasonably + * consistent with them being the same, unmodified file. */ static bool -inode_stream_sizes_consistent(const struct wim_inode *inode_1, - const struct wim_inode *inode_2, - const struct wim_lookup_table *lookup_table) +inode_metadata_consistent(const struct wim_inode *inode, + const struct wim_inode *template_inode, + const struct wim_lookup_table *template_lookup_table) { - if (inode_1->i_num_ads != inode_2->i_num_ads) + /* Must have exact same creation time and last write time. */ + if (inode->i_creation_time != template_inode->i_creation_time || + inode->i_last_write_time != template_inode->i_last_write_time) return false; - for (unsigned i = 0; i <= inode_1->i_num_ads; i++) { - const struct wim_lookup_table_entry *lte_1, *lte_2; - lte_1 = inode_stream_lte(inode_1, i, lookup_table); - lte_2 = inode_stream_lte(inode_2, i, lookup_table); - if (lte_1 && lte_2) { - if (wim_resource_size(lte_1) != wim_resource_size(lte_2)) + /* Last access time may have stayed the same or increased, but certainly + * shouldn't have decreased. */ + if (inode->i_last_access_time < template_inode->i_last_access_time) + return false; + + /* Must have same number of alternate data stream entries. 
*/ + if (inode->i_num_ads != template_inode->i_num_ads) + return false; + + /* If the stream entries for the inode are for some reason not resolved, + * then the hashes are already available and the point of this function + * is defeated. */ + if (!inode->i_resolved) + return false; + + /* Iterate through each stream and do some more checks. */ + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + const struct wim_lookup_table_entry *lte, *template_lte; + + lte = inode_stream_lte_resolved(inode, i); + template_lte = inode_stream_lte(template_inode, i, + template_lookup_table); + + /* Compare stream sizes. */ + if (lte && template_lte) { + if (wim_resource_size(lte) != wim_resource_size(template_lte)) return false; - } else if (lte_1 && wim_resource_size(lte_1)) { + + /* If hash happens to be available, compare with template. */ + if (!lte->unhashed && !template_lte->unhashed && + !hashes_equal(lte->hash, template_lte->hash)) + return false; + + } else if (lte && wim_resource_size(lte)) { return false; - } else if (lte_2 && wim_resource_size(lte_2)) { + } else if (template_lte && wim_resource_size(template_lte)) { return false; } + } + + /* All right, barring a full checksum and given that the inodes share a + * path and the user isn't trying to trick us, these inodes most likely + * refer to the same file. */ return true; } -static void -inode_replace_ltes(struct wim_inode *inode, - struct wim_inode *template_inode, - struct wim_lookup_table *lookup_table) +/** + * Given an inode @inode that has been determined to be "the same" as another + * inode @template_inode in either the same WIM or another WIM, retrieve some + * useful stream information (e.g. checksums) from @template_inode. + * + * This assumes that the streams for @inode have been resolved (to point + * directly to the appropriate `struct wim_lookup_table_entry') but do not + * necessarily have checksum information filled in. 
+ */ +static int +inode_copy_checksums(struct wim_inode *inode, + struct wim_inode *template_inode, + WIMStruct *wim, + WIMStruct *template_wim) { for (unsigned i = 0; i <= inode->i_num_ads; i++) { - struct wim_lookup_table_entry *lte, *lte_template; + struct wim_lookup_table_entry *lte, *template_lte; + struct wim_lookup_table_entry *replace_lte; + + lte = inode_stream_lte_resolved(inode, i); + template_lte = inode_stream_lte(template_inode, i, + template_wim->lookup_table); + + /* Only take action if both entries exist, the entry for @inode + * has no checksum calculated, but the entry for @template_inode + * does. */ + if (!lte || !template_lte || + !lte->unhashed || template_lte->unhashed) + continue; - lte = inode_stream_lte(inode, i, lookup_table); - if (lte) { - for (unsigned j = 0; j < inode->i_nlink; j++) - lte_decrement_refcnt(lte, lookup_table); - lte_template = inode_stream_lte(template_inode, i, - lookup_table); - if (i == 0) - inode->i_lte = lte_template; - else - inode->i_ads_entries[i - 1].lte = lte_template; - if (lte_template) - lte_template->refcnt += inode->i_nlink; + wimlib_assert(lte->refcnt == inode->i_nlink); + + /* If the WIM of the template image is the same as the WIM of + * the new image, then @template_lte can be used directly. + * + * Otherwise, look for a stream with the same hash in the WIM of + * the new image. If found, use it; otherwise re-use the entry + * being discarded, filling in the hash. 
*/ + + if (wim == template_wim) + replace_lte = template_lte; + else + replace_lte = lookup_resource(wim->lookup_table, + template_lte->hash); + + list_del(&lte->unhashed_list); + if (replace_lte) { + free_lookup_table_entry(lte); + } else { + copy_hash(lte->hash, template_lte->hash); + lte->unhashed = 0; + lookup_table_insert(wim->lookup_table, lte); + lte->refcnt = 0; + replace_lte = lte; } + + if (i == 0) + inode->i_lte = replace_lte; + else + inode->i_ads_entries[i - 1].lte = replace_lte; + + replace_lte->refcnt += inode->i_nlink; } - inode->i_resolved = 1; + return 0; } +struct reference_template_args { + WIMStruct *wim; + WIMStruct *template_wim; +}; + static int -dentry_reference_template(struct wim_dentry *dentry, void *_wim) +dentry_reference_template(struct wim_dentry *dentry, void *_args) { int ret; struct wim_dentry *template_dentry; struct wim_inode *inode, *template_inode; - WIMStruct *wim = _wim; + struct reference_template_args *args = _args; + WIMStruct *wim = args->wim; + WIMStruct *template_wim = args->template_wim; if (dentry->d_inode->i_visited) return 0; @@ -2564,7 +2647,7 @@ dentry_reference_template(struct wim_dentry *dentry, void *_wim) if (ret) return ret; - template_dentry = get_dentry(wim, dentry->_full_path); + template_dentry = get_dentry(template_wim, dentry->_full_path); if (!template_dentry) { DEBUG("\"%"TS"\": newly added file", dentry->_full_path); return 0; @@ -2573,38 +2656,37 @@ dentry_reference_template(struct wim_dentry *dentry, void *_wim) inode = dentry->d_inode; template_inode = template_dentry->d_inode; - if (inode->i_last_write_time == template_inode->i_last_write_time - && inode->i_creation_time == template_inode->i_creation_time - && inode->i_last_access_time >= template_inode->i_last_access_time - && inode_stream_sizes_consistent(inode, template_inode, - wim->lookup_table)) - { + if (inode_metadata_consistent(inode, template_inode, + template_wim->lookup_table)) { /*DEBUG("\"%"TS"\": No change detected", 
dentry->_full_path);*/ - inode_replace_ltes(inode, template_inode, wim->lookup_table); + ret = inode_copy_checksums(inode, template_inode, + wim, template_wim); inode->i_visited = 1; } else { DEBUG("\"%"TS"\": change detected!", dentry->_full_path); + ret = 0; } - return 0; + return ret; } /* API function documented in wimlib.h */ WIMLIBAPI int -wimlib_reference_template_image(WIMStruct *wim, int new_image, int template_image, +wimlib_reference_template_image(WIMStruct *wim, int new_image, + WIMStruct *template_wim, int template_image, int flags, wimlib_progress_func_t progress_func) { int ret; struct wim_image_metadata *new_imd; - if (new_image < 1 || new_image > wim->hdr.image_count) - return WIMLIB_ERR_INVALID_IMAGE; - - if (template_image < 1 || template_image > wim->hdr.image_count) - return WIMLIB_ERR_INVALID_IMAGE; + if (wim == NULL || template_wim == NULL) + return WIMLIB_ERR_INVALID_PARAM; - if (new_image == template_image) + if (wim == template_wim && new_image == template_image) return WIMLIB_ERR_INVALID_PARAM; + if (new_image < 1 || new_image > wim->hdr.image_count) + return WIMLIB_ERR_INVALID_IMAGE; + if (!wim_has_metadata(wim)) return WIMLIB_ERR_METADATA_NOT_FOUND; @@ -2612,12 +2694,17 @@ wimlib_reference_template_image(WIMStruct *wim, int new_image, int template_imag if (!new_imd->modified) return WIMLIB_ERR_INVALID_PARAM; - ret = select_wim_image(wim, template_image); + ret = select_wim_image(template_wim, template_image); if (ret) return ret; + struct reference_template_args args = { + .wim = wim, + .template_wim = template_wim, + }; + ret = for_dentry_in_tree(new_imd->root_dentry, - dentry_reference_template, wim); + dentry_reference_template, &args); dentry_tree_clear_inode_visited(new_imd->root_dentry); return ret; } diff --git a/src/export_image.c b/src/export_image.c index 81c66594..b769afc2 100644 --- a/src/export_image.c +++ b/src/export_image.c @@ -52,12 +52,12 @@ inode_export_streams(struct wim_inode *inode, /* Search for the stream (via 
SHA1 message digest) in the * destination WIM. */ - dest_lte = __lookup_resource(dest_lookup_table, hash); + dest_lte = lookup_resource(dest_lookup_table, hash); if (!dest_lte) { /* Stream not yet present in destination WIM. Search * for it in the source WIM, then export it into the * destination WIM. */ - src_lte = __lookup_resource(src_lookup_table, hash); + src_lte = lookup_resource(src_lookup_table, hash); if (!src_lte) return resource_not_found_error(inode, hash); diff --git a/src/extract.c b/src/extract.c index c9f6e21f..a087a76d 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1426,7 +1426,7 @@ extract_streams_from_pipe(struct apply_ctx *ctx) if ((found_lte->resource_location != RESOURCE_NONEXISTENT) && !(found_lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) - && (needed_lte = __lookup_resource(lookup_table, found_lte->hash)) + && (needed_lte = lookup_resource(lookup_table, found_lte->hash)) && (needed_lte->out_refcnt)) { copy_resource_entry(&needed_lte->resource_entry, diff --git a/src/join.c b/src/join.c index 1045fc71..d52bd0f2 100644 --- a/src/join.c +++ b/src/join.c @@ -195,10 +195,12 @@ wimlib_join(const tchar * const *swm_names, if (ret) goto out_free_swms; + /* It is reasonably safe to provide WIMLIB_WRITE_FLAG_STREAMS_OK, as we + * have verified that the specified split WIM parts form a spanned set. 
+ */ ret = wimlib_write(swm0, output_path, WIMLIB_ALL_IMAGES, - wim_write_flags, 1, progress_func); - wimlib_unreference_resources(swm0, additional_swms, - num_additional_swms); + wim_write_flags | WIMLIB_WRITE_FLAG_STREAMS_OK, + 1, progress_func); out_free_swms: for (i = 0; i < num_additional_swms; i++) wimlib_free(additional_swms[i]); diff --git a/src/lookup_table.c b/src/lookup_table.c index a6202ef2..2a5bac31 100644 --- a/src/lookup_table.c +++ b/src/lookup_table.c @@ -613,7 +613,7 @@ read_wim_lookup_table(WIMStruct *wim) } else { /* Lookup table entry for a stream that is not a * metadata resource */ - duplicate_entry = __lookup_resource(table, cur_entry->hash); + duplicate_entry = lookup_resource(table, cur_entry->hash); if (duplicate_entry) { if (wimlib_print_errors) { WARNING("The WIM lookup table contains two entries with the " @@ -713,8 +713,29 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list, static int append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list) { - if (lte->out_refcnt != 0) + /* Lookup table entries with 'out_refcnt' == 0 correspond to streams not + * written and not present in the resulting WIM file, and should not be + * included in the lookup table. + * + * Lookup table entries marked as filtered (EXTERNAL_WIM) with + * 'out_refcnt != 0' were referenced as part of the logical write but + * correspond to streams that were not in fact written, and should not + * be included in the lookup table. + * + * Lookup table entries marked as filtered (SAME_WIM) with 'out_refcnt + * != 0' were referenced as part of the logical write but correspond to + * streams that were not in fact written, but nevertheless were already + * present in the WIM being overwritten in-place. These entries must be + * included in the lookup table, and the resource information to write + * needs to be copied from the resource information read originally. 
+ */ + if (lte->out_refcnt != 0 && !(lte->filtered & FILTERED_EXTERNAL_WIM)) { + if (lte->filtered & FILTERED_SAME_WIM) { + copy_resource_entry(&lte->output_resource_entry, + &lte->resource_entry); + } list_add_tail(&lte->lookup_table_list, (struct list_head*)_list); + } return 0; } @@ -761,7 +782,9 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, } } - /* Append additional lookup table entries that have out_refcnt != 0. */ + /* Append additional lookup table entries that need to be written, with + * some special handling for streams that have been marked as filtered. + */ if (!stream_list_override) { for_lookup_table_entry(wim->lookup_table, append_lookup_table_entry, stream_list); @@ -922,7 +945,7 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags, /* Given a SHA1 message digest, return the corresponding entry in the WIM's * lookup table, or NULL if there is none. */ struct wim_lookup_table_entry * -__lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) +lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) { size_t i; struct wim_lookup_table_entry *lte; @@ -946,12 +969,12 @@ __lookup_resource(const struct wim_lookup_table *table, const u8 hash[]) * This is only for pre-resolved inodes. 
*/ int -lookup_resource(WIMStruct *wim, - const tchar *path, - int lookup_flags, - struct wim_dentry **dentry_ret, - struct wim_lookup_table_entry **lte_ret, - u16 *stream_idx_ret) +wim_pathname_to_stream(WIMStruct *wim, + const tchar *path, + int lookup_flags, + struct wim_dentry **dentry_ret, + struct wim_lookup_table_entry **lte_ret, + u16 *stream_idx_ret) { struct wim_dentry *dentry; struct wim_lookup_table_entry *lte; @@ -1050,7 +1073,7 @@ inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table, lte = NULL; hash = inode->i_hash; if (!is_zero_hash(hash)) { - lte = __lookup_resource(table, hash); + lte = lookup_resource(table, hash); if (!lte) { if (force) { lte = new_lookup_table_entry(); @@ -1073,7 +1096,7 @@ inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table, cur_entry = &inode->i_ads_entries[i]; hash = cur_entry->hash; if (!is_zero_hash(hash)) { - ads_lte = __lookup_resource(table, hash); + ads_lte = lookup_resource(table, hash); if (!ads_lte) { if (force) { ads_lte = new_lookup_table_entry(); @@ -1256,7 +1279,7 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, return ret; /* Look for a duplicate stream */ - duplicate_lte = __lookup_resource(lookup_table, lte->hash); + duplicate_lte = lookup_resource(lookup_table, lte->hash); list_del(&lte->unhashed_list); if (duplicate_lte) { /* We have a duplicate stream. Transfer the reference counts @@ -1265,7 +1288,7 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, * duplicate, then free this stream. 
*/ wimlib_assert(!(duplicate_lte->unhashed)); duplicate_lte->refcnt += lte->refcnt; - duplicate_lte->out_refcnt += lte->refcnt; + duplicate_lte->out_refcnt += lte->out_refcnt; *back_ptr = duplicate_lte; free_lookup_table_entry(lte); lte = duplicate_lte; @@ -1282,86 +1305,67 @@ hash_unhashed_stream(struct wim_lookup_table_entry *lte, } static int -move_lte_to_table(struct wim_lookup_table_entry *lte, void *_combined_table) +lte_clone_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) { - struct wim_lookup_table *combined_table = _combined_table; - - hlist_del(&lte->hash_list); - lookup_table_insert(combined_table, lte); - return 0; -} + struct wim_lookup_table *lookup_table = _lookup_table; -static void -lookup_table_join(struct wim_lookup_table *combined_table, - struct wim_lookup_table *part_table) -{ - for_lookup_table_entry(part_table, move_lte_to_table, combined_table); - part_table->num_entries = 0; -} + if (lookup_resource(lookup_table, lte->hash)) + return 0; /* Resource already present. 
*/ -static void -merge_lookup_tables(WIMStruct *wim, WIMStruct **resource_wims, - unsigned num_resource_wims) -{ - for (unsigned i = 0; i < num_resource_wims; i++) { - lookup_table_join(wim->lookup_table, resource_wims[i]->lookup_table); - list_add(&resource_wims[i]->resource_wim_node, &wim->resource_wims); - resource_wims[i]->master_wim = wim; - } + lte = clone_lookup_table_entry(lte); + if (!lte) + return WIMLIB_ERR_NOMEM; + lte->out_refcnt = 1; + lookup_table_insert(lookup_table, lte); + return 0; } static int -move_lte_to_orig_table(struct wim_lookup_table_entry *lte, void *_wim) +lte_delete_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table) { - WIMStruct *wim = _wim; + struct wim_lookup_table *lookup_table = _lookup_table; - if (lte->resource_location == RESOURCE_IN_WIM && - lte->wim->being_unmerged) - { - move_lte_to_table(lte, lte->wim->lookup_table); - wim->lookup_table->num_entries--; + if (lte->out_refcnt) { + lookup_table_unlink(lookup_table, lte); + free_lookup_table_entry(lte); } return 0; } -static int -check_reference_params(WIMStruct *wim, - WIMStruct **resource_wims, unsigned num_resource_wims, - WIMStruct *expected_master) +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_reference_resources(WIMStruct *wim, + WIMStruct **resource_wims, unsigned num_resource_wims, + int ref_flags) { - if (wim == NULL) - return WIMLIB_ERR_INVALID_PARAM; + int ret; + unsigned i; - if (wim->hdr.part_number != 1) + if (wim == NULL) return WIMLIB_ERR_INVALID_PARAM; if (num_resource_wims != 0 && resource_wims == NULL) return WIMLIB_ERR_INVALID_PARAM; - for (unsigned i = 0; i < num_resource_wims; i++) { + for (i = 0; i < num_resource_wims; i++) if (resource_wims[i] == NULL) return WIMLIB_ERR_INVALID_PARAM; - if (resource_wims[i]->master_wim != expected_master) - return WIMLIB_ERR_INVALID_PARAM; - } - return 0; -} - -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_reference_resources(WIMStruct *wim, - WIMStruct **resource_wims, 
unsigned num_resource_wims, - int ref_flags) -{ - int ret; - ret = check_reference_params(wim, resource_wims, - num_resource_wims, NULL); - if (ret) - return ret; + for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); - merge_lookup_tables(wim, resource_wims, num_resource_wims); + for (i = 0; i < num_resource_wims; i++) { + ret = for_lookup_table_entry(resource_wims[i]->lookup_table, + lte_clone_if_new, + wim->lookup_table); + if (ret) + goto out_rollback; + } return 0; + +out_rollback: + for_lookup_table_entry(wim->lookup_table, lte_delete_if_new, + wim->lookup_table); + return ret; } static int @@ -1383,6 +1387,8 @@ reference_resource_paths(WIMStruct *wim, return WIMLIB_ERR_NOMEM; for (i = 0; i < num_resource_wimfiles; i++) { + DEBUG("Referencing resources from path \"%"TS"\"", + resource_wimfiles[i]); ret = wimlib_open_wim(resource_wimfiles[i], open_flags, &resource_wims[i], progress_func); if (ret) @@ -1395,7 +1401,7 @@ reference_resource_paths(WIMStruct *wim, goto out_free_resource_wims; for (i = 0; i < num_resource_wimfiles; i++) - resource_wims[i]->is_owned_by_master = 1; + list_add_tail(&resource_wims[i]->subwim_node, &wim->subwims); ret = 0; goto out_free_array; @@ -1479,28 +1485,3 @@ wimlib_reference_resource_files(WIMStruct *wim, open_flags, progress_func); } } - -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_unreference_resources(WIMStruct *wim, - WIMStruct **resource_wims, unsigned num_resource_wims) -{ - int ret; - unsigned i; - - ret = check_reference_params(wim, resource_wims, num_resource_wims, wim); - if (ret) - return ret; - - for (i = 0; i < num_resource_wims; i++) - resource_wims[i]->being_unmerged = 1; - - for_lookup_table_entry(wim->lookup_table, move_lte_to_orig_table, wim); - - for (i = 0; i < num_resource_wims; i++) { - resource_wims[i]->being_unmerged = 0; - list_del(&resource_wims[i]->resource_wim_node); - resource_wims[i]->master_wim = NULL; - } - return 0; -} diff --git a/src/mount_image.c 
b/src/mount_image.c index 85c0dee4..cc1c57dc 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -482,7 +482,7 @@ create_staging_file(char **name_ret, struct wimfs_context *ctx) static const size_t STAGING_FILE_NAME_LEN = 20; name_len = ctx->staging_dir_name_len + 1 + STAGING_FILE_NAME_LEN; - name = MALLOC(name_len + 1); + name = MALLOC(name_len + 1); if (!name) { errno = ENOMEM; return -1; @@ -864,7 +864,7 @@ set_message_queue_names(struct wimfs_context *ctx, const char *mount_dir) char *p; int ret; - dir_path = realpath(mount_dir, NULL); + dir_path = realpath(mount_dir, NULL); if (!dir_path) { ERROR_WITH_ERRNO("Failed to resolve path \"%s\"", mount_dir); if (errno == ENOMEM) @@ -1539,8 +1539,8 @@ wimfs_chmod(const char *path, mode_t mask) if (!(ctx->mount_flags & WIMLIB_MOUNT_FLAG_UNIX_DATA)) return -EPERM; - ret = lookup_resource(ctx->wim, path, LOOKUP_FLAG_DIRECTORY_OK, - &dentry, NULL, NULL); + ret = wim_pathname_to_stream(ctx->wim, path, LOOKUP_FLAG_DIRECTORY_OK, + &dentry, NULL, NULL); if (ret) return ret; @@ -1560,8 +1560,8 @@ wimfs_chown(const char *path, uid_t uid, gid_t gid) if (!(ctx->mount_flags & WIMLIB_MOUNT_FLAG_UNIX_DATA)) return -EPERM; - ret = lookup_resource(ctx->wim, path, LOOKUP_FLAG_DIRECTORY_OK, - &dentry, NULL, NULL); + ret = wim_pathname_to_stream(ctx->wim, path, LOOKUP_FLAG_DIRECTORY_OK, + &dentry, NULL, NULL); if (ret) return ret; @@ -1621,9 +1621,10 @@ wimfs_getattr(const char *path, struct stat *stbuf) int ret; struct wimfs_context *ctx = wimfs_get_context(); - ret = lookup_resource(ctx->wim, path, - get_lookup_flags(ctx) | LOOKUP_FLAG_DIRECTORY_OK, - &dentry, &lte, NULL); + ret = wim_pathname_to_stream(ctx->wim, path, + get_lookup_flags(ctx) | + LOOKUP_FLAG_DIRECTORY_OK, + &dentry, &lte, NULL); if (ret != 0) return ret; return inode_to_stbuf(dentry->d_inode, lte, stbuf); @@ -1828,8 +1829,8 @@ wimfs_open(const char *path, struct fuse_file_info *fi) struct wimfs_context *ctx = wimfs_get_context(); struct wim_lookup_table_entry 
**back_ptr; - ret = lookup_resource(ctx->wim, path, get_lookup_flags(ctx), - &dentry, &lte, &stream_idx); + ret = wim_pathname_to_stream(ctx->wim, path, get_lookup_flags(ctx), + &dentry, &lte, &stream_idx); if (ret) return ret; @@ -2186,8 +2187,8 @@ wimfs_truncate(const char *path, off_t size) struct wim_inode *inode; struct wimfs_context *ctx = wimfs_get_context(); - ret = lookup_resource(ctx->wim, path, get_lookup_flags(ctx), - &dentry, &lte, &stream_idx); + ret = wim_pathname_to_stream(ctx->wim, path, get_lookup_flags(ctx), + &dentry, &lte, &stream_idx); if (ret != 0) return ret; @@ -2231,8 +2232,8 @@ wimfs_unlink(const char *path) u16 stream_idx; struct wimfs_context *ctx = wimfs_get_context(); - ret = lookup_resource(ctx->wim, path, get_lookup_flags(ctx), - &dentry, &lte, &stream_idx); + ret = wim_pathname_to_stream(ctx->wim, path, get_lookup_flags(ctx), + &dentry, &lte, &stream_idx); if (ret != 0) return ret; @@ -2258,7 +2259,7 @@ wimfs_utimens(const char *path, const struct timespec tv[2]) struct wim_inode *inode; WIMStruct *wim = wimfs_get_WIMStruct(); - dentry = get_dentry(wim, path); + dentry = get_dentry(wim, path); if (!dentry) return -errno; inode = dentry->d_inode; @@ -2285,7 +2286,7 @@ wimfs_utime(const char *path, struct utimbuf *times) struct wim_inode *inode; WIMStruct *wim = wimfs_get_WIMStruct(); - dentry = get_dentry(wim, path); + dentry = get_dentry(wim, path); if (!dentry) return -errno; inode = dentry->d_inode; diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index d154fa9a..c674cca9 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -658,7 +658,7 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, if (!(params->add_flags & WIMLIB_ADD_FLAG_NO_ACLS)) { struct SECURITY_CONTEXT sec_ctx; - char _sd[1]; + char _sd[4096]; char *sd; /* Get security descriptor */ @@ -666,7 +666,8 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, sec_ctx.vol = vol; errno = 0; - ret = ntfs_get_ntfs_acl(&sec_ctx, 
ni, _sd, 
sizeof(_sd)); + sd = _sd; + ret = ntfs_get_ntfs_acl(&sec_ctx, ni, sd, sizeof(_sd)); if (ret > sizeof(_sd)) { sd = alloca(ret); ret = ntfs_get_ntfs_acl(&sec_ctx, ni, sd, ret); diff --git a/src/resource.c b/src/resource.c index bb3dda97..001ea024 100644 --- a/src/resource.c +++ b/src/resource.c @@ -625,7 +625,7 @@ invalid: * WIMLIB_ERR_NOMEM (errno set to ENOMEM) * WIMLIB_ERR_DECOMPRESSION (errno set to EINVAL) * WIMLIB_ERR_INVALID_PIPABLE_WIM (errno set to EINVAL) - * + * * or other error code returned by the @cb function. */ int diff --git a/src/wim.c b/src/wim.c index f91c98bc..20afd389 100644 --- a/src/wim.c +++ b/src/wim.c @@ -71,7 +71,7 @@ new_wim_struct(void) wim->in_fd.fd = -1; wim->out_fd.fd = -1; } - INIT_LIST_HEAD(&wim->resource_wims); + INIT_LIST_HEAD(&wim->subwims); return wim; } @@ -333,8 +333,10 @@ wimlib_set_wim_info(WIMStruct *wim, const struct wimlib_wim_info *info, int whic if (ret) return ret; - if (which & WIMLIB_CHANGE_GUID) + if (which & WIMLIB_CHANGE_GUID) { memcpy(wim->hdr.guid, info->guid, WIM_GID_LEN); + wim->guid_set_explicitly = 1; + } if (which & WIMLIB_CHANGE_BOOT_INDEX) { if (info->boot_index > wim->hdr.image_count) { @@ -752,22 +754,19 @@ can_delete_from_wim(WIMStruct *wim) WIMLIBAPI void wimlib_free(WIMStruct *wim) { - DEBUG("Freeing WIMStruct"); - if (!wim) return; - while (!list_empty(&wim->resource_wims)) { - WIMStruct *resource_wim; + DEBUG("Freeing WIMStruct (filename=\"%"TS"\", image_count=%u)", + wim->filename, wim->hdr.image_count); - resource_wim = list_entry(wim->resource_wims.next, - WIMStruct, resource_wim_node); - if (resource_wim->is_owned_by_master) { - list_del(&resource_wim->resource_wim_node); - wimlib_free(resource_wim); - } else { - wimlib_unreference_resources(wim, &resource_wim, 1); - } + while (!list_empty(&wim->subwims)) { + WIMStruct *subwim; + + subwim = list_entry(wim->subwims.next, WIMStruct, subwim_node); + list_del(&subwim->subwim_node); + DEBUG("Freeing subwim."); + wimlib_free(subwim); } if 
(filedes_valid(&wim->in_fd)) @@ -786,7 +785,6 @@ wimlib_free(WIMStruct *wim) FREE(wim->image_metadata); } FREE(wim); - DEBUG("Freed WIMStruct"); } static bool diff --git a/src/write.c b/src/write.c index b1654e9a..42b9fcff 100644 --- a/src/write.c +++ b/src/write.c @@ -831,19 +831,29 @@ do_write_stream_list(struct list_head *stream_list, * stream in the WIM we are writing. The stream must be * checksummed to know if we need to write it or not. */ struct wim_lookup_table_entry *tmp; - u32 orig_refcnt = lte->out_refcnt; + u32 orig_out_refcnt = lte->out_refcnt; ret = hash_unhashed_stream(lte, lookup_table, &tmp); if (ret) break; if (tmp != lte) { + /* We found a duplicate stream. 'lte' was + * freed, so replace it with the duplicate. */ lte = tmp; - /* We found a duplicate stream. */ - if (orig_refcnt != tmp->out_refcnt) { - /* We have already written, or are going - * to write, the duplicate stream. So - * just skip to the next stream. */ - DEBUG("Discarding duplicate stream of length %"PRIu64, + + /* 'out_refcnt' was transferred to the + * duplicate, and we can detect if the duplicate + * stream was already referenced for writing by + * checking if its 'out_refcnt' is higher than + * that of the original stream. In such cases, + * the current stream can be discarded. We can + * also discard the current stream if it was + * previously marked as filtered (e.g. already + * present in the WIM being written). 
*/ + if (lte->out_refcnt > orig_out_refcnt || + lte->filtered) { + DEBUG("Discarding duplicate stream of " + "length %"PRIu64, wim_resource_size(lte)); lte->no_progress = 0; stream_discarded = true; @@ -1587,12 +1597,15 @@ write_stream_list(struct list_head *stream_list, unsigned total_parts = 0; WIMStruct *prev_wim_part = NULL; - if (list_empty(stream_list)) + if (list_empty(stream_list)) { + DEBUG("No streams to write."); return 0; + } write_resource_flags = write_flags_to_resource_flags(write_flags); - DEBUG("write_resource_flags=0x%08x", write_resource_flags); + DEBUG("Writing stream list (offset = %"PRIu64", write_resource_flags=0x%08x)", + out_fd->offset, write_resource_flags); sort_stream_list_by_sequential_order(stream_list, offsetof(struct wim_lookup_table_entry, @@ -1617,7 +1630,6 @@ write_stream_list(struct list_head *stream_list, total_parts++; } } - } memset(&progress_data, 0, sizeof(progress_data)); @@ -1705,141 +1717,37 @@ stream_size_table_insert(struct wim_lookup_table_entry *lte, void *_tab) return 0; } - -struct lte_overwrite_prepare_args { - WIMStruct *wim; - off_t end_offset; - struct list_head stream_list; - struct stream_size_table stream_size_tab; -}; - -/* First phase of preparing streams for an in-place overwrite. This is called - * on all streams, both hashed and unhashed, except the metadata resources. */ -static int -lte_overwrite_prepare(struct wim_lookup_table_entry *lte, void *_args) -{ - struct lte_overwrite_prepare_args *args = _args; - - wimlib_assert(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)); - if (lte->resource_location != RESOURCE_IN_WIM || lte->wim != args->wim) - list_add_tail(&lte->write_streams_list, &args->stream_list); - lte->out_refcnt = lte->refcnt; - stream_size_table_insert(lte, &args->stream_size_tab); - return 0; -} - -/* Second phase of preparing streams for an in-place overwrite. This is called - * on existing metadata resources and hashed streams, but not unhashed streams. 
- * - * NOTE: lte->output_resource_entry is in union with lte->hash_list_2, so - * lte_overwrite_prepare_2() must be called after lte_overwrite_prepare(), as - * the latter uses lte->hash_list_2, while the former expects to set - * lte->output_resource_entry. */ -static int -lte_overwrite_prepare_2(struct wim_lookup_table_entry *lte, void *_args) -{ - struct lte_overwrite_prepare_args *args = _args; - - if (lte->resource_location == RESOURCE_IN_WIM && lte->wim == args->wim) { - /* We can't do an in place overwrite on the WIM if there are - * streams after the XML data. */ - if (lte->resource_entry.offset + - lte->resource_entry.size > args->end_offset) - { - if (wimlib_print_errors) { - ERROR("The following resource is after the XML data:"); - print_lookup_table_entry(lte, stderr); - } - return WIMLIB_ERR_RESOURCE_ORDER; - } - copy_resource_entry(&lte->output_resource_entry, - &lte->resource_entry); - } - return 0; -} - -/* Given a WIM that we are going to overwrite in place with zero or more - * additional streams added, construct a list the list of new unique streams - * ('struct wim_lookup_table_entry's) that must be written, plus any unhashed - * streams that need to be added but may be identical to other hashed or - * unhashed streams. These unhashed streams are checksummed while the streams - * are being written. To aid this process, the member @unique_size is set to 1 - * on streams that have a unique size and therefore must be written. - * - * The out_refcnt member of each 'struct wim_lookup_table_entry' is set to - * indicate the number of times the stream is referenced in only the streams - * that are being written; this may still be adjusted later when unhashed - * streams are being resolved. 
- */ -static int -prepare_streams_for_overwrite(WIMStruct *wim, off_t end_offset, - struct list_head *stream_list) -{ - int ret; - struct lte_overwrite_prepare_args args; - unsigned i; - - args.wim = wim; - args.end_offset = end_offset; - ret = init_stream_size_table(&args.stream_size_tab, - wim->lookup_table->capacity); - if (ret) - return ret; - - INIT_LIST_HEAD(&args.stream_list); - for (i = 0; i < wim->hdr.image_count; i++) { - struct wim_image_metadata *imd; - struct wim_lookup_table_entry *lte; - - imd = wim->image_metadata[i]; - image_for_each_unhashed_stream(lte, imd) - lte_overwrite_prepare(lte, &args); - } - for_lookup_table_entry(wim->lookup_table, lte_overwrite_prepare, &args); - list_transfer(&args.stream_list, stream_list); - - for (i = 0; i < wim->hdr.image_count; i++) { - ret = lte_overwrite_prepare_2(wim->image_metadata[i]->metadata_lte, - &args); - if (ret) - goto out_destroy_stream_size_table; - } - ret = for_lookup_table_entry(wim->lookup_table, - lte_overwrite_prepare_2, &args); -out_destroy_stream_size_table: - destroy_stream_size_table(&args.stream_size_tab); - return ret; -} - - struct find_streams_ctx { + WIMStruct *wim; + int write_flags; struct list_head stream_list; struct stream_size_table stream_size_tab; }; static void -lte_reference_for_write(struct wim_lookup_table_entry *lte, - struct find_streams_ctx *ctx, - unsigned nref) +lte_reference_for_logical_write(struct wim_lookup_table_entry *lte, + struct find_streams_ctx *ctx, + unsigned nref) { if (lte->out_refcnt == 0) { - if (lte->unhashed) - stream_size_table_insert(lte, &ctx->stream_size_tab); + stream_size_table_insert(lte, &ctx->stream_size_tab); list_add_tail(&lte->write_streams_list, &ctx->stream_list); } lte->out_refcnt += nref; } static int -do_lte_reference_for_write(struct wim_lookup_table_entry *lte, void *_ctx) +do_lte_full_reference_for_logical_write(struct wim_lookup_table_entry *lte, + void *_ctx) { struct find_streams_ctx *ctx = _ctx; lte->out_refcnt = 0; - 
lte_reference_for_write(lte, ctx, lte->refcnt); + lte_reference_for_logical_write(lte, ctx, + (lte->refcnt ? lte->refcnt : 1)); return 0; } -static void +static int inode_find_streams_to_write(struct wim_inode *inode, struct wim_lookup_table *table, struct find_streams_ctx *ctx) @@ -1850,8 +1758,11 @@ inode_find_streams_to_write(struct wim_inode *inode, for (i = 0; i <= inode->i_num_ads; i++) { lte = inode_stream_lte(inode, i, table); if (lte) - lte_reference_for_write(lte, ctx, inode->i_nlink); + lte_reference_for_logical_write(lte, ctx, inode->i_nlink); + else if (!is_zero_hash(inode_stream_hash(inode, i))) + return WIMLIB_ERR_RESOURCE_NOT_FOUND; } + return 0; } static int @@ -1861,6 +1772,7 @@ image_find_streams_to_write(WIMStruct *wim) struct wim_image_metadata *imd; struct wim_inode *inode; struct wim_lookup_table_entry *lte; + int ret; ctx = wim->private; imd = wim_get_current_image_metadata(wim); @@ -1870,78 +1782,175 @@ image_find_streams_to_write(WIMStruct *wim) /* Go through this image's inodes to find any streams that have not been * found yet. */ - image_for_each_inode(inode, imd) - inode_find_streams_to_write(inode, wim->lookup_table, ctx); + image_for_each_inode(inode, imd) { + ret = inode_find_streams_to_write(inode, wim->lookup_table, ctx); + if (ret) + return ret; + } return 0; } -/* Given a WIM that from which one or all of the images is being written, build - * the list of unique streams ('struct wim_lookup_table_entry's) that must be - * written, plus any unhashed streams that need to be written but may be - * identical to other hashed or unhashed streams being written. These unhashed - * streams are checksummed while the streams are being written. To aid this - * process, the member @unique_size is set to 1 on streams that have a unique - * size and therefore must be written. 
+/* + * Build a list of streams (via `struct wim_lookup_table_entry's) included in + * the "logical write" of the WIM, meaning all streams that are referenced at + * least once by dentries in the image(s) being written. 'out_refcnt' on + * each stream being included in the logical write is set to the number of + * references from dentries in the image(s). Furthermore, 'unique_size' on each + * stream being included in the logical write is set to indicate whether that + * stream has a unique size relative to the streams being included in the + * logical write. Still furthermore, 'part_number' on each stream being + * included in the logical write is set to the part number given in the + * in-memory header of @p wim. * - * The out_refcnt member of each 'struct wim_lookup_table_entry' is set to - * indicate the number of times the stream is referenced in only the streams - * that are being written; this may still be adjusted later when unhashed - * streams are being resolved. + * This is considered a "logical write" because it does not take into account + * filtering out streams already present in the WIM (in the case of an in place + * overwrite) or present in other WIMs (in the case of creating a delta WIM). 
*/ static int -prepare_stream_list(WIMStruct *wim, int image, struct list_head *stream_list) +prepare_logical_stream_list(WIMStruct *wim, int image, bool streams_ok, + struct find_streams_ctx *ctx) { int ret; - struct find_streams_ctx ctx; - - DEBUG("Preparing list of streams to write for image %d.", image); + struct wim_lookup_table_entry *lte; - for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); - ret = init_stream_size_table(&ctx.stream_size_tab, - wim->lookup_table->capacity); - if (ret) - return ret; - for_lookup_table_entry(wim->lookup_table, stream_size_table_insert, - &ctx.stream_size_tab); - INIT_LIST_HEAD(&ctx.stream_list); - wim->private = &ctx; - -#if 1 - /* Optimization enabled by default: if we're writing all the images, - * it's not strictly necessary to decompress, parse, and go through the - * dentry tree in each image's metadata resource. Instead, include all - * the hashed streams referenced from the lookup table as well as all - * unhashed streams referenced in the per-image list. For 'out_refcnt' - * for each stream, just copy the value from 'refcnt', which is the - * reference count of that stream in the entire WIM. */ - if (image == WIMLIB_ALL_IMAGES) { + if (streams_ok && (image == WIMLIB_ALL_IMAGES || + (image == 1 && wim->hdr.image_count == 1))) + { + /* Fast case: Assume that all streams are being written and + * that the reference counts are correct. */ struct wim_lookup_table_entry *lte; struct wim_image_metadata *imd; unsigned i; for_lookup_table_entry(wim->lookup_table, - do_lte_reference_for_write, &ctx); + do_lte_full_reference_for_logical_write, ctx); for (i = 0; i < wim->hdr.image_count; i++) { imd = wim->image_metadata[i]; image_for_each_unhashed_stream(lte, imd) - do_lte_reference_for_write(lte, &ctx); + do_lte_full_reference_for_logical_write(lte, ctx); } - ret = 0; - } else -#endif + } else { + /* Slow case: Walk through the images being written and + * determine the streams referenced. 
*/ + for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL); + wim->private = ctx; ret = for_image(wim, image, image_find_streams_to_write); + if (ret) + return ret; + } - destroy_stream_size_table(&ctx.stream_size_tab); + list_for_each_entry(lte, &ctx->stream_list, write_streams_list) + lte->part_number = wim->hdr.part_number; + return 0; +} + +static int +process_filtered_stream(struct wim_lookup_table_entry *lte, void *_ctx) +{ + struct find_streams_ctx *ctx = _ctx; + u16 filtered = 0; + + /* Calculate and set lte->filtered. */ + if (lte->resource_location == RESOURCE_IN_WIM) { + if (lte->wim == ctx->wim && + (ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE)) + filtered |= FILTERED_SAME_WIM; + if (lte->wim != ctx->wim && + (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS)) + filtered |= FILTERED_EXTERNAL_WIM; + } + lte->filtered = filtered; + + /* Filtered streams get inserted into the stream size table too, unless + * they already were. This is because streams that are checksummed + * on-the-fly during the write should not be written if they are + * duplicates of a filtered stream. */ + if (lte->filtered && lte->out_refcnt == 0) + stream_size_table_insert(lte, &ctx->stream_size_tab); + return 0; +} + +static int +mark_stream_not_filtered(struct wim_lookup_table_entry *lte, void *_ignore) +{ + lte->filtered = 0; + return 0; +} + +/* Given the list of streams to include in a logical write of a WIM, handle + * filtering out streams already present in the WIM or already present in + * external WIMs, depending on the write flags provided. 
*/ +static void +handle_stream_filtering(struct find_streams_ctx *ctx) +{ + struct wim_lookup_table_entry *lte, *tmp; + + if (!(ctx->write_flags & (WIMLIB_WRITE_FLAG_OVERWRITE | + WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS))) + { + for_lookup_table_entry(ctx->wim->lookup_table, + mark_stream_not_filtered, ctx); + return; + } + + for_lookup_table_entry(ctx->wim->lookup_table, + process_filtered_stream, ctx); + + /* Streams in the logical write list that were filtered can be removed. */ + list_for_each_entry_safe(lte, tmp, &ctx->stream_list, + write_streams_list) + if (lte->filtered) + list_del(&lte->write_streams_list); +} + +/* Prepares list of streams to write for the specified WIM image(s). This wraps + * around prepare_logical_stream_list() to handle filtering out streams already + * present in the WIM or already present in external WIMs, depending on the + * write flags provided. + * + * Note: some additional data is stored in each `struct wim_lookup_table_entry': + * + * - 'out_refcnt' is set to the number of references found for the logical write. + * This will be nonzero on all streams in the list returned by this function, + * but will also be nonzero on streams not in the list that were included in + * the logical write list, but filtered out from the returned list. + * - 'filtered' is set to nonzero if the stream was filtered. Filtered streams + * are not included in the list of streams returned by this function. + * - 'unique_size' is set if the stream has a unique size among all streams in + * the logical write plus any filtered streams in the entire WIM that could + * potentially turn out to have the same checksum as a yet-to-be-checksummed + * stream being written. 
+ */ +static int +prepare_stream_list(WIMStruct *wim, int image, int write_flags, + struct list_head *stream_list) +{ + int ret; + bool streams_ok; + struct find_streams_ctx ctx; + + INIT_LIST_HEAD(&ctx.stream_list); + ret = init_stream_size_table(&ctx.stream_size_tab, + wim->lookup_table->capacity); if (ret) return ret; + ctx.write_flags = write_flags; + ctx.wim = wim; + + streams_ok = ((write_flags & WIMLIB_WRITE_FLAG_STREAMS_OK) != 0); + + ret = prepare_logical_stream_list(wim, image, streams_ok, &ctx); + if (ret) + goto out_destroy_table; + + handle_stream_filtering(&ctx); list_transfer(&ctx.stream_list, stream_list); - return 0; + ret = 0; +out_destroy_table: + destroy_stream_size_table(&ctx.stream_size_tab); + return ret; } -/* Writes the streams for the specified @image in @wim to @wim->out_fd. - * Alternatively, if @stream_list_override is specified, it is taken to be the - * list of streams to write (connected with 'write_streams_list') and @image is - * ignored. */ static int write_wim_streams(WIMStruct *wim, int image, int write_flags, unsigned num_threads, @@ -1953,22 +1962,24 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags, struct list_head *stream_list; struct wim_lookup_table_entry *lte; - if (stream_list_override) { - stream_list = stream_list_override; - list_for_each_entry(lte, stream_list, write_streams_list) { - if (lte->refcnt) - lte->out_refcnt = lte->refcnt; - else - lte->out_refcnt = 1; - } - } else { + if (stream_list_override == NULL) { + /* Normal case: prepare stream list from image(s) being written. + */ stream_list = &_stream_list; - ret = prepare_stream_list(wim, image, stream_list); + ret = prepare_stream_list(wim, image, write_flags, stream_list); if (ret) return ret; + } else { + /* Currently only as a result of wimlib_split() being called: + * use stream list already explicitly provided. Use existing + * reference counts. 
*/ + stream_list = stream_list_override; + list_for_each_entry(lte, stream_list, write_streams_list) { + lte->out_refcnt = (lte->refcnt ? lte->refcnt : 1); + lte->part_number = wim->hdr.part_number; + } } - list_for_each_entry(lte, stream_list, write_streams_list) - lte->part_number = wim->hdr.part_number; + return write_stream_list(stream_list, wim->lookup_table, &wim->out_fd, @@ -1987,8 +1998,10 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, int end_image; int write_resource_flags; - if (write_flags & WIMLIB_WRITE_FLAG_NO_METADATA) + if (write_flags & WIMLIB_WRITE_FLAG_NO_METADATA) { + DEBUG("Not writing any metadata resources."); return 0; + } write_resource_flags = write_flags_to_resource_flags(write_flags); @@ -2014,9 +2027,19 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, * the original (or was newly added). Otherwise just copy the * existing one. */ if (imd->modified) { + DEBUG("Image %u was modified; building and writing new " + "metadata resource", i); ret = write_metadata_resource(wim, i, write_resource_flags); + } else if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) { + DEBUG("Image %u was not modified; re-using existing " + "metadata resource.", i); + copy_resource_entry(&imd->metadata_lte->output_resource_entry, + &imd->metadata_lte->resource_entry); + ret = 0; } else { + DEBUG("Image %u was not modified; copying existing " + "metadata resource.", i); ret = write_wim_resource(imd->metadata_lte, &wim->out_fd, wim->compression_type, @@ -2404,7 +2427,8 @@ write_wim_part(WIMStruct *wim, DEBUG("Number of threads: %u", num_threads); DEBUG("Progress function: %s", (progress_func ? "yes" : "no")); DEBUG("Stream list: %s", (stream_list_override ? "specified" : "autodetect")); - DEBUG("GUID: %s", (guid ? "specified" : "generate new")); + DEBUG("GUID: %s", ((guid || wim->guid_set_explicitly) ? + "specified" : "generate new")); /* Internally, this is always called with a valid part number and total * parts. 
*/ @@ -2474,7 +2498,7 @@ write_wim_part(WIMStruct *wim, /* Use GUID if specified; otherwise generate a new one. */ if (guid) memcpy(wim->hdr.guid, guid, WIMLIB_GUID_LEN); - else + else if (!wim->guid_set_explicitly) randomize_byte_array(wim->hdr.guid, WIMLIB_GUID_LEN); /* Clear references to resources that have not been written yet. */ @@ -2627,6 +2651,40 @@ any_images_modified(WIMStruct *wim) return false; } +static int +check_resource_offset(struct wim_lookup_table_entry *lte, void *_wim) +{ + const WIMStruct *wim = _wim; + off_t end_offset = *(const off_t*)wim->private; + + if (lte->resource_location == RESOURCE_IN_WIM && lte->wim == wim && + lte->resource_entry.offset + lte->resource_entry.size > end_offset) + return WIMLIB_ERR_RESOURCE_ORDER; + return 0; +} + +/* Make sure no file or metadata resources are located after the XML data (or + * integrity table if present)--- otherwise we can't safely overwrite the WIM in + * place and we return WIMLIB_ERR_RESOURCE_ORDER. */ +static int +check_resource_offsets(WIMStruct *wim, off_t end_offset) +{ + int ret; + unsigned i; + + wim->private = &end_offset; + ret = for_lookup_table_entry(wim->lookup_table, check_resource_offset, wim); + if (ret) + return ret; + + for (i = 0; i < wim->hdr.image_count; i++) { + ret = check_resource_offset(wim->image_metadata[i]->metadata_lte, wim); + if (ret) + return ret; + } + return 0; +} + /* * Overwrite a WIM, possibly appending streams to it. * @@ -2703,6 +2761,10 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, if (wim_has_integrity_table(wim)) write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; + /* Set additional flags for overwrite. */ + write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE | + WIMLIB_WRITE_FLAG_STREAMS_OK; + /* Make sure that the integrity table (if present) is after the XML * data, and that there are no stream resources, metadata resources, or * lookup tables after the XML data. 
Otherwise, these data would be @@ -2712,12 +2774,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, old_lookup_table_end = wim->hdr.lookup_table_res_entry.offset + wim->hdr.lookup_table_res_entry.size; if (wim->hdr.integrity.offset != 0 && wim->hdr.integrity.offset < old_xml_end) { - ERROR("Didn't expect the integrity table to be before the XML data"); + WARNING("Didn't expect the integrity table to be before the XML data"); return WIMLIB_ERR_RESOURCE_ORDER; } if (old_lookup_table_end > old_xml_begin) { - ERROR("Didn't expect the lookup table to be after the XML data"); + WARNING("Didn't expect the lookup table to be after the XML data"); return WIMLIB_ERR_RESOURCE_ORDER; } @@ -2746,7 +2808,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, old_wim_end = old_xml_end; } - ret = prepare_streams_for_overwrite(wim, old_wim_end, &stream_list); + ret = check_resource_offsets(wim, old_wim_end); + if (ret) + return ret; + + ret = prepare_stream_list(wim, WIMLIB_ALL_IMAGES, write_flags, + &stream_list); if (ret) return ret; @@ -2776,8 +2843,6 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, goto out_unlock_wim; } - DEBUG("Writing newly added streams (offset = %"PRIu64")", - old_wim_end); ret = write_stream_list(&stream_list, wim->lookup_table, &wim->out_fd, @@ -2788,13 +2853,11 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, if (ret) goto out_truncate; - for (unsigned i = 1; i <= wim->hdr.image_count; i++) { - if (wim->image_metadata[i - 1]->modified) { - ret = write_metadata_resource(wim, i, 0); - if (ret) - goto out_truncate; - } - } + ret = write_wim_metadata_resources(wim, WIMLIB_ALL_IMAGES, + write_flags, progress_func); + if (ret) + goto out_truncate; + write_flags |= WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE; ret = finish_write(wim, WIMLIB_ALL_IMAGES, write_flags, progress_func, NULL); -- 2.43.0