Update NEWS and add imagex-optimize
authorEric Biggers <ebiggers3@gmail.com>
Tue, 20 Nov 2012 05:33:21 +0000 (23:33 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Tue, 20 Nov 2012 05:33:21 +0000 (23:33 -0600)
Makefile.am
NEWS
configure.ac
doc/imagex-capture.1.in
doc/imagex-export.1.in
doc/imagex-optimize.1.in [new file with mode: 0644]
doc/imagex.1.in
programs/imagex.c
src/integrity.c
src/wimlib_internal.h
src/write.c

index f0a0057..1835be1 100644 (file)
@@ -122,6 +122,7 @@ man1_MANS =                 \
        doc/imagex-join.1       \
        doc/imagex-mount.1      \
        doc/imagex-mountrw.1    \
+       doc/imagex-optimize.1   \
        doc/imagex-split.1      \
        doc/imagex-unmount.1    \
        doc/mkwinpeimg.1
diff --git a/NEWS b/NEWS
index 650825b..30f02db 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,19 @@
 Only the most important changes more recent than version 0.6 are noted here.
 
+Version 1.1.1:
+       Appending images to a WIM is now done by default without re-building
+       the whole WIM.  Use the --rebuild flag to get the old behavior (which
+       was to re-build the entire WIM when a new image is appended).
+
+       A new command `imagex optimize' is now available to manually re-build a
+       WIM that has wasted space due to repeated appends.
+
+       Previously, the soname of libwim.so was 0.0.0, despite many
+       interface changes.  The soname is now updated to 1.0.0 and will now be
+       updated each release.
+
+       A possible bug with changing the bootable image of a WIM was fixed.
+
 Version 1.1.0:
        Resources will now be compressed using multiple threads by default.
        (This applies to `imagex capture', `imagex append', and `imagex
index 5c4ad6b..dc60193 100644 (file)
@@ -21,6 +21,7 @@ AC_CONFIG_FILES([Makefile
                doc/imagex-join.1
                doc/imagex-mount.1
                doc/imagex-mountrw.1
+               doc/imagex-optimize.1
                doc/imagex-split.1
                doc/imagex-unmount.1
                doc/mkwinpeimg.1
index 70414eb..a9ea448 100644 (file)
@@ -119,6 +119,12 @@ processors).  Note: if creating or appending to an uncompressed WIM, additional
 threads will not be used, regardless of this parameter, since no compression
 needs to be done in this case.
 .TP
+\fB--rebuild\fR
+For \fBimagex append\fR: rebuild the entire WIM rather than appending an image
+to the end of it.  Rebuilding the WIM is slower, but will save a little bit of
+space that would otherwise be left as a hole in the WIM.  Also see \fBimagex
+optimize\fR.
+.TP
 \fB--flags\fR=\fIEDITIONID\fR
 Specify a string to use in the <FLAGS> element of the XML data for the new
 image.
index 9825e56..0c13577 100644 (file)
@@ -68,6 +68,12 @@ with the same compression type as the source WIM, additional threads will not
 be used, regardless of this parameter, since no data compression needs to be
 done in these cases.
 .TP
+\fB--rebuild\fR
+When exporting image(s) to an existing WIM: rebuild the entire WIM rather than
+appending an image to the end of it.  Rebuilding the WIM is slower, but will
+save a little bit of space that would otherwise be left as a hole in the WIM.
+Also see \fBimagex optimize\fR.
+.TP
 \fB--ref\fR="\fIGLOB\fR"
 File glob of additional split WIM parts that are part of the split WIM being
 exported.  See \fBSPLIT_WIMS\fR.
diff --git a/doc/imagex-optimize.1.in b/doc/imagex-optimize.1.in
new file mode 100644 (file)
index 0000000..40a91ad
--- /dev/null
@@ -0,0 +1,30 @@
+.TH IMAGEX "1" "November 2012" "imagex (wimlib) wimlib @VERSION@" "User Commands"
+.SH NAME
+imagex-optimize \- Optimize a WIM archive
+
+.SH SYNOPSIS
+\fBimagex optimize\fR \fIWIMFILE\fR [--check]
+
+.SH DESCRIPTION
+.PP
+\fBimagex optimize\fR will rebuild the stand-alone WIM \fIWIMFILE\fR.  The new
+WIM is written to a temporary file, and it is renamed to the original file when
+it's ready.  This action will remove any holes that have been left as a result
+of appending images, so the new WIM may be slightly smaller than the old WIM.
+In addition, some errors in the original WIM may be fixed by re-writing it
+(although most cannot).
+
+.SH OPTIONS
+.TP 6
+\fB--check\fR
+When reading \fIWIMFILE\fR, verify its integrity if the integrity table is
+present; include an integrity table in the optimized WIM.  Without this option,
+the optimized WIM has no integrity table, even if there was one before.
+
+.SH NOTES
+
+\fBimagex optimize\fR does not support split WIMs.
+
+.SH SEE ALSO
+.BR imagex (1)
+
index 54b3583..86e0eab 100644 (file)
@@ -22,6 +22,8 @@ imagex \- Create, modify, extract, mount, or unmount a WIM (Windows Imaging Form
 .br
 \fBimagex mountrw\fR \fIarguments...\fR
 .br
+\fBimagex optimize\fR \fIarguments...\fR
+.br
 \fBimagex split\fR \fIarguments...\fR
 .br
 \fBimagex unmount\fR \fIarguments...\fR
@@ -145,6 +147,7 @@ Report bugs to ebiggers3@gmail.com.
 .BR imagex-join (1),
 .BR imagex-mount (1),
 .BR imagex-mountrw (1),
+.BR imagex-optimize (1),
 .BR imagex-split (1),
 .BR imagex-unmount (1),
 
index 266dc0d..0b7c96b 100644 (file)
@@ -34,6 +34,7 @@
 #include <limits.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
 
@@ -52,6 +53,7 @@ enum imagex_op_type {
        JOIN,
        MOUNT,
        MOUNTRW,
+       OPTIMIZE,
        SPLIT,
        UNMOUNT,
 };
@@ -64,7 +66,7 @@ static const char *usage_strings[] = {
 "imagex append (DIRECTORY | NTFS_VOLUME) WIMFILE [IMAGE_NAME]\n"
 "                     [DESCRIPTION] [--boot] [--check] [--flags EDITION_ID]\n"
 "                     [--verbose] [--dereference] [--config=FILE]\n"
-"                     [--threads=NUM_THREADS]\n",
+"                     [--threads=NUM_THREADS] [--rebuild]\n",
 [APPLY] =
 "imagex apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n"
 "                    (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n"
@@ -82,7 +84,7 @@ static const char *usage_strings[] = {
 "imagex export SRC_WIMFILE (SRC_IMAGE_NUM | SRC_IMAGE_NAME | all ) \n"
 "              DEST_WIMFILE [DEST_IMAGE_NAME] [DEST_IMAGE_DESCRIPTION]\n"
 "              [--boot] [--check] [--compress=TYPE] [--ref=\"GLOB\"]\n"
-"              [--threads=NUM_THREADS]\n",
+"              [--threads=NUM_THREADS] [--rebuild]\n",
 [INFO] =
 "imagex info WIMFILE [IMAGE_NUM | IMAGE_NAME] [NEW_NAME]\n"
 "                   [NEW_DESC] [--boot] [--check] [--header] [--lookup-table]\n"
@@ -96,6 +98,8 @@ static const char *usage_strings[] = {
 [MOUNTRW] =
 "imagex mountrw WIMFILE [IMAGE_NUM | IMAGE_NAME] DIRECTORY\n"
 "                      [--check] [--debug] [--streams-interface=INTERFACE]\n",
+[OPTIMIZE] =
+"imagex optimize WIMFILE [--check]\n",
 [SPLIT] =
 "imagex split WIMFILE SPLIT_WIMFILE PART_SIZE_MB [--check]\n",
 [UNMOUNT] =
@@ -125,6 +129,7 @@ static const struct option capture_or_append_options[] = {
        {"flags",       required_argument, NULL, 'f'},
        {"verbose",     no_argument,       NULL, 'v'},
        {"threads",     required_argument, NULL, 't'},
+       {"rebuild",     no_argument,       NULL, 'R'},
        {NULL, 0, NULL, 0},
 };
 static const struct option delete_options[] = {
@@ -138,6 +143,7 @@ static const struct option export_options[] = {
        {"compress",   required_argument, NULL, 'x'},
        {"ref",        required_argument, NULL, 'r'},
        {"threads",    required_argument, NULL, 't'},
+       {"rebuild",    no_argument,       NULL, 'R'},
        {NULL, 0, NULL, 0},
 };
 
@@ -165,6 +171,11 @@ static const struct option mount_options[] = {
        {NULL, 0, NULL, 0},
 };
 
+static const struct option optimize_options[] = {
+       {"check", no_argument, NULL, 'c'}, /* --check: case 'c' in imagex_optimize() */
+       {NULL, 0, NULL, 0}, /* all-zero last entry, as in the other option tables */
+};
+
 static const struct option split_options[] = {
        {"check", no_argument, NULL, 'c'},
        {NULL, 0, NULL, 0},
@@ -250,6 +261,15 @@ static int get_compression_type(const char *optarg)
        }
 }
 
+/* Size of FILENAME in bytes as reported by stat(), or (off_t)-1 on failure. */
+static off_t file_get_size(const char *filename)
+{
+       struct stat stbuf;
+       if (stat(filename, &stbuf) != 0)
+               return (off_t)-1;
+       return stbuf.st_size;
+}
+
 static char *file_get_contents(const char *filename, size_t *len_ret)
 {
        struct stat stbuf;
@@ -542,6 +562,9 @@ static int imagex_capture_or_append(int argc, const char **argv)
                        if (num_threads == UINT_MAX)
                                return -1;
                        break;
+               case 'R':
+                       write_flags |= WIMLIB_WRITE_FLAG_REBUILD;
+                       break;
                default:
                        usage(cmd);
                        return -1;
@@ -801,6 +824,9 @@ static int imagex_export(int argc, const char **argv)
                        if (num_threads == UINT_MAX)
                                return -1;
                        break;
+               case 'R':
+                       write_flags |= WIMLIB_WRITE_FLAG_REBUILD;
+                       break;
                default:
                        usage(EXPORT);
                        return -1;
@@ -1320,6 +1346,71 @@ mount_usage:
        return -1;
 }
 
+/* Rebuild a WIM (`imagex optimize'), then report its size before and after.
+ * Returns 0 on success, -1 on a usage error, or the nonzero result of
+ * wimlib_open_wim() / wimlib_overwrite(). */
+static int imagex_optimize(int argc, const char **argv)
+{
+       int c, ret;
+       int open_flags = WIMLIB_OPEN_FLAG_SHOW_PROGRESS;
+       int write_flags = WIMLIB_WRITE_FLAG_REBUILD |
+                         WIMLIB_WRITE_FLAG_SHOW_PROGRESS;
+       WIMStruct *w;
+       const char *wimfile;
+       off_t old_size, new_size;
+
+       for_opt(c, optimize_options) {
+               switch (c) {
+               case 'c':
+                       open_flags |= WIMLIB_OPEN_FLAG_CHECK_INTEGRITY;
+                       write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
+                       break;
+               default:
+                       usage(OPTIMIZE);
+                       return -1;
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 1) {
+               usage(OPTIMIZE);
+               return -1;
+       }
+       wimfile = argv[0];
+
+       ret = wimlib_open_wim(wimfile, open_flags, &w);
+       if (ret != 0)
+               return ret;
+
+       /* off_t is signed and of unspecified width: cast to match PRIu64. */
+       old_size = file_get_size(wimfile);
+       printf("`%s' original size: ", wimfile);
+       if (old_size == -1)
+               puts("Unknown");
+       else
+               printf("%"PRIu64" KiB\n", (uint64_t)old_size >> 10);
+
+       ret = wimlib_overwrite(w, write_flags, 0);
+
+       new_size = file_get_size(wimfile);
+       printf("`%s' optimized size: ", wimfile);
+       if (new_size == -1)
+               puts("Unknown");
+       else
+               printf("%"PRIu64" KiB\n", (uint64_t)new_size >> 10);
+
+       fputs("Space saved: ", stdout);
+       if (new_size != -1 && old_size != -1)
+               printf("%lld KiB\n",
+                      ((long long)old_size - (long long)new_size) >> 10);
+       else
+               puts("Unknown");
+
+       wimlib_free(w);
+       return ret;
+}
+
 /* Split a WIM into a spanned set */
 static int imagex_split(int argc, const char **argv)
 {
@@ -1408,6 +1499,7 @@ static const struct imagex_command imagex_commands[] = {
        {"join",    imagex_join,              JOIN},
        {"mount",   imagex_mount_rw_or_ro,    MOUNT},
        {"mountrw", imagex_mount_rw_or_ro,    MOUNTRW},
+       {"optimize",imagex_optimize,          OPTIMIZE},
        {"split",   imagex_split,             SPLIT},
        {"unmount", imagex_unmount,           UNMOUNT},
 };
index bf02a6b..d7f81eb 100644 (file)
@@ -372,6 +372,8 @@ int write_integrity_table(FILE *fp,
        off_t cur_offset;
        u32 new_table_size;
 
+       wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
+
        cur_offset = ftello(fp);
        if (cur_offset == -1) {
                ERROR_WITH_ERRNO("Failed to get offset in WIM");
index e3884c9..6c45ca3 100644 (file)
@@ -498,8 +498,9 @@ extern int open_wim_writable(WIMStruct *w, const char *path,
 /* Internal use only */
 #define WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE      0x80000000
 #define WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE 0x40000000
+#define WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML  0x20000000
 
-#define WIMLIB_WRITE_MASK_PUBLIC               0x3fffffff
+#define WIMLIB_WRITE_MASK_PUBLIC               0x1fffffff
 
 /* write.c */
 extern int begin_write(WIMStruct *w, const char *path, int write_flags);
index 7315ec9..96d6676 100644 (file)
@@ -1370,8 +1370,33 @@ static int write_wim_streams(WIMStruct *w, int image, int write_flags,
 }
 
 /*
- * Write the lookup table, xml data, and integrity table, then overwrite the WIM
- * header.
+ * Finish writing a WIM file: write the lookup table, xml data, and integrity
+ * table (optional), then overwrite the WIM header.
+ *
+ * write_flags is a bitwise OR of the following:
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_CHECK_INTEGRITY:
+ *             Include an integrity table.
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_SHOW_PROGRESS:
+ *             Show progress information when (if) writing the integrity table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE:
+ *             Don't write the lookup table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE:
+ *             When (if) writing the integrity table, re-use entries from the
+ *             existing integrity table, if possible.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML:
+ *             After writing the XML data but before writing the integrity
+ *             table, write a temporary WIM header and flush the stream so that
+ *             the WIM is less likely to become corrupted upon abrupt program
+ *             termination.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_FSYNC:
+ *             fsync() the output file before closing it.
+ *
  */
 int finish_write(WIMStruct *w, int image, int write_flags)
 {
@@ -1399,6 +1424,30 @@ int finish_write(WIMStruct *w, int image, int write_flags)
                goto out;
 
        if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
+               if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
+                       struct wim_header checkpoint_hdr;
+                       memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header));
+                       memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry));
+                       if (fseeko(out, 0, SEEK_SET) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+                       ret = write_header(&checkpoint_hdr, out);
+                       if (ret != 0)
+                               goto out;
+
+                       if (fflush(out) != 0) {
+                               ERROR_WITH_ERRNO("Can't write data to WIM");
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+
+                       if (fseeko(out, 0, SEEK_END) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+               }
+
                off_t old_lookup_table_end;
                off_t new_lookup_table_end;
                bool show_progress;
@@ -1452,8 +1501,6 @@ int finish_write(WIMStruct *w, int image, int write_flags)
 
        if (fseeko(out, 0, SEEK_SET) != 0) {
                ret = WIMLIB_ERR_WRITE;
-               ERROR_WITH_ERRNO("Failed to seek to beginning of WIM "
-                                "to overwrite header");
                goto out;
        }
 
@@ -1587,6 +1634,63 @@ static int find_new_streams(struct lookup_table_entry *lte, void *arg)
        return 0;
 }
 
+/*
+ * Overwrite a WIM, possibly appending streams to it.
+ *
+ * A WIM looks like (or is supposed to look like) the following:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * If we are not adding any streams or metadata resources, the lookup table is
+ * unchanged--- so we only need to overwrite the XML data, integrity table, and
+ * header.  This operation is potentially unsafe if the program is abruptly
+ * terminated while the XML data or integrity table are being overwritten, but
+ * before the new header has been written.  To partially alleviate this problem,
+ * a special flag (WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) is passed to
+ * finish_write() to cause a temporary WIM header to be written after the XML
+ * data has been written.  This may prevent the WIM from becoming corrupted if
+ * the program is terminated while the integrity table is being calculated (but
+ * no guarantees, due to write re-ordering...).
+ *
+ * If we are adding new streams or images (metadata resources), the lookup table
+ * needs to be changed, and those streams need to be written.  In this case, we
+ * try to perform a safe update of the WIM file by writing the streams *after*
+ * the end of the previous WIM, then writing the new lookup table, XML data, and
+ * (optionally) integrity table following the new streams.  This will produce a
+ * layout like the following:
+ *
+ *                   Header (212 bytes)
+ *                   (OLD) Streams and metadata resources (variable size)
+ *                   (OLD) Lookup table (variable size)
+ *                   (OLD) XML data (variable size)
+ *                   (OLD) Integrity table (optional) (variable size)
+ *                   (NEW) Streams and metadata resources (variable size)
+ *                   (NEW) Lookup table (variable size)
+ *                   (NEW) XML data (variable size)
+ *                   (NEW) Integrity table (optional) (variable size)
+ *
+ * At all points, the WIM is valid as nothing points to the new data yet.  Then,
+ * the header is overwritten to point to the new lookup table, XML data, and
+ * integrity table, to produce the following layout:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Nothing (variable size)
+ *                   More Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * This method allows an image to be appended to a large WIM very quickly, and
+ * is crash-safe except in the case of write re-ordering, but the
+ * disadvantage is that a small hole is left in the WIM where the old lookup
+ * table, xml data, and integrity table were.  (These usually only take up a
+ * small amount of space compared to the streams, however.)
+ */
 static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
                                 unsigned num_threads,
                                 int modified_image_idx)
@@ -1636,29 +1740,24 @@ static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
        if (ret != 0)
                return ret;
 
-       DEBUG("old_wim_end = %"PRIu64, old_wim_end);
+       if (modified_image_idx == w->hdr.image_count) {
+               /* If no images are modified, a new lookup table does not need
+                * to be written. */
+               wimlib_assert(list_empty(&stream_list));
+               old_wim_end = w->hdr.lookup_table_res_entry.offset +
+                             w->hdr.lookup_table_res_entry.size;
+               write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |
+                              WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML;
+       }
 
        INIT_LIST_HEAD(&stream_list);
        for_lookup_table_entry(w->lookup_table, find_new_streams,
                               &stream_list);
 
-       {
-               u64 num_new_streams = 0;
-               struct list_head *cur;
-               list_for_each(cur, &stream_list)
-                       num_new_streams++;
-               DEBUG("%"PRIu64" new streams to write", num_new_streams);
-       }
-
-       {
-               bool trunc = false;
-               bool need_readable = false;
-               if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
-                       need_readable = true;
-               ret = open_wim_writable(w, w->filename, trunc, need_readable);
-               if (ret != 0)
-                       return ret;
-       }
+       ret = open_wim_writable(w, w->filename, false,
+                               (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) != 0);
+       if (ret != 0)
+               return ret;
 
        if (fseeko(w->out_fp, old_wim_end, SEEK_SET) != 0) {
                ERROR_WITH_ERRNO("Can't seek to end of WIM");