Update NEWS and add imagex-optimize
authorEric Biggers <ebiggers3@gmail.com>
Tue, 20 Nov 2012 05:33:21 +0000 (23:33 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Tue, 20 Nov 2012 05:33:21 +0000 (23:33 -0600)
Makefile.am
NEWS
configure.ac
doc/imagex-capture.1.in
doc/imagex-export.1.in
doc/imagex-optimize.1.in [new file with mode: 0644]
doc/imagex.1.in
programs/imagex.c
src/integrity.c
src/wimlib_internal.h
src/write.c

index f0a00573c166e15cea38c94c1fa14b92fe5eb939..1835be1cc9f704580ce9f8d9e6e1a53617d0a150 100644 (file)
@@ -122,6 +122,7 @@ man1_MANS =                 \
        doc/imagex-join.1       \
        doc/imagex-mount.1      \
        doc/imagex-mountrw.1    \
+       doc/imagex-optimize.1   \
        doc/imagex-split.1      \
        doc/imagex-unmount.1    \
        doc/mkwinpeimg.1
diff --git a/NEWS b/NEWS
index 650825b337d222219fa11a41148b5f87f5a8815d..30f02dbfd477140d7f9a5a9519b208066ace001c 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,19 @@
 Only the most important changes more recent than version 0.6 are noted here.
 
+Version 1.1.1:
+       Appending images to a WIM is now done by default without re-building
+       the whole WIM.  Use the --rebuild flag to get the old behavior (which
+       was to re-build the entire WIM when a new image is appended).
+
+       A new command `imagex optimize' is now available to manually re-build a
+       WIM that has wasted space due to repeated appends.
+
+       Previously, the soname of libwim.so has been 0.0.0, despite many
+       interface changes.  The soname is now updated to 1.0.0 and will now be
+       updated each release.
+
+       A possible bug with changing the bootable image of a WIM was fixed.
+
 Version 1.1.0:
        Resources will now be compressed using multiple threads by default.
        (This applies to `imagex capture', `imagex append', and `imagex
index 5c4ad6b7031240477dd28a1d833b4484dfa09452..dc6019346cfae1ba692a21b2651e2ffcb05a944d 100644 (file)
@@ -21,6 +21,7 @@ AC_CONFIG_FILES([Makefile
                doc/imagex-join.1
                doc/imagex-mount.1
                doc/imagex-mountrw.1
+               doc/imagex-optimize.1
                doc/imagex-split.1
                doc/imagex-unmount.1
                doc/mkwinpeimg.1
index 70414eb2a35ec6667aeece57963560aca3548833..a9ea44804d312e6bb507e2ca2bd6840c9138b846 100644 (file)
@@ -119,6 +119,12 @@ processors).  Note: if creating or appending to an uncompressed WIM, additional
 threads will not be used, regardless of this parameter, since no compression
 needs to be done in this case.
 .TP
+\fB--rebuild\fR
+For \fBimagex append\fR: rebuild the entire WIM rather than appending an image
+to the end of it.  Rebuilding the WIM is slower, but will save a little bit of
+space that would otherwise be left as a hole in the WIM.  Also see \fBimagex
+optimize\fR.
+.TP
 \fB--flags\fR=\fIEDITIONID\fR
 Specify a string to use in the <FLAGS> element of the XML data for the new
 image.
index 9825e56a80e7dc12e8664c2300d48254ad70974d..0c13577de7404b361c39d7f250ea52d20d9c4c1f 100644 (file)
@@ -68,6 +68,12 @@ with the same compression type as the source WIM, additional threads will not
 be used, regardless of this parameter, since no data compression needs to be
 done in these cases.
 .TP
+\fB--rebuild\fR
+When exporting image(s) to an existing WIM: rebuild the entire WIM rather than
+appending an image to the end of it.  Rebuilding the WIM is slower, but will
+save a little bit of space that would otherwise be left as a hole in the WIM.
+Also see \fBimagex optimize\fR.
+.TP
 \fB--ref\fR="\fIGLOB\fR"
 File glob of additional split WIM parts that are part of the split WIM being
 exported.  See \fBSPLIT_WIMS\fR.
diff --git a/doc/imagex-optimize.1.in b/doc/imagex-optimize.1.in
new file mode 100644 (file)
index 0000000..40a91ad
--- /dev/null
@@ -0,0 +1,30 @@
+.TH IMAGEX "1" "November 2012" "imagex (wimlib) wimlib @VERSION@" "User Commands"
+.SH NAME
+imagex-optimize \- Optimize a WIM archive
+
+.SH SYNOPSIS
+\fBimagex optimize\fR \fIWIMFILE\fR [--check]
+
+.SH DESCRIPTION
+.PP
+\fBimagex optimize\fR will rebuild the stand-alone WIM \fIWIMFILE\fR.  The new
+WIM is written to a temporary file, and it is renamed to the original file when
+it's ready.  This action will remove any holes that have been left as a result
+of appending images, so the new WIM may be slightly smaller than the old WIM.
+In addition, some errors in the original WIM may be fixed by re-writing it
+(although most cannot).
+
+.SH OPTIONS
+.TP 6
+\fB--check\fR
+When reading \fIWIMFILE\fR, verify its integrity if the integrity table is
+present; include an integrity table in the optimized WIM.  If this option is not
+specified, no integrity table is included in the optimized WIM, even if there
+was one before.
+
+.SH NOTES
+
+\fBimagex optimize\fR does not support split WIMs.
+
+.SH SEE ALSO
+.BR imagex (1)
+
index 54b35832683013fc0cda470269bfec693a430956..86e0eaba0ebd4ccb33cdf40ecb118379ada3c1f7 100644 (file)
@@ -22,6 +22,8 @@ imagex \- Create, modify, extract, mount, or unmount a WIM (Windows Imaging Form
 .br
 \fBimagex mountrw\fR \fIarguments...\fR
 .br
+\fBimagex optimize\fR \fIarguments...\fR
+.br
 \fBimagex split\fR \fIarguments...\fR
 .br
 \fBimagex unmount\fR \fIarguments...\fR
@@ -145,6 +147,7 @@ Report bugs to ebiggers3@gmail.com.
 .BR imagex-join (1),
 .BR imagex-mount (1),
 .BR imagex-mountrw (1),
+.BR imagex-optimize (1),
 .BR imagex-split (1),
 .BR imagex-unmount (1),
 
index 266dc0d45070080297b09e0910cf50fd66254174..0b7c96b0b7556f9aa72471e100583b3d307814a2 100644 (file)
@@ -34,6 +34,7 @@
 #include <limits.h>
 #include <sys/stat.h>
 #include <unistd.h>
+#include <inttypes.h>
 
 #define ARRAY_LEN(array) (sizeof(array) / sizeof(array[0]))
 
@@ -52,6 +53,7 @@ enum imagex_op_type {
        JOIN,
        MOUNT,
        MOUNTRW,
+       OPTIMIZE,
        SPLIT,
        UNMOUNT,
 };
@@ -64,7 +66,7 @@ static const char *usage_strings[] = {
 "imagex append (DIRECTORY | NTFS_VOLUME) WIMFILE [IMAGE_NAME]\n"
 "                     [DESCRIPTION] [--boot] [--check] [--flags EDITION_ID]\n"
 "                     [--verbose] [--dereference] [--config=FILE]\n"
-"                     [--threads=NUM_THREADS]\n",
+"                     [--threads=NUM_THREADS] [--rebuild]\n",
 [APPLY] =
 "imagex apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n"
 "                    (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n"
@@ -82,7 +84,7 @@ static const char *usage_strings[] = {
 "imagex export SRC_WIMFILE (SRC_IMAGE_NUM | SRC_IMAGE_NAME | all ) \n"
 "              DEST_WIMFILE [DEST_IMAGE_NAME] [DEST_IMAGE_DESCRIPTION]\n"
 "              [--boot] [--check] [--compress=TYPE] [--ref=\"GLOB\"]\n"
-"              [--threads=NUM_THREADS]\n",
+"              [--threads=NUM_THREADS] [--rebuild]\n",
 [INFO] =
 "imagex info WIMFILE [IMAGE_NUM | IMAGE_NAME] [NEW_NAME]\n"
 "                   [NEW_DESC] [--boot] [--check] [--header] [--lookup-table]\n"
@@ -96,6 +98,8 @@ static const char *usage_strings[] = {
 [MOUNTRW] =
 "imagex mountrw WIMFILE [IMAGE_NUM | IMAGE_NAME] DIRECTORY\n"
 "                      [--check] [--debug] [--streams-interface=INTERFACE]\n",
+[OPTIMIZE] =
+"imagex optimize WIMFILE [--check]\n",
 [SPLIT] =
 "imagex split WIMFILE SPLIT_WIMFILE PART_SIZE_MB [--check]\n",
 [UNMOUNT] =
@@ -125,6 +129,7 @@ static const struct option capture_or_append_options[] = {
        {"flags",       required_argument, NULL, 'f'},
        {"verbose",     no_argument,       NULL, 'v'},
        {"threads",     required_argument, NULL, 't'},
+       {"rebuild",     no_argument,       NULL, 'R'},
        {NULL, 0, NULL, 0},
 };
 static const struct option delete_options[] = {
@@ -138,6 +143,7 @@ static const struct option export_options[] = {
        {"compress",   required_argument, NULL, 'x'},
        {"ref",        required_argument, NULL, 'r'},
        {"threads",    required_argument, NULL, 't'},
+       {"rebuild",    no_argument,       NULL, 'R'},
        {NULL, 0, NULL, 0},
 };
 
@@ -165,6 +171,11 @@ static const struct option mount_options[] = {
        {NULL, 0, NULL, 0},
 };
 
+static const struct option optimize_options[] = { /* long options for `imagex optimize' */
+       {"check", no_argument, NULL, 'c'}, /* --check: no argument, reported as 'c' */
+       {NULL, 0, NULL, 0}, /* all-zero entry ends the table */
+};
+
 static const struct option split_options[] = {
        {"check", no_argument, NULL, 'c'},
        {NULL, 0, NULL, 0},
@@ -250,6 +261,15 @@ static int get_compression_type(const char *optarg)
        }
 }
 
+/* Return the size in bytes of the file named @filename as reported by stat(),
+ * or (off_t)-1 if stat() fails. */
+static off_t file_get_size(const char *filename)
+{
+       struct stat stbuf;
+
+       if (stat(filename, &stbuf) != 0)
+               return (off_t)-1;
+       return stbuf.st_size;
+}
+
 static char *file_get_contents(const char *filename, size_t *len_ret)
 {
        struct stat stbuf;
@@ -542,6 +562,9 @@ static int imagex_capture_or_append(int argc, const char **argv)
                        if (num_threads == UINT_MAX)
                                return -1;
                        break;
+               case 'R':
+                       write_flags |= WIMLIB_WRITE_FLAG_REBUILD;
+                       break;
                default:
                        usage(cmd);
                        return -1;
@@ -801,6 +824,9 @@ static int imagex_export(int argc, const char **argv)
                        if (num_threads == UINT_MAX)
                                return -1;
                        break;
+               case 'R':
+                       write_flags |= WIMLIB_WRITE_FLAG_REBUILD;
+                       break;
                default:
                        usage(EXPORT);
                        return -1;
@@ -1320,6 +1346,71 @@ mount_usage:
        return -1;
 }
 
+/*
+ * Rebuild a WIM file in place and report how its size changed.
+ *
+ * Expects exactly one positional argument (the WIM file) and accepts the
+ * optional --check flag, which sets the CHECK_INTEGRITY open and write flags.
+ *
+ * Returns 0 on success, -1 on a usage error, or the nonzero code returned by
+ * wimlib_open_wim() or wimlib_overwrite().  The "optimized size" and "Space
+ * saved" lines are only printed when the overwrite succeeded.
+ */
+static int imagex_optimize(int argc, const char **argv)
+{
+       int c;
+       int open_flags = WIMLIB_OPEN_FLAG_SHOW_PROGRESS;
+       int write_flags = WIMLIB_WRITE_FLAG_REBUILD |
+                         WIMLIB_WRITE_FLAG_SHOW_PROGRESS;
+       int ret;
+       WIMStruct *w;
+       const char *wimfile;
+       off_t old_size;
+       off_t new_size;
+
+       for_opt(c, optimize_options) {
+               switch (c) {
+               case 'c':
+                       open_flags |= WIMLIB_OPEN_FLAG_CHECK_INTEGRITY;
+                       write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
+                       break;
+               default:
+                       usage(OPTIMIZE);
+                       return -1;
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 1) {
+               usage(OPTIMIZE);
+               return -1;
+       }
+
+       wimfile = argv[0];
+
+       ret = wimlib_open_wim(wimfile, open_flags, &w);
+       if (ret != 0)
+               return ret;
+
+       old_size = file_get_size(wimfile);
+       printf("`%s' original size: ", wimfile);
+       if (old_size == -1) {
+               puts("Unknown");
+       } else {
+               /* off_t is signed and of varying width; cast so the argument
+                * really is the uint64_t that PRIu64 expects. */
+               printf("%"PRIu64" KiB\n", (uint64_t)old_size >> 10);
+       }
+
+       ret = wimlib_overwrite(w, write_flags, 0);
+       if (ret != 0) {
+               /* Nothing was optimized, so do not report a new size or
+                * savings figure. */
+               goto out;
+       }
+
+       new_size = file_get_size(wimfile);
+       printf("`%s' optimized size: ", wimfile);
+       if (new_size == -1)
+               puts("Unknown");
+       else
+               printf("%"PRIu64" KiB\n", (uint64_t)new_size >> 10);
+
+       fputs("Space saved: ", stdout);
+       if (new_size != -1 && old_size != -1) {
+               /* The difference is negative if the WIM grew; divide rather
+                * than right-shift a possibly negative value. */
+               printf("%lld KiB\n",
+                      ((long long)old_size - (long long)new_size) / 1024);
+       } else {
+               puts("Unknown");
+       }
+
+out:
+       wimlib_free(w);
+       return ret;
+}
+
 /* Split a WIM into a spanned set */
 static int imagex_split(int argc, const char **argv)
 {
@@ -1408,6 +1499,7 @@ static const struct imagex_command imagex_commands[] = {
        {"join",    imagex_join,              JOIN},
        {"mount",   imagex_mount_rw_or_ro,    MOUNT},
        {"mountrw", imagex_mount_rw_or_ro,    MOUNTRW},
+       {"optimize",imagex_optimize,          OPTIMIZE},
        {"split",   imagex_split,             SPLIT},
        {"unmount", imagex_unmount,           UNMOUNT},
 };
index bf02a6b108ecf58ebe6ca4a453cb6346fd2a1e7c..d7f81eb60a436aafaaf96ce9d5237bfb1b23ec00 100644 (file)
@@ -372,6 +372,8 @@ int write_integrity_table(FILE *fp,
        off_t cur_offset;
        u32 new_table_size;
 
+       wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
+
        cur_offset = ftello(fp);
        if (cur_offset == -1) {
                ERROR_WITH_ERRNO("Failed to get offset in WIM");
index e3884c9c9583a1704a588e13ffa8a75e526522d3..6c45ca38185b1c6b0125e5ecef1c1e490a3482c0 100644 (file)
@@ -498,8 +498,9 @@ extern int open_wim_writable(WIMStruct *w, const char *path,
 /* Internal use only */
 #define WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE      0x80000000
 #define WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE 0x40000000
+#define WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML  0x20000000
 
-#define WIMLIB_WRITE_MASK_PUBLIC               0x3fffffff
+#define WIMLIB_WRITE_MASK_PUBLIC               0x1fffffff
 
 /* write.c */
 extern int begin_write(WIMStruct *w, const char *path, int write_flags);
index 7315ec903dd513ece056448cfe99764acb01d4a1..96d6676729aaa307acb4f340326e2ce37de48c2d 100644 (file)
@@ -1370,8 +1370,33 @@ static int write_wim_streams(WIMStruct *w, int image, int write_flags,
 }
 
 /*
- * Write the lookup table, xml data, and integrity table, then overwrite the WIM
- * header.
+ * Finish writing a WIM file: write the lookup table, xml data, and integrity
+ * table (optional), then overwrite the WIM header.
+ *
+ * write_flags is a bitwise OR of the following:
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_CHECK_INTEGRITY:
+ *             Include an integrity table.
+ *
+ *     (public)  WIMLIB_WRITE_FLAG_SHOW_PROGRESS:
+ *             Show progress information when (if) writing the integrity table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE:
+ *             Don't write the lookup table.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE:
+ *             When (if) writing the integrity table, re-use entries from the
+ *             existing integrity table, if possible.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML:
+ *             After writing the XML data but before writing the integrity
+ *             table, write a temporary WIM header and flush the stream so that
+ *             the WIM is less likely to become corrupted upon abrupt program
+ *             termination.
+ *
+ *     (private) WIMLIB_WRITE_FLAG_FSYNC:
+ *             fsync() the output file before closing it.
+ *
  */
 int finish_write(WIMStruct *w, int image, int write_flags)
 {
@@ -1399,6 +1424,30 @@ int finish_write(WIMStruct *w, int image, int write_flags)
                goto out;
 
        if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
+               if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
+                       struct wim_header checkpoint_hdr;
+                       memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header));
+                       memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry));
+                       if (fseeko(out, 0, SEEK_SET) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+                       ret = write_header(&checkpoint_hdr, out);
+                       if (ret != 0)
+                               goto out;
+
+                       if (fflush(out) != 0) {
+                               ERROR_WITH_ERRNO("Can't write data to WIM");
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+
+                       if (fseeko(out, 0, SEEK_END) != 0) {
+                               ret = WIMLIB_ERR_WRITE;
+                               goto out;
+                       }
+               }
+
                off_t old_lookup_table_end;
                off_t new_lookup_table_end;
                bool show_progress;
@@ -1452,8 +1501,6 @@ int finish_write(WIMStruct *w, int image, int write_flags)
 
        if (fseeko(out, 0, SEEK_SET) != 0) {
                ret = WIMLIB_ERR_WRITE;
-               ERROR_WITH_ERRNO("Failed to seek to beginning of WIM "
-                                "to overwrite header");
                goto out;
        }
 
@@ -1587,6 +1634,63 @@ static int find_new_streams(struct lookup_table_entry *lte, void *arg)
        return 0;
 }
 
+/*
+ * Overwrite a WIM, possibly appending streams to it.
+ *
+ * A WIM looks like (or is supposed to look like) the following:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * If we are not adding any streams or metadata resources, the lookup table is
+ * unchanged--- so we only need to overwrite the XML data, integrity table, and
+ * header.  This operation is potentially unsafe if the program is abruptly
+ * terminated while the XML data or integrity table are being overwritten, but
+ * before the new header has been written.  To partially alleviate this problem,
+ * a special flag (WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) is passed to
+ * finish_write() to cause a temporary WIM header to be written after the XML
+ * data has been written.  This may prevent the WIM from becoming corrupted if
+ * the program is terminated while the integrity table is being calculated (but
+ * no guarantees, due to write re-ordering...).
+ *
+ * If we are adding new streams or images (metadata resources), the lookup table
+ * needs to be changed, and those streams need to be written.  In this case, we
+ * try to perform a safe update of the WIM file by writing the streams *after*
+ * the end of the previous WIM, then writing the new lookup table, XML data, and
+ * (optionally) integrity table following the new streams.  This will produce a
+ * layout like the following:
+ *
+ *                   Header (212 bytes)
+ *                   (OLD) Streams and metadata resources (variable size)
+ *                   (OLD) Lookup table (variable size)
+ *                   (OLD) XML data (variable size)
+ *                   (OLD) Integrity table (optional) (variable size)
+ *                   (NEW) Streams and metadata resources (variable size)
+ *                   (NEW) Lookup table (variable size)
+ *                   (NEW) XML data (variable size)
+ *                   (NEW) Integrity table (optional) (variable size)
+ *
+ * At all points, the WIM is valid as nothing points to the new data yet.  Then,
+ * the header is overwritten to point to the new lookup table, XML data, and
+ * integrity table, to produce the following layout:
+ *
+ *                   Header (212 bytes)
+ *                   Streams and metadata resources (variable size)
+ *                   Nothing (variable size)
+ *                   More Streams and metadata resources (variable size)
+ *                   Lookup table (variable size)
+ *                   XML data (variable size)
+ *                   Integrity table (optional) (variable size)
+ *
+ * This method allows an image to be appended to a large WIM very quickly, and
+ * is crash-safe except in the case of write re-ordering, but the
+ * disadvantage is that a small hole is left in the WIM where the old lookup
+ * table, xml data, and integrity table were.  (These usually only take up a
+ * small amount of space compared to the streams, however.)
+ */
 static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
                                 unsigned num_threads,
                                 int modified_image_idx)
@@ -1636,29 +1740,24 @@ static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
        if (ret != 0)
                return ret;
 
-       DEBUG("old_wim_end = %"PRIu64, old_wim_end);
+       if (modified_image_idx == w->hdr.image_count) {
+               /* If no images are modified, a new lookup table does not need
+                * to be written. */
+               wimlib_assert(list_empty(&stream_list));
+               old_wim_end = w->hdr.lookup_table_res_entry.offset +
+                             w->hdr.lookup_table_res_entry.size;
+               write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |
+                              WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML;
+       }
 
        INIT_LIST_HEAD(&stream_list);
        for_lookup_table_entry(w->lookup_table, find_new_streams,
                               &stream_list);
 
-       {
-               u64 num_new_streams = 0;
-               struct list_head *cur;
-               list_for_each(cur, &stream_list)
-                       num_new_streams++;
-               DEBUG("%"PRIu64" new streams to write", num_new_streams);
-       }
-
-       {
-               bool trunc = false;
-               bool need_readable = false;
-               if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
-                       need_readable = true;
-               ret = open_wim_writable(w, w->filename, trunc, need_readable);
-               if (ret != 0)
-                       return ret;
-       }
+       ret = open_wim_writable(w, w->filename, false,
+                               (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) != 0);
+       if (ret != 0)
+               return ret;
 
        if (fseeko(w->out_fp, old_wim_end, SEEK_SET) != 0) {
                ERROR_WITH_ERRNO("Can't seek to end of WIM");