Support for splitting WIMs
author Eric Biggers <ebiggers3@gmail.com>
Sun, 20 May 2012 04:06:23 +0000 (23:06 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Sun, 20 May 2012 04:06:23 +0000 (23:06 -0500)
Added file split.c:  wimlib_split() function

21 files changed:
configure
configure.ac
doc/Makefile.am
doc/Makefile.in
doc/imagex-export.1.in
doc/imagex-split.1.in [new file with mode: 0644]
doc/imagex.1.in
programs/imagex.c
src/Makefile.am
src/Makefile.in
src/header.c
src/integrity.c
src/join.c
src/lookup_table.h
src/resource.c
src/split.c [new file with mode: 0644]
src/wimlib.h
src/wimlib_internal.h
src/write.c
src/xml.c
src/xml.h

index 4337b09..36c8ae3 100755 (executable)
--- a/configure
+++ b/configure
@@ -11625,7 +11625,7 @@ CC="$lt_save_CC"
 
 ac_config_headers="$ac_config_headers config.h"
 
-ac_config_files="$ac_config_files Makefile doc/Makefile doc/Doxyfile src/Makefile programs/Makefile wimlib.pc doc/imagex.1 doc/imagex-append.1 doc/imagex-apply.1 doc/imagex-capture.1 doc/imagex-delete.1 doc/imagex-dir.1 doc/imagex-export.1 doc/imagex-info.1 doc/imagex-join.1 doc/imagex-mount.1 doc/imagex-mountrw.1 doc/imagex-unmount.1 doc/mkwinpeimg.1 rpm/wimlib.spec archlinux/PKGBUILD"
+ac_config_files="$ac_config_files Makefile doc/Makefile doc/Doxyfile src/Makefile programs/Makefile wimlib.pc doc/imagex.1 doc/imagex-append.1 doc/imagex-apply.1 doc/imagex-capture.1 doc/imagex-delete.1 doc/imagex-dir.1 doc/imagex-export.1 doc/imagex-info.1 doc/imagex-join.1 doc/imagex-mount.1 doc/imagex-mountrw.1 doc/imagex-split.1 doc/imagex-unmount.1 doc/mkwinpeimg.1 rpm/wimlib.spec archlinux/PKGBUILD"
 
 ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
@@ -14742,6 +14742,7 @@ do
     "doc/imagex-join.1") CONFIG_FILES="$CONFIG_FILES doc/imagex-join.1" ;;
     "doc/imagex-mount.1") CONFIG_FILES="$CONFIG_FILES doc/imagex-mount.1" ;;
     "doc/imagex-mountrw.1") CONFIG_FILES="$CONFIG_FILES doc/imagex-mountrw.1" ;;
+    "doc/imagex-split.1") CONFIG_FILES="$CONFIG_FILES doc/imagex-split.1" ;;
     "doc/imagex-unmount.1") CONFIG_FILES="$CONFIG_FILES doc/imagex-unmount.1" ;;
     "doc/mkwinpeimg.1") CONFIG_FILES="$CONFIG_FILES doc/mkwinpeimg.1" ;;
     "rpm/wimlib.spec") CONFIG_FILES="$CONFIG_FILES rpm/wimlib.spec" ;;
index 22e1c88..0e2db09 100644 (file)
@@ -23,6 +23,7 @@ AC_CONFIG_FILES([Makefile
                doc/imagex-join.1
                doc/imagex-mount.1
                doc/imagex-mountrw.1
+               doc/imagex-split.1
                doc/imagex-unmount.1
                doc/mkwinpeimg.1
                rpm/wimlib.spec
index 43583c4..e7c1872 100644 (file)
@@ -9,6 +9,7 @@ man1_MANS =      imagex.1 \
                 imagex-join.1 \
                 imagex-mount.1 \
                 imagex-mountrw.1 \
+                imagex-split.1 \
                 imagex-unmount.1 \
                 mkwinpeimg.1
 
index 040dbc9..657d2df 100644 (file)
@@ -57,8 +57,9 @@ DIST_COMMON = $(srcdir)/Doxyfile.in $(srcdir)/Makefile.am \
        $(srcdir)/imagex-delete.1.in $(srcdir)/imagex-dir.1.in \
        $(srcdir)/imagex-export.1.in $(srcdir)/imagex-info.1.in \
        $(srcdir)/imagex-join.1.in $(srcdir)/imagex-mount.1.in \
-       $(srcdir)/imagex-mountrw.1.in $(srcdir)/imagex-unmount.1.in \
-       $(srcdir)/imagex.1.in $(srcdir)/mkwinpeimg.1.in
+       $(srcdir)/imagex-mountrw.1.in $(srcdir)/imagex-split.1.in \
+       $(srcdir)/imagex-unmount.1.in $(srcdir)/imagex.1.in \
+       $(srcdir)/mkwinpeimg.1.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_prog_nasm.m4 \
        $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/libtool.m4 \
@@ -72,7 +73,7 @@ CONFIG_HEADER = $(top_builddir)/config.h
 CONFIG_CLEAN_FILES = Doxyfile imagex.1 imagex-append.1 imagex-apply.1 \
        imagex-capture.1 imagex-delete.1 imagex-dir.1 imagex-export.1 \
        imagex-info.1 imagex-join.1 imagex-mount.1 imagex-mountrw.1 \
-       imagex-unmount.1 mkwinpeimg.1
+       imagex-split.1 imagex-unmount.1 mkwinpeimg.1
 CONFIG_CLEAN_VPATH_FILES =
 SOURCES =
 DIST_SOURCES =
@@ -250,6 +251,7 @@ man1_MANS = imagex.1 \
                 imagex-join.1 \
                 imagex-mount.1 \
                 imagex-mountrw.1 \
+                imagex-split.1 \
                 imagex-unmount.1 \
                 mkwinpeimg.1
 
@@ -310,6 +312,8 @@ imagex-mount.1: $(top_builddir)/config.status $(srcdir)/imagex-mount.1.in
        cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 imagex-mountrw.1: $(top_builddir)/config.status $(srcdir)/imagex-mountrw.1.in
        cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
+imagex-split.1: $(top_builddir)/config.status $(srcdir)/imagex-split.1.in
+       cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 imagex-unmount.1: $(top_builddir)/config.status $(srcdir)/imagex-unmount.1.in
        cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
 mkwinpeimg.1: $(top_builddir)/config.status $(srcdir)/mkwinpeimg.1.in
index 6ec5716..4bedcb9 100644 (file)
@@ -54,6 +54,10 @@ Specifies the compression type for \fIDEST_WIMFILE\fR.  This is only valid if
 is specified but \fITYPE\fR is not, the compression type is taken to be
 "maximum", which is LZX compression.  "fast" compression is XPRESS compression.
 
+.SH NOTES
+
+\fBimagex export\fR does not support split WIMs.
+
 .SH EXAMPLES
 .IP 
 image export boot.wim 2 image2.wim --compress=maximum
@@ -61,6 +65,7 @@ image export boot.wim 2 image2.wim --compress=maximum
 Export the second image of 'boot.wim' to the new WIM file 'image2.wim', and
 change the compression type to maximum, if it wasn't maximum already.
 
+
 .SH SEE ALSO
 .BR imagex (1)
 
diff --git a/doc/imagex-split.1.in b/doc/imagex-split.1.in
new file mode 100644 (file)
index 0000000..270d4bb
--- /dev/null
@@ -0,0 +1,41 @@
+.TH IMAGEX "1" "May 2012" "imagex (wimlib) wimlib @VERSION@" "User Commands"
+.SH NAME
+imagex split \- Split a WIM into multiple parts
+
+.SH SYNOPSIS
+\fBimagex split\fR \fIWIMFILE\fR \fISPLIT_WIMFILE\fR \fIPART_SIZE\fR [\fIOPTION...\fR]
+
+.SH DESCRIPTION
+.PP
+
+Splits \fIWIMFILE\fR into parts with size at most \fIPART_SIZE\fR mebibytes
+(MiB), with the first part having the name \fISPLIT_WIMFILE\fR and the other
+parts having names numbered in order of the parts.
+
+.SH OPTIONS
+.TP 6
+\fB--check\fR
+When reading \fIWIMFILE\fR, verify its integrity if the integrity table is
+present; additionally, when writing each \fISPLIT_WIMFILE\fR, write an integrity
+table.  If this option is not specified, no integrity tables are included in the
+split WIM files, even if there was one in the original WIM.
+
+.SH EXAMPLES
+.IP 
+imagex split windows.wim windows.swm 100
+.LP
+Splits the WIM 'windows.wim' into 'windows.swm', 'windows2.swm', 'windows3.swm',
+etc. where each part is at most 100 MiB.
+
+.SH NOTE
+
+It is possible for the size of the parts to exceed the \fIPART_SIZE\fR given.
+This is impossible to avoid and Microsoft's program has this problem as well
+because the WIM file format provides no way to divide a single file resource in
+the WIM among multiple split WIM parts.  So if you, for example, have a file
+inside the WIM that is 100 MiB, then an uncompressed split WIM will have at
+least one part that is 100 MiB in size to contain that file.  If the WIM
+resources are compressed then less space would be needed.
+
+.SH SEE ALSO
+.BR imagex (1)
index c65cd2d..d526ff7 100644 (file)
@@ -22,6 +22,8 @@ imagex \- Create, modify, extract, mount, or unmount a WIM (Windows Imaging Form
 .br
 \fBimagex mountrw\fR \fIarguments...\fR
 .br
+\fBimagex split\fR \fIarguments...\fR
+.br
 \fBimagex unmount\fR \fIarguments...\fR
 
 .SH DESCRIPTION
@@ -47,14 +49,6 @@ Mount an image in a WIM read-only (\fBimagex mount\fR)
 .IP \[bu] 2
 Mount an image in a WIM read-write (\fBimagex mountrw\fR)
 .IP \[bu] 2
-LZX decompression and compression
-.IP \[bu] 2
-XPRESS decompression and compression
-.IP \[bu] 2
-Integrity table
-.IP \[bu] 2
-XML data (parsed and written using \fBlibxml\fR(3))
-.IP \[bu] 2
 Create a WIM from a directory (\fBimagex capture\fR)
 .IP \[bu] 2
 Append a directory onto a WIM as a new image (\fBimagex append\fR)
@@ -70,6 +64,15 @@ Change the name or description of an image in the WIM (\fBimagex info\fR)
 Change which image in a WIM is bootable (\fBimagex info\fR)
 .IP \[bu] 2
 Combining split WIMs into one WIM (\fBimage join\fR)
+.IP \[bu] 2
+Splitting a WIM into multiple parts (\fBimagex split\fR)
+.IP \[bu] 2
+Support for all WIM compression types, both compression and decompression (LZX,
+XPRESS, and none)
+.IP \[bu] 2
+Integrity table
+.IP \[bu] 2
+XML data (parsed and written using \fBlibxml\fR(3))
 
 .SH UNSUPPORTED FEATURES
 The following features are currently unsupported:
@@ -80,18 +83,18 @@ This does not seem to matter for Windows PE, but this means that you should not
 use this program to image a drive containing Windows Vista/7/8 and expect it to
 be applied with the correct file permissions.
 .IP \[bu] 2
-Split WIMs are not fully supported.  These can be used to split up a WIM to fit
-on multiple CDs, if you can't use a DVD for some reason.  You can use
-\fBimagex join\fR to combine split WIMs, but you cannot yet create split WIMs or
-mount them directly.
+Alternate file streams are unsupported and will be lost when wimlib writes a WIM
+file.  Note that you shouldn't really have these on your Windows system anyway
+because they are unneeded and a security risk.
 .IP \[bu] 2
-The \fB--verify\fR option, for all commands that use it.  Without this option,
-there theoretically could be a SHA1 hash collision between two files, although
-it's very unlikely.
+Directly applying or mounting split WIMs is unsupported.  You have to combine
+them together with \fBimagex join\fR first.
 .IP \[bu] 2
-The \fB--config\fR option, for all commands that use it. 
+The \fB--verify\fR option is unsupported for all commands that use it.  Without
+this option, there theoretically could be a SHA1 hash collision between two
+files, although it's very unlikely.
 .IP \[bu] 2
-Alternate stream entries
+The \fB--config\fR option, for all commands that use it
 .IP \[bu] 2
 Different versions of the WIM file format (if different versions even exist)
 
@@ -101,15 +104,30 @@ Also see the Doxygen documentation for Wimlib.
 
 See \fBUNSUPPORTED FEATURES\fR.
 
-The \fB/scroll\fR and \fB/log\fR switches from Microsoft's version imagex are
-not planned to be implemented.  Note that to scroll the output in the UNIX shell
-you can just pipe the output into \fBless\fR(1).
+The most important difference is that this version of \fBimagex\fR cannot
+capture and restore Windows images losslessly because file permissions and
+alternate file streams are ignored.  This is because Microsoft designed the WIM
+format to be specific to their NTFS filesystem and the Windows security
+model/API, which is difficult to support in a non-Windows program.  You can
+still create images of Windows PE, however.
+
+See the documentation for each subcommand of \fBimagex\fR; in some cases they do
+not do exactly the same thing as imagex.exe.
 
 Some features, such as the ability to keep files hard-linked when they are
 extracted from a WIM, are not available in Microsoft's version of imagex.
+Also, there doesn't seem to be an equivalent of \fBimagex join\fR in Microsoft's
+version; you would have to use \fBimagex.exe /export\fR, but that doesn't let
+you export all images at once.
+
+Microsoft's version has some weird limitations, like it won't let you extract a
+WIM on a shared folder, and it requires some commands to be run only from
+Windows PE and not from regular Windows.  This version does not have these
+unusual limitations, although it won't actually run on Windows anyway.
 
-See the documentation for each command; in some cases they do not do exactly the
-same thing as imagex.exe.
+The \fB/scroll\fR and \fB/log\fR switches from Microsoft's version of imagex
+will not be implemented.  Note that to scroll the output in the UNIX shell you
+can just pipe the output into \fBless\fR(1).
 
 Obviously, this version of imagex is free software but Microsoft's version is
 not.
@@ -117,13 +135,13 @@ not.
 .SH WARNING
 
 Note: \fBwimlib\fR and \fBimagex\fR are experimental.  Use Microsoft's
-imagex.exe if you have to make sure your WIM files are made correctly.  Not all
-features listed under \fBSUPPORTED FEATURES\fR have been thoroughly tested.
+imagex.exe if you have to make sure your WIM files are made "correctly".  Not
+all features listed under \fBSUPPORTED FEATURES\fR have been thoroughly tested.
 Feel free to submit a bug report if you find a bug.
 
 Some parts of the WIM file format are poorly documented or even completely
-undocumented, so these parts had to be reverse engineered for compatibility
-purposes.
+undocumented, so I've just had to do the best I can to read and write WIMs in a
+way that appears to be compatible with Microsoft's software.
 
 .SH REPORTING BUGS
 
@@ -140,5 +158,6 @@ Report bugs to ebiggers3@gmail.com.
 .BR imagex-join (1),
 .BR imagex-mount (1),
 .BR imagex-mountrw (1),
+.BR imagex-split (1),
 .BR imagex-unmount (1),
 
index 66772ae..d6d57d1 100644 (file)
@@ -48,6 +48,7 @@ enum imagex_op_type {
        JOIN,
        MOUNT,
        MOUNTRW,
+       SPLIT,
        UNMOUNT,
 };
 
@@ -100,6 +101,8 @@ static const char *usage_strings[] = {
 [MOUNTRW] = 
 "    imagex mountrw WIMFILE [IMAGE_NUM | IMAGE_NAME] DIRECTORY\n"
 "        [--check] [--debug]\n",
+[SPLIT] = 
+"    imagex split WIMFILE SPLIT_WIMFILE PART_SIZE [--check]\n",
 [UNMOUNT] = 
 "    imagex unmount DIRECTORY [--commit] [--check]\n",
 };
@@ -162,7 +165,6 @@ static const struct option info_options[] = {
 
 static const struct option join_options[] = {
        {"check", no_argument, NULL, 'c'},
-       {"output", required_argument, NULL, 'o'},
        {NULL, 0, NULL, 0},
 };
 
@@ -172,6 +174,11 @@ static const struct option mount_options[] = {
        {NULL, 0, NULL, 0},
 };
 
+static const struct option split_options[] = {
+       {"check", no_argument, NULL, 'c'},
+       {NULL, 0, NULL, 0},
+};
+
 static const struct option unmount_options[] = {
        {"commit", no_argument, NULL, 'c'},
        {"check", no_argument, NULL, 'C'},
@@ -1018,8 +1025,8 @@ static int imagex_join(int argc, const char **argv)
        argc -= optind;
        argv += optind;
 
-       if (argc < 3) {
-               imagex_error("Must specify at least two split WIM "
+       if (argc < 2) {
+               imagex_error("Must specify at least one split WIM "
                                "(.swm) parts to join!\n");
                goto err;
        }
@@ -1103,6 +1110,34 @@ done:
        return ret;
 }
 
+/* Split a WIM into a spanned set */
+static int imagex_split(int argc, const char **argv)
+{
+       int c;
+       int flags = WIMLIB_OPEN_FLAG_SHOW_PROGRESS;
+       unsigned long part_size;
+
+       for_opt(c, split_options) {
+               switch (c) {
+               case 'c':
+                       flags |= WIMLIB_OPEN_FLAG_CHECK_INTEGRITY;
+                       break;
+               default:
+                       usage(SPLIT);
+                       return -1;
+               }
+       }
+       argc -= optind;
+       argv += optind;
+
+       if (argc != 3) {
+               usage(SPLIT);
+               return -1;
+       }
+       part_size = strtoul(argv[2], NULL, 10) * (1 << 20);
+       return wimlib_split(argv[0], argv[1], part_size, flags);
+}
+
 /* Unmounts an image. */
 static int imagex_unmount(int argc, const char **argv)
 {
@@ -1153,6 +1188,7 @@ static struct imagex_command imagex_commands[] = {
        {"join",    imagex_join,           JOIN},
        {"mount",   imagex_mount_rw_or_ro, MOUNT},
        {"mountrw", imagex_mount_rw_or_ro, MOUNTRW},
+       {"split",   imagex_split,          SPLIT},
        {"unmount", imagex_unmount,        UNMOUNT},
 };
 
index 547892e..28471ef 100644 (file)
@@ -38,6 +38,7 @@ other_srcs = \
        resource.c \
        sha1.c \
        sha1.h \
+       split.c \
        timestamp.h \
        util.c \
        util.h \
index b1bb54c..896fbbd 100644 (file)
@@ -103,8 +103,8 @@ libwim_la_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \
 am__objects_1 = comp.lo decomp.lo huffman.lo lz.lo lzx-common.lo \
        lzx-comp.lo lzx-decomp.lo xpress-comp.lo xpress-decomp.lo
 am__objects_2 = dentry.lo extract.lo header.lo integrity.lo join.lo \
-       lookup_table.lo modify.lo mount.lo resource.lo sha1.lo util.lo \
-       wim.lo write.lo xml.lo
+       lookup_table.lo modify.lo mount.lo resource.lo sha1.lo \
+       split.lo util.lo wim.lo write.lo xml.lo
 am_libwim_la_OBJECTS = $(am__objects_1) $(am__objects_2)
 libwim_la_OBJECTS = $(am_libwim_la_OBJECTS)
 DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
@@ -295,6 +295,7 @@ other_srcs = \
        resource.c \
        sha1.c \
        sha1.h \
+       split.c \
        timestamp.h \
        util.c \
        util.h \
@@ -408,6 +409,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mount.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/resource.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/split.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/util.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/wim.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/write.Plo@am__quote@
index c7bad0f..6f9668a 100644 (file)
@@ -165,10 +165,14 @@ int write_header(const struct wim_header *hdr, FILE *out)
        p = put_u32(p, hdr->flags);
        p = put_u32(p, (hdr->flags & WIM_HDR_FLAG_COMPRESSION) ? 
                                WIM_CHUNK_SIZE : 0);
-       randomize_byte_array(p, WIM_GID_LEN);
-       p += WIM_GID_LEN;
-       p = put_u16(p, 1); /* part number */
-       p = put_u16(p, 1); /* total parts */
+       /* byte 24 */
+
+       p = put_bytes(p, WIM_GID_LEN, hdr->guid);
+       p = put_u16(p, hdr->part_number);
+
+       /* byte 40 */
+
+       p = put_u16(p, hdr->total_parts);
        p = put_u32(p, hdr->image_count);
        p = put_resource_entry(p, &hdr->lookup_table_res_entry);
        p = put_resource_entry(p, &hdr->xml_res_entry);
@@ -205,6 +209,8 @@ int init_header(struct wim_header *hdr, int ctype)
                ERROR("Invalid compression type specified (%d)!\n", ctype);
                return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
        }
+       hdr->total_parts = 1;
+       hdr->part_number = 1;
        randomize_byte_array(hdr->guid, sizeof(hdr->guid));
        return 0;
 }
index 38a12ff..ce05906 100644 (file)
@@ -249,7 +249,7 @@ int write_integrity_table(FILE *out, u64 end_header_offset,
        u32   integrity_table_size;
        int   ret;
 
-       DEBUG("Writing integrity table.\n");
+       DEBUG("Writing integrity table\n");
        if (fseeko(out, end_header_offset, SEEK_SET) != 0) {
                ERROR("Failed to seek to byte %"PRIu64" of WIM "
                                "to calculate integrity data: %m\n",
@@ -301,6 +301,7 @@ int write_integrity_table(FILE *out, u64 end_header_offset,
                        fflush(stdout);
                }
 
+
                size_t bytes_to_read = min(INTEGRITY_CHUNK_SIZE, bytes_remaining);
                size_t bytes_read = fread(chunk_buf, 1, bytes_to_read, out);
                if (bytes_read != bytes_to_read) {
@@ -319,7 +320,9 @@ int write_integrity_table(FILE *out, u64 end_header_offset,
                bytes_remaining -= bytes_read;
        }
        if (show_progress)
-               putchar('\n');
+               puts("Calculating integrity checksums for WIM "
+                               "(0 bytes remaining, 100% done)"
+                               "                       ");
 
        if (fseeko(out, 0, SEEK_END) != 0) {
                ERROR("Failed to seek to end of WIM to write integrity "
index 1326804..b882777 100644 (file)
@@ -3,7 +3,6 @@
  *
  * Join split WIMs (sometimes named as .swm files) together into one WIM.
  *
- * Copyright (C) 2010 Carl Thijssen
  * Copyright (C) 2012 Eric Biggers
  *
  * wimlib - Library for working with WIM files 
 #include "lookup_table.h"
 #include "xml.h"
 
-static int join_resource(struct lookup_table_entry *lte, void *split_wim)
-{
-       FILE *split_wim_fp = ((WIMStruct*)split_wim)->fp;
-       FILE *joined_wim_fp = ((WIMStruct*)split_wim)->out_fp;
-       int ret;
-
-       u64 size = lte->resource_entry.size;
-       u64 offset = lte->resource_entry.offset;
-       off_t new_offset = ftello(joined_wim_fp);
-
-       if (new_offset == -1)
-               return WIMLIB_ERR_WRITE;
-
-       ret = copy_between_files(split_wim_fp, offset, joined_wim_fp, size);
-       if (ret != 0)
-               return ret;
-
-       memcpy(&lte->output_resource_entry, &lte->resource_entry, 
-                       sizeof(struct resource_entry));
-
-       lte->output_resource_entry.offset = new_offset;
-       lte->out_refcnt = lte->refcnt;
-       lte->part_number = 1;
-       return 0;
-}
-
 static int join_wims(WIMStruct **swms, uint num_swms, WIMStruct *joined_wim,
                     int write_flags)
 {
@@ -60,21 +33,40 @@ static int join_wims(WIMStruct **swms, uint num_swms, WIMStruct *joined_wim,
        FILE *out_fp = joined_wim->out_fp;
        u64 total_bytes = wim_info_get_total_bytes(swms[0]->wim_info);
 
-       /* The following loop writes both file resources and metadata resources
-        * because it loops over the lookup table entries rather than the dentry
-        * tree for the images */
+       swms[0]->write_metadata = false;
        for (i = 0; i < num_swms; i++) {
                if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
                        off_t cur_offset = ftello(out_fp);
                        printf("Writing resources from part %u of %u "
-                                       "(%"PRIu64" of %"PRIu64" bytes, %.2f%% done)\n",
+                                       "(%"PRIu64" of %"PRIu64" bytes, %.0f%% done)\n",
                                        i + 1, num_swms,
                                        cur_offset, total_bytes,
                                        (double)cur_offset / total_bytes * 100.0);
                }
+               swms[i]->fp = fopen(swms[i]->filename, "rb");
+               if (!swms[i]->fp) {
+                       ERROR("Failed to reopen `%s': %m\n", swms[i]->filename);
+                       return WIMLIB_ERR_OPEN;
+               }
                swms[i]->out_fp = out_fp;
+               swms[i]->hdr.part_number = 1;
                ret = for_lookup_table_entry(swms[i]->lookup_table, 
-                                            join_resource, swms[i]);
+                                            copy_resource, swms[i]);
+               if (ret != 0)
+                       return ret;
+               if (i != 0) {
+                       fclose(swms[i]->fp);
+                       swms[i]->fp = NULL;
+               }
+       }
+       swms[0]->write_metadata = true;
+       if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
+               printf("Writing %d metadata resources\n", 
+                       swms[0]->hdr.image_count);
+
+       for (i = 0; i < swms[0]->hdr.image_count; i++) {
+               ret = copy_resource(swms[0]->image_metadata[i].lookup_table_entry, 
+                                   swms[0]);
                if (ret != 0)
                        return ret;
        }
@@ -101,11 +93,12 @@ static int join_wims(WIMStruct **swms, uint num_swms, WIMStruct *joined_wim,
        swms[0]->hdr.lookup_table_res_entry.size = 
                                        xml_data_offset - lookup_table_offset;
 
-       swms[0]->hdr.flags &= ~WIM_HDR_FLAG_SPANNED;
 
        /* finish_write is called on the first swm, not the joined_wim, because
         * the first swm is the one that has the image metadata and XML data
         * attached to it.  */
+       swms[0]->hdr.flags &= ~WIM_HDR_FLAG_SPANNED;
+       swms[0]->hdr.total_parts = 1;
        return finish_write(swms[0], WIM_ALL_IMAGES, write_flags, 0);
 }
 
@@ -133,6 +126,11 @@ WIMLIBAPI int wimlib_join(const char **swm_names, int num_swms,
                if (ret != 0)
                        goto err;
 
+               /* don't open all the parts at the same time, in case there are
+                * a lot of them */
+               fclose(w->fp);
+               w->fp = NULL;
+
                if (i == 0) {
                        ctype = wimlib_get_compression_type(w);
                        guid = w->hdr.guid;
index b294d4f..5620e67 100644 (file)
@@ -47,6 +47,7 @@ struct lookup_table_entry {
        union {
                char *file_on_disk;
                char *staging_file_name;
+               struct lookup_table_entry *next_lte_in_swm;
        };
 
        union {
index 600bf5b..e315d33 100644 (file)
  */
 
 #include "wimlib_internal.h"
+#include "lookup_table.h"
 #include "io.h"
 #include "lzx.h"
 #include "xpress.h"
+#include "sha1.h"
 #include "dentry.h"
 #include <unistd.h>
 #include <errno.h>
 
+/* Used for buffering FILE IO */
+#define BUFFER_SIZE 4096
+
 /* 
  * Reads all or part of a compressed resource into an in-memory buffer.
  *
@@ -362,6 +367,7 @@ int read_uncompressed_resource(FILE *fp, u64 offset, u64 len,
        return 0;
 }
 
+
 /* 
  * Reads a WIM resource.
  *
@@ -462,6 +468,47 @@ int extract_resource_to_fd(WIMStruct *w, const struct resource_entry *entry,
        return ret;
 }
 
+/* 
+ * Copies the file resource specified by the lookup table entry @lte from the
+ * input WIM, pointed to by the fp field of the WIMStruct, to the output WIM,
+ * pointed to by the out_fp field of the WIMStruct.
+ *
+ * The output_resource_entry, out_refcnt, and part_number fields of @lte are
+ * updated.
+ *
+ * Metadata resources are skipped unless the write_metadata field of the
+ * WIMStruct is set; this lets callers copy them in a separate pass.
+ */
+int copy_resource(struct lookup_table_entry *lte, void *w)
+{
+       if ((lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) &&
+           !((WIMStruct*)w)->write_metadata) {
+               return 0;
+       }
+
+       FILE *in_fp = ((WIMStruct*)w)->fp;
+       FILE *out_fp = ((WIMStruct*)w)->out_fp;
+       int ret;
+       u64 size = lte->resource_entry.size;
+       u64 offset = lte->resource_entry.offset;
+       off_t new_offset = ftello(out_fp);
+
+       if (new_offset == -1)
+               return WIMLIB_ERR_WRITE;
+
+       ret = copy_between_files(in_fp, offset, out_fp, size);
+       if (ret != 0)
+               return ret;
+
+       memcpy(&lte->output_resource_entry, &lte->resource_entry, 
+                       sizeof(struct resource_entry));
+
+       lte->output_resource_entry.offset = new_offset;
+       lte->out_refcnt = lte->refcnt;
+       lte->part_number = ((WIMStruct*)w)->hdr.part_number;
+       return 0;
+}
+
 /* Reads the contents of a struct resource_entry, as represented in the on-disk
  * format, from the memory pointed to by @p, and fills in the fields of @entry.
  * A pointer to the byte after the memory read at @p is returned. */
@@ -505,6 +552,351 @@ int resource_compression_type(int wim_ctype, int reshdr_flags)
        }
 }
 
+
+
+/*
+ * Copies bytes between two file streams.
+ *
+ * Copies @len bytes from @in to @out, at the current position in @out, and at
+ * an offset of @in_offset in @in.
+ *
+ * Returns 0 on success, WIMLIB_ERR_READ if seeking in or reading from @in
+ * fails (including reaching end-of-file before @len bytes were read), or
+ * WIMLIB_ERR_WRITE if writing to @out fails.
+ */
+int copy_between_files(FILE *in, off_t in_offset, FILE *out, size_t len)
+{
+       u8 buf[BUFFER_SIZE];
+
+       if (fseeko(in, in_offset, SEEK_SET) != 0) {
+               /* off_t need not be the type PRIu64 expects, so convert
+                * explicitly to keep the format and its argument in
+                * agreement. */
+               ERROR("Failed to seek to byte %"PRIu64" of input file: %m\n",
+                               (u64)in_offset);
+               return WIMLIB_ERR_READ;
+       }
+       /* To reduce memory usage and improve speed, read and write BUFFER_SIZE
+        * bytes at a time. */
+       while (len != 0) {
+               size_t n = min(len, BUFFER_SIZE);
+
+               if (fread(buf, 1, n, in) != n) {
+                       if (feof(in)) {
+                               ERROR("Unexpected EOF when copying data "
+                                               "between files\n");
+                       } else {
+                               ERROR("Error copying data between files: %m\n");
+                       }
+                       return WIMLIB_ERR_READ;
+               }
+
+               if (fwrite(buf, 1, n, out) != n) {
+                       ERROR("Error copying data between files: %m\n");
+                       return WIMLIB_ERR_WRITE;
+               }
+               len -= n;
+       }
+       return 0;
+}
+
+
+/*
+ * Reads a WIM file resource one chunk at a time and writes the uncompressed
+ * data to a file stream.
+ *
+ * @in:                    Stream containing the resource.
+ * @size:           Size of the resource as stored in @in.
+ * @original_size:  Uncompressed size of the resource.
+ * @offset:        Offset of the start of the resource in @in.
+ * @input_ctype:    Compression type of the resource in @in.
+ * @out:           Stream the uncompressed data is written to, at its current
+ *                     position.
+ *
+ * Returns 0 on success, the error code of read_resource() if a chunk cannot
+ * be read, or WIMLIB_ERR_WRITE if writing to @out fails.
+ */
+static int uncompress_resource(FILE *in, u64 size, u64 original_size,
+                              off_t offset, int input_ctype, FILE *out)
+{
+       u8 chunk[WIM_CHUNK_SIZE];
+       /* Number of WIM_CHUNK_SIZE pieces (the last one may be shorter). */
+       u64 chunks_left = (original_size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
+       /* Position of the current chunk within the uncompressed data. */
+       u64 pos = 0;
+
+       while (chunks_left != 0) {
+               u64 chunk_len = min(WIM_CHUNK_SIZE, original_size - pos);
+               int status;
+
+               status = read_resource(in, size, original_size, offset,
+                                      input_ctype, chunk_len, pos, chunk);
+               if (status != 0)
+                       return status;
+
+               if (fwrite(chunk, 1, chunk_len, out) != chunk_len) {
+                       ERROR("Failed to write file resource: %m\n");
+                       return WIMLIB_ERR_WRITE;
+               }
+
+               pos += WIM_CHUNK_SIZE;
+               chunks_left--;
+       }
+       return 0;
+}
+
+/* 
+ * Transfers a file resource between two files, writing it compressed.  The file
+ * resource in the input file may be either compressed or uncompressed.
+ * Alternatively, the input resource may be in-memory, but it must be
+ * uncompressed.
+ *
+ * @in:                    The file stream that contains the file resource.  Ignored
+ *                     if uncompressed_resource != NULL.
+ * @uncompressed_resource:     If this pointer is not NULL, it points to an
+ *                                     array of @original_size bytes that are
+ *                                     the uncompressed input resource.
+ * @size:           The size of the resource in the input file.
+ * @original_size:  The original (uncompressed) size of the resource. 
+ * @offset:        The offset of the start of the resource in @in.  Ignored
+ *                     if uncompressed_resource != NULL.
+ * @input_ctype:    The compression type of the resource in @in.  Ignored if
+ *                     uncompressed_resource != NULL.
+ * @out:           The file stream to write the file resource to.
+ * @output_ctype:   The compression type to use when writing the resource to
+ *                     @out: LZX if it equals WIM_COMPRESSION_TYPE_LZX,
+ *                     XPRESS otherwise.
+ * @new_size_ret:   A location into which the new compressed size of the file
+ *                     resource (chunk table plus chunk data) is returned.
+ *                     It is only written on success.
+ *
+ * The resource is written as a table of chunk offsets (one entry for every
+ * chunk but the first) followed by the chunks themselves.  The table is
+ * reserved first, filled in while the chunks are written, and written for real
+ * at the end; the stream is left positioned at the end of @out.
+ *
+ * Returns 0 on success, WIMLIB_ERR_NOMEM if the chunk table cannot be
+ * allocated, WIMLIB_ERR_WRITE if positioning in or writing to @out fails, or
+ * the error code of read_resource() if a chunk cannot be read.
+ */
+static int recompress_resource(FILE *in, const u8 *uncompressed_resource, 
+                                       u64 size, u64 original_size,
+                                       off_t offset, int input_ctype, FILE *out,
+                                       int output_ctype, u64 *new_size_ret)
+{
+       int ret;
+       int (*compress)(const void *, uint, void *, uint *);
+       u8 uncompressed_buf[WIM_CHUNK_SIZE];
+       u8 compressed_buf[WIM_CHUNK_SIZE - 1];
+       u64 *chunk_offsets;
+       u64 cur_chunk_offset = 0;
+
+       if (output_ctype == WIM_COMPRESSION_TYPE_LZX)
+               compress = lzx_compress;
+       else
+               compress = xpress_compress;
+
+       /* An empty resource has no chunks and therefore no chunk table.
+        * Without this check the entry count below would wrap around. */
+       if (original_size == 0) {
+               *new_size_ret = 0;
+               return 0;
+       }
+
+       /* Determine how many compressed chunks the file needs to be divided
+        * into. */
+       u64 num_chunks = (original_size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
+
+       /* The first chunk always starts right after the table, so it gets no
+        * entry. */
+       u64 num_chunk_entries = num_chunks - 1;
+
+       /* Size of the chunk entries--- 8 bytes for files over 4GB, otherwise 4
+        * bytes */
+       uint chunk_entry_size = (original_size >= (u64)1 << 32) ?  8 : 4;
+
+       /* Total size of the chunk table (as written to the file) */
+       u64 chunk_tab_size = chunk_entry_size * num_chunk_entries;
+
+       /* Offset of the start of the chunk table in the output file. */
+       off_t chunk_tab_offset = ftello(out);
+       if (chunk_tab_offset == -1) {
+               ERROR("Failed to get output position: %m\n");
+               return WIMLIB_ERR_WRITE;
+       }
+
+       /* Array in which to construct the chunk offset table.  It lives on
+        * the heap because its size depends on the size of the resource, and
+        * it has num_chunks (never zero) elements so that the allocation is
+        * never empty. */
+       if (num_chunks > (size_t)-1 / sizeof(u64)) {
+               ERROR("Failed to allocate chunk offset table\n");
+               return WIMLIB_ERR_NOMEM;
+       }
+       chunk_offsets = MALLOC(num_chunks * sizeof(u64));
+       if (!chunk_offsets) {
+               ERROR("Failed to allocate chunk offset table\n");
+               return WIMLIB_ERR_NOMEM;
+       }
+       memset(chunk_offsets, 0, num_chunks * sizeof(u64));
+
+       /* Reserve space for the chunk table (zero-filled for now). */
+       if (fwrite(chunk_offsets, 1, chunk_tab_size, out) != chunk_tab_size) {
+               ERROR("Failed to write chunk offset table: %m\n");
+               ret = WIMLIB_ERR_WRITE;
+               goto out;
+       }
+
+       /* Read each chunk of the file, compress it, write it to the output
+        * file, and update the chunk offset table. */
+       for (u64 i = 0; i < num_chunks; i++) {
+
+               u64 uncompressed_offset = i * WIM_CHUNK_SIZE;
+               u64 uncompressed_chunk_size = min(WIM_CHUNK_SIZE, 
+                                       original_size - uncompressed_offset);
+
+               const u8 *uncompressed_p;
+               if (uncompressed_resource != NULL) {
+                       uncompressed_p = uncompressed_resource + 
+                                                       uncompressed_offset;
+
+               } else {
+                       /* Read chunk i of the file into uncompressed_buf. */
+                       ret = read_resource(in, size, original_size, offset, input_ctype, 
+                                               uncompressed_chunk_size, 
+                                               uncompressed_offset, 
+                                               uncompressed_buf);
+                       if (ret != 0)
+                               goto out;
+                       uncompressed_p = uncompressed_buf;
+               }
+
+               if (i != 0)
+                       chunk_offsets[i - 1] = cur_chunk_offset;
+
+               uint compressed_len;
+
+               ret = compress(uncompressed_p, uncompressed_chunk_size, 
+                              compressed_buf, &compressed_len);
+
+               /* if compress() returned nonzero, the compressed chunk would
+                * have been at least as large as the uncompressed chunk.  In
+                * this situation, the WIM format requires that the uncompressed
+                * chunk be written instead. */
+               const u8 *buf_to_write;
+               uint len_to_write;
+               if (ret == 0) {
+                       buf_to_write = compressed_buf;
+                       len_to_write = compressed_len;
+               } else {
+                       buf_to_write = uncompressed_p;
+                       len_to_write = uncompressed_chunk_size;
+               }
+
+               if (fwrite(buf_to_write, 1, len_to_write, out) != len_to_write) {
+                       ERROR("Failed to write compressed file resource: %m\n");
+                       ret = WIMLIB_ERR_WRITE;
+                       goto out;
+               }
+               cur_chunk_offset += len_to_write;
+       }
+
+       /* Now that all entries of the chunk table are determined, rewind the
+        * stream to where the chunk table was, and write it back out. */
+
+       if (fseeko(out, chunk_tab_offset, SEEK_SET) != 0) {
+               ERROR("Failed to seek to beginning of chunk table: %m\n");
+               ret = WIMLIB_ERR_WRITE;
+               goto out;
+       }
+
+       if (chunk_entry_size == 8) {
+               array_to_le64(chunk_offsets, num_chunk_entries);
+       } else {
+               /* Pack the entries into 4 bytes each, in place.  Entry i is
+                * stored at byte 4*i, which never reaches the 8-byte slot of
+                * an entry that has not been read yet. */
+               u8 *packed = (u8*)chunk_offsets;
+               for (u64 i = 0; i < num_chunk_entries; i++) {
+                       u32 entry = to_le32(chunk_offsets[i]);
+                       memcpy(packed + i * sizeof(u32), &entry, sizeof(u32));
+               }
+       }
+
+       if (fwrite(chunk_offsets, 1, chunk_tab_size, out) != chunk_tab_size) {
+               ERROR("Failed to write chunk table: %m\n");
+               ret = WIMLIB_ERR_WRITE;
+               goto out;
+       }
+
+       if (fseeko(out, 0, SEEK_END) != 0) {
+               ERROR("Failed to seek to end of output file: %m\n");
+               ret = WIMLIB_ERR_WRITE;
+               goto out;
+       }
+
+       /* The chunk offset after the last chunk, plus the size of the chunk
+        * table, gives the total compressed size of the resource. */
+       *new_size_ret = cur_chunk_offset + chunk_tab_size;
+       ret = 0;
+out:
+       FREE(chunk_offsets);
+       return ret;
+}
+
+/*
+ * Writes a resource that is held uncompressed in memory to a file stream.
+ *
+ * @resource:                  The uncompressed resource data.
+ * @out_ctype:                 Compression type to write the resource with.
+ * @resource_original_size:    Number of bytes in @resource.
+ * @out:                       Stream to write to, at its current position.
+ * @resource_size_ret:         Receives the size of the resource as written.
+ *
+ * For WIM_COMPRESSION_TYPE_NONE the bytes are written as they are; any other
+ * type is handed to recompress_resource().  Returns 0 on success,
+ * WIMLIB_ERR_WRITE if the uncompressed write fails, or the result of
+ * recompress_resource().
+ */
+int write_resource_from_memory(const u8 resource[], int out_ctype,
+                              u64 resource_original_size, FILE *out,
+                              u64 *resource_size_ret)
+{
+       size_t nwritten;
+
+       if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
+               return recompress_resource(NULL, resource,
+                                          resource_original_size,
+                                          resource_original_size, 0, 0,
+                                          out, out_ctype, resource_size_ret);
+
+       nwritten = fwrite(resource, 1, resource_original_size, out);
+       if (nwritten != resource_original_size) {
+               ERROR("Failed to write resource of length "
+                               "%"PRIu64": %m\n", 
+                               resource_original_size);
+               return WIMLIB_ERR_WRITE;
+       }
+
+       *resource_size_ret = resource_original_size;
+       return 0;
+}
+
+
+/*
+ * Moves a file resource from a stream opened for reading to a stream opened
+ * for writing, converting between compression types when they differ.
+ *
+ * @in:                        The FILE* that contains the file resource.
+ * @size:              The size of the file resource as stored in @in.
+ * @original_size:     The uncompressed size of the file resource.
+ * @offset:            The offset of the file resource in @in.
+ * @input_ctype:       The compression type of the file resource in @in.
+ * @out:               The FILE* for the output file.  The file resource is
+ *                             written at the current position of @out.
+ * @output_ctype:      The compression type the file resource is written
+ *                             with.
+ * @output_res_entry:  On success, receives the size, original size, offset
+ *                             and flags of the file resource as written to
+ *                             @out.  For a zero-length resource it is
+ *                             zeroed and nothing is written.
+ *
+ * Returns 0 on success, WIMLIB_ERR_WRITE if the output position cannot be
+ * determined, or the error code of the helper that did the transfer.
+ */
+static int transfer_file_resource(FILE *in, u64 size, u64 original_size, 
+                                 off_t offset, int input_ctype, FILE *out, 
+                                 int output_ctype, 
+                                 struct resource_entry *output_res_entry)
+{
+       int status;
+       u64 written_size = 0;
+
+       /* Zero-length files occupy no space in the output. */
+       if (original_size == 0) {
+               memset(output_res_entry, 0, sizeof(*output_res_entry));
+               return 0;
+       }
+
+       /* The resource starts wherever the output stream currently is. */
+       output_res_entry->offset = ftello(out);
+       if (output_res_entry->offset == -1) {
+               ERROR("Failed to get output position: %m\n");
+               return WIMLIB_ERR_WRITE;
+       }
+
+       if (output_ctype == input_ctype) {
+               /* Same compression type on both sides: plain copy. */
+               status = copy_between_files(in, offset, out, size);
+               written_size = size;
+       } else if (output_ctype == WIM_COMPRESSION_TYPE_NONE) {
+               /* Compressed input, uncompressed output. */
+               status = uncompress_resource(in, size, original_size, offset,
+                                            input_ctype, out);
+               written_size = original_size;
+       } else {
+               /* Uncompressed input, or input compressed with a different
+                * type than the output. */
+               status = recompress_resource(in, NULL, size, original_size,
+                                            offset, input_ctype, out,
+                                            output_ctype, &written_size);
+       }
+       if (status != 0)
+               return status;
+
+       output_res_entry->size = written_size;
+       output_res_entry->original_size = original_size;
+       output_res_entry->flags =
+               (output_ctype == WIM_COMPRESSION_TYPE_NONE) ?
+                       0 : WIM_RESHDR_FLAG_COMPRESSED;
+       return 0;
+}
+
 /* 
  * Reads the metadata metadata resource from the WIM file.  The metadata
  * resource consists of the security data, followed by the directory entry for
@@ -614,4 +1006,178 @@ err1:
        return ret;
 }
 
+/* Write the metadata resource for the current image.
+ *
+ * The resource is built in a temporary buffer (security data followed by the
+ * dentry tree), hashed with SHA1, and written to w->out_fp at its current
+ * position using the compression type of the WIM.  The lookup table entry of
+ * the image's metadata resource is then updated with the new hash and the
+ * output resource entry.
+ *
+ * Returns 0 on success, WIMLIB_ERR_WRITE if the output position cannot be
+ * determined, WIMLIB_ERR_NOMEM if the buffer cannot be allocated, or the
+ * error code of write_resource_from_memory(). */
+int write_metadata_resource(WIMStruct *w)
+{
+       FILE *out;
+       u8 *buf;
+       u8 *p;
+       int ret;
+       off_t subdir_offset;
+       struct dentry *root;
+       struct lookup_table_entry *lte;
+       struct resource_entry *res_entry;
+       off_t metadata_offset;
+       u64 metadata_original_size;
+       u64 metadata_compressed_size;
+       int metadata_ctype;
+       u8  hash[WIM_HASH_SIZE];
+
+       DEBUG("Writing metadata resource for image %u\n", w->current_image);
+
+       out = w->out_fp;
+       root = wim_root_dentry(w);
+       metadata_ctype = wimlib_get_compression_type(w);
+       metadata_offset = ftello(out);
+       if (metadata_offset == -1)
+               return WIMLIB_ERR_WRITE;
+
+       subdir_offset = 8 + root->length + 8; /* 8 bytes of security data (the
+                                              * two u32 fields written below)
+                                              * plus the root dentry plus 8
+                                              * more bytes.  NOTE(review): the
+                                              * trailing 8 presumably covers
+                                              * an end-of-directory marker
+                                              * after the root -- confirm. */
+       calculate_subdir_offsets(root, &subdir_offset);
+       metadata_original_size = subdir_offset; /* value left in subdir_offset
+                                                * is used as the size of the
+                                                * whole resource */
+       buf = MALLOC(metadata_original_size);
+       if (!buf) {
+               ERROR("Failed to allocate %"PRIu64" bytes for "
+                               "metadata resource\n", metadata_original_size);
+               return WIMLIB_ERR_NOMEM;
+       }
+       p = buf;
+       #if 0
+       /* Write the security data. */
+       p = write_security_data(wim_security_data(w), p);
+       #else
+       p = put_u32(p, 8); /* Total length of security data. */
+       p = put_u32(p, 0); /* Number of security data entries. */
+       #endif
+
+       DEBUG("Writing dentry tree.\n");
+       p = write_dentry_tree(root, p);
+
+       /* Like file resources, the lookup table entry for a metadata resource
+        * uses for the hash code a SHA1 message digest of its uncompressed
+        * contents. */
+       sha1_buffer(buf, metadata_original_size, hash);
+
+       ret = write_resource_from_memory(buf, 
+                                        metadata_ctype,
+                                        metadata_original_size, 
+                                        out,
+                                        &metadata_compressed_size);
+       FREE(buf); /* no longer needed whether or not the write succeeded */
+       if (ret != 0)
+               return ret;
+
+       /* Update the lookup table entry, including the hash and output resource
+        * entry fields, for this image's metadata resource.  If the hash
+        * changed, the entry is unlinked from the lookup table and inserted
+        * again under the new hash. */
+       lte = wim_metadata_lookup_table_entry(w);
+       res_entry = &lte->output_resource_entry;
+       lte->out_refcnt++;
+       if (memcmp(hash, lte->hash, WIM_HASH_SIZE) != 0) {
+               lookup_table_unlink(w->lookup_table, lte);
+               memcpy(lte->hash, hash, WIM_HASH_SIZE);
+               lookup_table_insert(w->lookup_table, lte);
+       }
+       res_entry->original_size = metadata_original_size;
+       res_entry->offset        = metadata_offset;
+       res_entry->size          = metadata_compressed_size;
+       res_entry->flags         = WIM_RESHDR_FLAG_METADATA;
+       if (metadata_ctype != WIM_COMPRESSION_TYPE_NONE)
+               res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
+       return 0;
+}
+
+/* 
+ * Writes a file resource to the output file. 
+ *
+ * @dentry:  The dentry for the file resource.
+ * @wim_p:  A pointer to the WIMStruct.  The fields of interest to this
+ *     function are the input and output file streams and the lookup table. 
+ * @return zero on success, nonzero on failure. 
+ *
+ * Nothing is written, and zero is returned, when @dentry is a directory, when
+ * no lookup table entry is found for it, or when the resource was already
+ * written by an earlier call (the out_refcnt of the entry is incremented on
+ * every call that reaches it).
+ *
+ * The data comes from the file named by the file_on_disk field of the lookup
+ * table entry if that is set, otherwise from a WIM file.  In both cases
+ * transfer_file_resource() does the writing and fills in the output resource
+ * entry.  WIMLIB_ERR_OPEN is returned if the file on disk cannot be opened.
+ */
+int write_file_resource(struct dentry *dentry, void *wim_p)
+{
+       WIMStruct *w;
+       FILE *out;
+       FILE *in;
+       struct lookup_table_entry *lte;
+       int in_wim_ctype;
+       int out_wim_ctype;
+       int input_res_ctype;
+       struct resource_entry *input_res_entry;
+       struct resource_entry *output_res_entry;
+       u64 len;
+       int ret;
+
+       w = wim_p;
+       out = w->out_fp;
+
+       /* Directories don't need file resources. */
+       if (dentry_is_directory(dentry))
+               return 0;
+
+       /* Get the lookup entry for the file resource. */
+       lte = wim_lookup_resource(w, dentry);
+       if (!lte)
+               return 0;
+
+       /* No need to write file resources twice.  (This indicates file
+        * resources that are part of a hard link set.) */
+       if (++lte->out_refcnt != 1)
+               return 0;
+
+       out_wim_ctype = wimlib_get_compression_type(w);
+       output_res_entry = &lte->output_resource_entry;
+
+       /* Figure out if we can read the resource from the WIM file, or
+        * if we have to read it from the filesystem outside. */
+       if (lte->file_on_disk) {
+
+               /* Read from disk (uncompressed) */
+
+               len = lte->resource_entry.original_size;
 
+               in = fopen(lte->file_on_disk, "rb");
+               if (!in) {
+                       ERROR("Failed to open the file `%s': %m\n",
+                                       lte->file_on_disk);
+                       return WIMLIB_ERR_OPEN;
+               }
+
+               if (w->verbose)
+                       puts(lte->file_on_disk);
+
+               /* The file is uncompressed, so its stored size and its
+                * original size are the same and it starts at offset 0. */
+               ret = transfer_file_resource(in, len, len, 0,
+                                            WIM_COMPRESSION_TYPE_NONE, out, 
+                                            out_wim_ctype, output_res_entry);
+               fclose(in);
+       } else {
+
+               /* Read from input WIM (possibly compressed) */
+
+               /* It may be a different WIM file, in the case of
+                * exporting images from one WIM file to another */
+               if (lte->other_wim_fp) {
+                       /* Different WIM file. */
+                       in = lte->other_wim_fp;
+                       in_wim_ctype = lte->other_wim_ctype;
+               } else {
+                       /* Same WIM file. */
+                       in = w->fp;
+                       in_wim_ctype = out_wim_ctype;
+               }
+               input_res_entry = &lte->resource_entry;
+               /* The compression type of this particular resource is derived
+                * from the compression type of its WIM and its own flags. */
+               input_res_ctype = resource_compression_type(
+                                       in_wim_ctype, 
+                                       input_res_entry->flags);
+
+               ret = transfer_file_resource(in, 
+                                       input_res_entry->size,
+                                       input_res_entry->original_size, 
+                                       input_res_entry->offset,
+                                       input_res_ctype, 
+                                       out, 
+                                       out_wim_ctype,
+                                       output_res_entry);
+       }
+       return ret;
+}
diff --git a/src/split.c b/src/split.c
new file mode 100644 (file)
index 0000000..f22780b
--- /dev/null
@@ -0,0 +1,260 @@
+/*
+ * split.c
+ *
+ * Split a WIM file into parts.
+ *
+ * Copyright (C) 2012 Eric Biggers
+ *
+ * wimlib - Library for working with WIM files 
+ *
+ * This library is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option) any
+ * later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License along
+ * with this library; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA 
+ */
+
+#include "wimlib_internal.h"
+#include "lookup_table.h"
+#include "xml.h"
+#include "io.h"
+
+struct args { /* State shared between wimlib_split() and the
+               * copy_resource_to_swm() callback it runs on every lookup
+               * table entry. */
+       WIMStruct *w; /* WIM being split; parts are written through w->out_fp */
+       char *swm_base_name; /* buffer in which the file name of each new part
+                             * is built */
+       size_t swm_base_name_len; /* offset in swm_base_name at which the part
+                                  * number is written */
+       const char *swm_suffix; /* text appended after the part number */
+       struct lookup_table_entry *lte_chain_head; /* first resource of the
+                                                   * part being written; the
+                                                   * chain is linked through
+                                                   * next_lte_in_swm */
+       struct lookup_table_entry *lte_chain_tail; /* last resource of that
+                                                   * chain, NULL if empty */
+       int    part_number; /* number of the part being written */
+       int    write_flags; /* passed to begin_write() and finish_swm() */
+       long   size_remaining; /* bytes still available in the current part;
+                               * can be negative */
+       size_t part_size; /* size_remaining is reset to this for a new part */
+       u64    total_bytes; /* total shown in progress messages */
+       u64    total_bytes_written; /* sum of the sizes of the resources copied
+                                    * so far */
+};
+
+/*
+ * Finishes the split WIM part that is being written to w->out_fp.
+ *
+ * @w:                 The WIMStruct; its out_fp must be the open part.
+ * @lte_chain_head:    First entry of the chain (linked through
+ *                             next_lte_in_swm) of resources that were written
+ *                             to this part.  Each entry is unlinked from the
+ *                             chain once it has been written out.
+ * @write_flags:       Flags passed on to finish_write().
+ *
+ * Writes the lookup table entries of the chain, records the offset and size
+ * of that table in the header, calls finish_write(), and closes w->out_fp
+ * (which is then set to NULL).
+ *
+ * Returns 0 on success, WIMLIB_ERR_WRITE if the position in the output file
+ * cannot be determined or closing it fails, or the error code of
+ * write_lookup_table_entry() or finish_write().
+ */
+static int finish_swm(WIMStruct *w, struct lookup_table_entry *lte_chain_head,
+                     int write_flags)
+{
+       off_t lookup_table_offset = ftello(w->out_fp);
+       int ret;
+
+       /* Check before the offset is printed or anything is written at a
+        * position that is not known. */
+       if (lookup_table_offset == -1)
+               return WIMLIB_ERR_WRITE;
+
+       DEBUG("Writing lookup table for SWM (offset %"PRIu64")\n", 
+                       (u64)lookup_table_offset);
+
+       while (lte_chain_head != NULL) {
+               ret = write_lookup_table_entry(lte_chain_head, w->out_fp);
+               if (ret != 0)
+                       return ret;
+               struct lookup_table_entry *prev = lte_chain_head;
+               lte_chain_head = prev->next_lte_in_swm;
+               prev->next_lte_in_swm = NULL;
+       }
+
+       /* The lookup table ends where the XML data will begin. */
+       off_t xml_data_offset = ftello(w->out_fp);
+       if (xml_data_offset == -1)
+               return WIMLIB_ERR_WRITE;
+
+       w->hdr.lookup_table_res_entry.offset = lookup_table_offset;
+       w->hdr.lookup_table_res_entry.size = 
+                               xml_data_offset - lookup_table_offset;
+       ret = finish_write(w, WIM_ALL_IMAGES, write_flags, 0);
+       if (ret != 0)
+               return ret;
+
+       ret = fclose(w->out_fp);
+       if (ret != 0)
+               ret = WIMLIB_ERR_WRITE;
+       w->out_fp = NULL;
+       return ret;
+}
+
+/*
+ * Callback for for_lookup_table_entry(): copies the resource of @lte to the
+ * split WIM part that is currently being written, starting a new part first
+ * when the resource does not fit in the space left in the current one.
+ *
+ * @lte:       The lookup table entry of the resource to copy.
+ * @__args:    Pointer to the struct args holding the state of the split.
+ *
+ * Metadata resources are skipped, because wimlib_split() writes them before
+ * it iterates over the lookup table.  A resource larger than the part size
+ * still gets written, to a part of its own that exceeds the limit.
+ *
+ * Returns 0 on success, or the error code of finish_swm(), begin_write() or
+ * copy_resource().
+ */
+static int copy_resource_to_swm(struct lookup_table_entry *lte, void *__args)
+{
+       struct args *args = (struct args*)__args;
+       WIMStruct *w = args->w;
+       int ret;
+
+       /* metadata resources were already written. */
+       if (lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA)
+               return 0;
+
+       if (args->size_remaining < 0 || 
+                       (u64)args->size_remaining < lte->resource_entry.size) {
+
+               /* No space for this resource.  Finish the previous swm and
+                * start a new one. */
+
+               ret = finish_swm(w, args->lte_chain_head, args->write_flags);
+
+               args->lte_chain_tail = NULL;
+               args->lte_chain_head = NULL;
+
+               /* A part that could not be finished is not usable, so stop
+                * here instead of going on to the next part. */
+               if (ret != 0)
+                       return ret;
+
+               /* Name of the new part: base name, part number, suffix. */
+               sprintf(args->swm_base_name + args->swm_base_name_len, "%d", 
+                       ++args->part_number);
+               strcat(args->swm_base_name, args->swm_suffix);
+
+               w->hdr.part_number = args->part_number;
+
+               /* write_flags holds WIMLIB_WRITE_FLAG_* values, so that is
+                * the flag to test here. */
+               if (args->write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
+                       printf("Writing `%s' (%"PRIu64" of %"PRIu64" bytes, "
+                                       "%.0f%% done)\n", 
+                               args->swm_base_name, 
+                               args->total_bytes_written,
+                               args->total_bytes,
+                               (double)args->total_bytes_written /
+                                       (double)args->total_bytes * 100.0);
+
+               ret = begin_write(w, args->swm_base_name, args->write_flags);
+               if (ret != 0)
+                       return ret;
+               args->size_remaining = args->part_size;
+       }
+       args->size_remaining -= lte->resource_entry.size;
+       args->total_bytes_written += lte->resource_entry.size;
+
+       /* Append the entry to the chain of resources in the current part, so
+        * that finish_swm() writes it to the lookup table of that part. */
+       if (args->lte_chain_tail)
+               args->lte_chain_tail->next_lte_in_swm = lte;
+       else
+               args->lte_chain_head = lte;
+       args->lte_chain_tail = lte;
+       return copy_resource(lte, w);
+}
+
+/* Splits the WIM file @wimfile into multiple parts prefixed by @swm_name with
+ * size at most @part_size.
+ *
+ * @wimfile:   Name of the WIM file to split.
+ * @swm_name:  Name of the first part.  The names of the other parts are made
+ *                     by inserting the part number in front of the first '.'
+ *                     in the last path component of @swm_name, or by
+ *                     appending it if that component contains no '.'.
+ * @part_size: Size limit of a part.  A part exceeds it when a single
+ *                     resource is larger than the limit, and the metadata
+ *                     resources all go into the first part whatever their
+ *                     size.
+ * @flags:     WIMLIB_OPEN_FLAG_* flags; they are used to open @wimfile and
+ *                     translated to the matching WIMLIB_WRITE_FLAG_* flags
+ *                     for writing the parts.
+ *
+ * Returns 0 on success, the error code of wimlib_open_wim() or of a function
+ * that writes a part, WIMLIB_ERR_OPEN if a finished part cannot be reopened
+ * to fix its header, or WIMLIB_ERR_WRITE if fixing the header fails.
+ */
+WIMLIBAPI int wimlib_split(const char *wimfile, const char *swm_name, 
+                          size_t part_size, int flags)
+{
+       int ret;
+       WIMStruct *w;
+       int write_flags = 0;
+       size_t swm_name_len = strlen(swm_name);
+       size_t swm_base_name_len;
+       char name[swm_name_len + 20];
+       const char *swm_last_component;
+       const char *swm_suffix;
+       struct args args;
+
+       struct lookup_table_entry *lte_chain_head = NULL;
+       struct lookup_table_entry *lte_chain_tail = NULL;
+       long size_remaining = part_size;
+       u64 total_bytes_written = 0;
+       u64 total_bytes;
+
+       ret = wimlib_open_wim(wimfile, flags, &w);
+       if (ret != 0)
+               return ret;
+
+       /* From here on every exit goes through the "out" label, so that the
+        * WIMStruct is released on failure as well as on success.
+        * NOTE(review): this relies on wimlib_free() also closing w->out_fp
+        * when a part is still open -- confirm. */
+
+       total_bytes = wim_info_get_total_bytes(w->wim_info);
+
+       if (flags & WIMLIB_OPEN_FLAG_CHECK_INTEGRITY)
+               write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
+       if (flags & WIMLIB_OPEN_FLAG_SHOW_PROGRESS)
+               write_flags |= WIMLIB_WRITE_FLAG_SHOW_PROGRESS;
+
+       w->hdr.flags |= WIM_HDR_FLAG_SPANNED;
+       w->hdr.boot_idx = 0;
+       randomize_byte_array(w->hdr.guid, WIM_GID_LEN);
+       ret = begin_write(w, swm_name, write_flags);
+       if (ret != 0)
+               goto out;
+
+       /* Look for the suffix in the last path component only; a '.' in a
+        * directory name (as in "./out.swm") is not the start of a suffix. */
+       swm_last_component = strrchr(swm_name, '/');
+       if (swm_last_component)
+               swm_last_component++;
+       else
+               swm_last_component = swm_name;
+       swm_suffix = strchr(swm_last_component, '.');
+
+       memcpy(name, swm_name, swm_name_len + 1);
+       if (swm_suffix) {
+               swm_base_name_len = swm_suffix - swm_name;
+       } else {
+               swm_base_name_len = swm_name_len;
+               swm_suffix = "";
+       }
+
+       /* write_flags holds WIMLIB_WRITE_FLAG_* values, so that is the flag
+        * to test for progress messages. */
+       if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
+               printf("Writing `%s' (%.2f %% done)\n", 
+                       swm_name, 
+                       (double)total_bytes_written /
+                               (double)total_bytes * 100.0);
+
+       /* The metadata resources of all images go into the first part. */
+       w->write_metadata = true;
+       for (int i = 0; i < w->hdr.image_count; i++) {
+
+               struct lookup_table_entry *metadata_lte;
+
+               metadata_lte = w->image_metadata[i].lookup_table_entry;
+               ret = copy_resource(metadata_lte, w);
+               if (ret != 0)
+                       goto out;
+               size_remaining -= metadata_lte->resource_entry.size;
+               total_bytes_written += metadata_lte->resource_entry.size;
+               if (lte_chain_tail)
+                       lte_chain_tail->next_lte_in_swm = metadata_lte;
+               else
+                       lte_chain_head = metadata_lte;
+               lte_chain_tail = metadata_lte;
+       }
+       w->write_metadata = false;
+
+       args = (struct args) {
+               .w                 = w,
+               .swm_base_name     = name,
+               .swm_base_name_len = swm_base_name_len,
+               .swm_suffix        = swm_suffix,
+               .lte_chain_head    = lte_chain_head,
+               .lte_chain_tail    = lte_chain_tail,
+               .part_number       = 1,
+               .write_flags       = write_flags,
+               .size_remaining    = size_remaining,
+               .part_size         = part_size,
+               .total_bytes        = total_bytes,
+               .total_bytes_written = total_bytes_written,
+       };
+
+       /* Copy all other resources, starting new parts as needed. */
+       ret = for_lookup_table_entry(w->lookup_table, copy_resource_to_swm, &args);
+       if (ret != 0)
+               goto out;
+
+       ret = finish_swm(w, args.lte_chain_head, write_flags);
+       if (ret != 0)
+               goto out;
+
+
+       /* The swms are all ready now, except the total_parts and part_number
+        * fields in their headers are wrong (we don't know the total parts
+        * until they are all written).  Fix them. */
+       int total_parts = args.part_number;
+       for (int i = 1; i <= total_parts; i++) {
+               const char *p;
+               if (i == 1) {
+                       p = swm_name;
+               } else {
+                       sprintf(name + swm_base_name_len, "%d", i);
+                       p = strcat(name, swm_suffix);
+               }
+
+               FILE *fp = fopen(p, "r+b");
+               if (!fp) {
+                       ERROR("Failed to open `%s': %m\n", p);
+                       ret = WIMLIB_ERR_OPEN;
+                       goto out;
+               }
+               char buf[4];
+               put_u16(buf, i);
+               put_u16(buf + 2, total_parts);
+
+               /* The two fields are overwritten at byte 40 of the file. */
+               if (fseek(fp, 40, SEEK_SET) != 0 || 
+                               fwrite(buf, 1, sizeof(buf), fp) != sizeof(buf)) {
+                       /* Report first: fclose() may change errno, which %m
+                        * prints. */
+                       ERROR("Error overwriting header of `%s': %m\n", p);
+                       fclose(fp);
+                       ret = WIMLIB_ERR_WRITE;
+                       goto out;
+               }
+               if (fclose(fp) != 0) {
+                       ERROR("Error overwriting header of `%s': %m\n", p);
+                       ret = WIMLIB_ERR_WRITE;
+                       goto out;
+               }
+       }
+       if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
+               printf("Done!\n");
+       ret = 0;
+out:
+       wimlib_free(w);
+       return ret;
+}
index 1bc2397..14a3826 100644 (file)
  *   WIM without security data, including a boot.wim for Windows PE, but <b>do
  *   not expect to be able to use wimlib to image a Windows installation and
  *   preserve file attributes</b>.
- * - There is no way to create split WIMs.
+ * - There is no way to directly extract or mount split WIMs.
  * - There is not yet any code to verify that there are no collisions between
  *   different files that happen to have the same SHA1 message digest.
  *   This is extremely unlikely, but could result in something bad such as a
  *   file going missing.
- * - Alternate stream entries for directory entries are ignored.  I'm not sure
- *   if these are ever used for anything important.
+ * - Alternate stream entries for directory entries are ignored.
  * - Different versions of the WIM file format, if they even exist, are
  *   unsupported.  Let me know if you notice WIM files with a different version.
  * - Chunk sizes other than 32768 are unsupported (except for uncompressed WIMs,
  * split up LZX compressed blocks, which is not yet implemented in wimlib.
  *
  * wimlib is experimental and likely contains bugs; use Microsoft's @a
- * imagex.exe if you want to make sure your WIM files are made correctly.
+ * imagex.exe if you want to make sure your WIM files are made "correctly".
  *
  * \section legal License
  *
@@ -706,7 +705,7 @@ extern bool wimlib_image_name_in_use(const WIMStruct *wim, const char *name);
  *     the parts of the original WIM, there are duplicate parts, or not all the
  *     parts have the same GUID and compression type.
  * @retval ::WIMLIB_ERR_WRITE
- *     An error occurred when trying to write data to to the new WIM at @a output_path.
+ *     An error occurred when trying to write data to the new WIM at @a output_path.
  *
  * Note that this function merely copies the resources, so it will not check to
  * see if the resources, including the metadata resource, are valid or not.
@@ -1206,6 +1205,34 @@ extern void wimlib_set_verbose(WIMStruct *wim, bool verbose);
 extern int wimlib_set_output_dir(WIMStruct *wim, const char *dir);
 
 /**
+ * Splits a WIM into multiple parts.
+ *
+ * @param wimfile
+ *     Name of the WIM file to split.  It must be a standalone, one-part WIM.
+ * @param swm_name
+ *     Name of the SWM file to create.  This will be the name of the first
+ *     part.  The other parts will have the same name with 2, 3, 4, etc.
+ *     appended.
+ * @param part_size
+ *     The maximum size per part.  It is not guaranteed that this will really
+ *     be the maximum size per part, because some file resources in the WIM may
+ *     be larger than this size, and the WIM file format provides no way to
+ *     split up file resources among multiple WIMs.
+ * @param flags
+ *     Bitwise OR of ::WIMLIB_OPEN_FLAG_CHECK_INTEGRITY and/or
+ *     ::WIMLIB_OPEN_FLAG_SHOW_PROGRESS.
+ *
+ * @return 0 on success; nonzero on error.  This function may return any value
+ * returned by wimlib_open_wim() as well as the following error codes:
+ *
+ * @retval ::WIMLIB_ERR_WRITE
+ *     An error occurred when trying to write data to one of the split WIMs.
+ *
+ */
+extern int wimlib_split(const char *wimfile, const char *swm_name, 
+                       size_t part_size, int flags);
+
+/**
  * Unmounts a WIM image that was mounted using wimlib_mount().
  *
  * Blocks until it is known whether the mount succeeded or failed.
index 269f0a5..02b96b3 100644 (file)
@@ -224,23 +224,19 @@ struct image_metadata {
 
 /* The opaque structure exposed to the wimlib API. */
 typedef struct WIMStruct {
-       /* The name of the WIM file that has been opened. */
-       char                *filename;
 
        /* A pointer to the file indicated by @filename, opened for reading. */
        FILE                *fp;
 
-       /* The currently selected image, indexed starting at 1.  If not 0,
-        * subtract 1 from this to get the index of the current image in the
-        * image_metadata array. */
-       int                  current_image;
+       /* FILE pointer for the WIM file that is being written. */
+       FILE  *out_fp;
+
+       /* The name of the WIM file that has been opened. */
+       char                *filename;
 
        /* The lookup table for the WIM file. */ 
        struct lookup_table *lookup_table;
 
-       /* The header of the WIM file. */
-       struct wim_header    hdr;
-
        /* Pointer to the XML data read from the WIM file. */
        u8                  *xml_data;
 
@@ -252,22 +248,31 @@ typedef struct WIMStruct {
         * WIM has a image metadata associated with it. */
        struct image_metadata     *image_metadata;
 
-       /* True if files names are to be printed when doing extraction. 
-        * May be used for other things later. */
-       bool   verbose;
+       /* Name of the output directory for extraction. */
+       char  *output_dir;
+
+       /* The header of the WIM file. */
+       struct wim_header    hdr;
 
        /* The type of links to create when extracting files (hard, symbolic, or
         * none.) */
        int    link_type;
 
-       /* Name of the output directory for extraction. */
-       char  *output_dir;
+       /* The currently selected image, indexed starting at 1.  If not 0,
+        * subtract 1 from this to get the index of the current image in the
+        * image_metadata array. */
+       int                  current_image;
 
-       /* Set to true when extracting multiple images */
-       bool   is_multi_image_extraction;
+       /* True if file names are to be printed when doing extraction. 
+        * May be used for other things later. */
+       bool   verbose;
 
-       /* FILE pointer for the WIM file that is being written. */
-       FILE  *out_fp;
+       union {
+               /* Set to true when extracting multiple images */
+               bool is_multi_image_extraction;
+
+               bool write_metadata;
+       };
 } WIMStruct;
 
 
@@ -369,6 +374,14 @@ static inline int read_full_resource(FILE *fp, u64 resource_size,
                                resource_original_size, 0, contents_ret);
 }
 
+extern int write_file_resource(struct dentry *dentry, void *wim_p);
+extern int copy_resource(struct lookup_table_entry *lte, void *w);
+extern int copy_between_files(FILE *in, off_t in_offset, FILE *out, size_t len);
+extern int write_resource_from_memory(const u8 resource[], int out_ctype,
+                                     u64 resource_original_size, FILE *out,
+                                     u64 *resource_size_ret);
+extern int write_metadata_resource(WIMStruct *w);
+
 #if 0
 /* security.c */
 bool read_security_data(const u8 metadata_resource[], 
@@ -391,13 +404,8 @@ extern int for_image(WIMStruct *w, int image, int (*visitor)(WIMStruct *));
 /* write.c */
 extern int finish_write(WIMStruct *w, int image, int flags, 
                        int write_lookup_table);
-extern int copy_between_files(FILE *in, off_t in_offset, FILE *out, size_t len);
-extern int write_resource_from_memory(const u8 resource[], int out_ctype,
-                                     u64 resource_original_size, FILE *out,
-                                     u64 *resource_size_ret);
 
 extern int begin_write(WIMStruct *w, const char *path, int flags);
-extern int write_metadata_resource(WIMStruct *w);
 
 
 #include "wimlib.h"
index 6b67498..470ab14 100644 (file)
  */
 #include "wimlib_internal.h"
 #include "io.h"
-#include "lookup_table.h"
 #include "dentry.h"
-#include "sha1.h"
-#include "lzx.h"
+#include "lookup_table.h"
 #include "xml.h"
-#include "xpress.h"
 #include <unistd.h>
 
-
-
-/* Used for buffering FILE IO */
-#define BUFFER_SIZE 4096
-
-/*
- * Copies bytes between two file streams.
- *
- * Copies @len bytes from @in to @out, at the current position in @out, and at
- * an offset of @in_offset in @in.
- */
-int copy_between_files(FILE *in, off_t in_offset, FILE *out, size_t len)
-{
-       u8 buf[BUFFER_SIZE];
-       size_t n;
-
-       if (fseeko(in, in_offset, SEEK_SET) != 0) {
-               ERROR("Failed to seek to byte %"PRIu64" of input file: %m\n",
-                               in_offset);
-               return WIMLIB_ERR_READ;
-       }
-       /* To reduce memory usage and improve speed, read and write BUFFER_SIZE
-        * bytes at a time. */
-       while (len != 0) {
-               n = min(len, BUFFER_SIZE);
-               if (fread(buf, 1, n, in) != n) {
-                       if (feof(in)) {
-                               ERROR("Unexpected EOF when copying data "
-                                               "between files\n");
-                       } else {
-                               ERROR("Error copying data between files: %m\n");
-                       }
-                       return WIMLIB_ERR_READ;
-               }
-
-               if (fwrite(buf, 1, n, out) != n) {
-                       ERROR("Error copying data between files: %m\n");
-                       return WIMLIB_ERR_WRITE;
-               }
-               len -= n;
-       }
-       return 0;
-}
-
-
-/* 
- * Uncompresses a WIM file resource and writes it uncompressed to a file stream.
- *
- * @in:                    The file stream that contains the file resource.
- * @size:           The size of the resource in the input file.
- * @original_size:  The original (uncompressed) size of the resource. 
- * @offset:        The offset of the start of the resource in @in.
- * @input_ctype:    The compression type of the resource in @in.
- * @out:           The file stream to write the file resource to.
- */
-static int uncompress_resource(FILE *in, u64 size, u64 original_size,
-                              off_t offset, int input_ctype, FILE *out)
-{
-       int ret;
-       u8 buf[WIM_CHUNK_SIZE];
-       /* Determine how many compressed chunks the file is divided into. */
-       u64 num_chunks;
-       u64 i;
-       u64 uncompressed_offset;
-       u64 uncompressed_chunk_size;
-       
-       num_chunks = (original_size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
-
-       for (i = 0; i < num_chunks; i++) {
-
-               uncompressed_offset = i * WIM_CHUNK_SIZE;
-               uncompressed_chunk_size = min(WIM_CHUNK_SIZE, 
-                                       original_size - uncompressed_offset);
-
-               ret = read_resource(in, size, original_size, offset, input_ctype, 
-                                       uncompressed_chunk_size, 
-                                       uncompressed_offset, buf);
-               if (ret != 0)
-                       return ret;
-
-               if (fwrite(buf, 1, uncompressed_chunk_size, out) != 
-                                               uncompressed_chunk_size) {
-                       ERROR("Failed to write file resource: %m\n");
-                       return WIMLIB_ERR_WRITE;
-               }
-       }
-       return 0;
-}
-
-/* 
- * Transfers a file resource between two files, writing it compressed.  The file
- * resource in the input file may be either compressed or uncompressed.
- * Alternatively, the input resource may be in-memory, but it must be
- * uncompressed.
- *
- * @in:                    The file stream that contains the file resource.  Ignored
- *                     if uncompressed_resource != NULL.
- * @uncompressed_resource:     If this pointer is not NULL, it points to an
- *                                     array of @original_size bytes that are
- *                                     the uncompressed input resource.
- * @size:           The size of the resource in the input file.
- * @original_size:  The original (uncompressed) size of the resource. 
- * @offset:        The offset of the start of the resource in @in.  Ignored
- *                     if uncompressed_resource != NULL.
- * @input_ctype:    The compression type of the resource in @in.  Ignored if
- *                     uncompressed_resource != NULL.
- * @out:           The file stream to write the file resource to.
- * @output_type:    The compression type to use when writing the resource to
- *                     @out.
- * @new_size_ret:   A location into which the new compressed size of the file
- *                     resource in returned.
- */
-static int recompress_resource(FILE *in, const u8 uncompressed_resource[], 
-                                       u64 size, u64 original_size,
-                                       off_t offset, int input_ctype, FILE *out,
-                                       int output_ctype, u64 *new_size_ret)
-{
-       int ret;
-       int (*compress)(const void *, uint, void *, uint *);
-       if (output_ctype == WIM_COMPRESSION_TYPE_LZX)
-               compress = lzx_compress;
-       else
-               compress = xpress_compress;
-
-       u8 uncompressed_buf[WIM_CHUNK_SIZE];
-       u8 compressed_buf[WIM_CHUNK_SIZE - 1];
-
-       /* Determine how many compressed chunks the file needs to be divided
-        * into. */
-       u64 num_chunks = (original_size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
-
-       u64 num_chunk_entries = num_chunks - 1;
-
-       /* Size of the chunk entries--- 8 bytes for files over 4GB, otherwise 4
-        * bytes */
-       uint chunk_entry_size = (original_size >= (u64)1 << 32) ?  8 : 4;
-
-       /* Array in which to construct the chunk offset table. */
-       u64 chunk_offsets[num_chunk_entries];
-
-       /* Offset of the start of the chunk table in the output file. */
-       off_t chunk_tab_offset = ftello(out);
-
-       /* Total size of the chunk table (as written to the file) */
-       u64 chunk_tab_size = chunk_entry_size * num_chunk_entries;
-
-       /* Reserve space for the chunk table. */
-       if (fwrite(chunk_offsets, 1, chunk_tab_size, out) != chunk_tab_size) {
-               ERROR("Failed to write chunk offset table: %m\n");
-               return WIMLIB_ERR_WRITE;
-       }
-
-       /* Read each chunk of the file, compress it, write it to the output
-        * file, and update th chunk offset table. */
-       u64 cur_chunk_offset = 0;
-       for (u64 i = 0; i < num_chunks; i++) {
-
-               u64 uncompressed_offset = i * WIM_CHUNK_SIZE;
-               u64 uncompressed_chunk_size = min(WIM_CHUNK_SIZE, 
-                                       original_size - uncompressed_offset);
-
-               const u8 *uncompressed_p;
-               if (uncompressed_resource != NULL) {
-                       uncompressed_p = uncompressed_resource + 
-                                                       uncompressed_offset;
-
-               } else {
-                       /* Read chunk i of the file into uncompressed_buf. */
-                       ret = read_resource(in, size, original_size, offset, input_ctype, 
-                                               uncompressed_chunk_size, 
-                                               uncompressed_offset, 
-                                               uncompressed_buf);
-                       if (ret != 0)
-                               return ret;
-                       uncompressed_p = uncompressed_buf;
-               }
-
-               if (i != 0)
-                       chunk_offsets[i - 1] = cur_chunk_offset;
-
-               uint compressed_len;
-
-               ret = compress(uncompressed_p, uncompressed_chunk_size, 
-                              compressed_buf, &compressed_len);
-
-               /* if compress() returned nonzero, the compressed chunk would
-                * have been at least as large as the uncompressed chunk.  In
-                * this situation, the WIM format requires that the uncompressed
-                * chunk be written instead. */
-               const u8 *buf_to_write;
-               uint len_to_write;
-               if (ret == 0) {
-                       buf_to_write = compressed_buf;
-                       len_to_write = compressed_len;
-               } else {
-                       buf_to_write = uncompressed_p;
-                       len_to_write = uncompressed_chunk_size;
-               }
-
-               if (fwrite(buf_to_write, 1, len_to_write, out) != len_to_write) {
-                       ERROR("Failed to write compressed file resource: %m\n");
-                       return WIMLIB_ERR_WRITE;
-               }
-               cur_chunk_offset += len_to_write;
-       }
-
-       /* The chunk offset after the last chunk, plus the size of the chunk
-        * table, gives the total compressed size of the resource. */
-       *new_size_ret = cur_chunk_offset + chunk_tab_size;
-
-       /* Now that all entries of the chunk table are determined, rewind the
-        * stream to where the chunk table was, and write it back out. */
-
-       if (fseeko(out, chunk_tab_offset, SEEK_SET) != 0) {
-               ERROR("Failed to seek to beginning of chunk table: %m\n");
-               return WIMLIB_ERR_READ;
-       }
-
-       if (chunk_entry_size == 8) {
-               array_to_le64(chunk_offsets, num_chunk_entries);
-
-               if (fwrite(chunk_offsets, 1, chunk_tab_size, out) != 
-                               chunk_tab_size) {
-                       ERROR("Failed to write chunk table: %m\n");
-                       return WIMLIB_ERR_WRITE;
-               }
-       } else {
-               u32 chunk_entries_small[num_chunk_entries];
-               for (u64 i = 0; i < num_chunk_entries; i++)
-                       chunk_entries_small[i] = to_le32(chunk_offsets[i]);
-               if (fwrite(chunk_entries_small, 1, chunk_tab_size, out) != 
-                               chunk_tab_size) {
-                       ERROR("Failed to write chunk table: %m\n");
-                       return WIMLIB_ERR_WRITE;
-               }
-       }
-
-       if (fseeko(out, 0, SEEK_END) != 0) {
-               ERROR("Failed to seek to end of output file: %m\n");
-               return WIMLIB_ERR_WRITE;
-       }
-
-       return 0;
-}
-
-int write_resource_from_memory(const u8 resource[], int out_ctype,
-                              u64 resource_original_size, FILE *out,
-                              u64 *resource_size_ret)
-{
-       if (out_ctype == WIM_COMPRESSION_TYPE_NONE) {
-               if (fwrite(resource, 1, resource_original_size, out) != 
-                                       resource_original_size) {
-                       ERROR("Failed to write resource of length "
-                                       "%"PRIu64": %m\n", 
-                                       resource_original_size);
-                       return WIMLIB_ERR_WRITE;
-               }
-               *resource_size_ret = resource_original_size;
-               return 0;
-       } else {
-               return recompress_resource(NULL, resource, resource_original_size,
-                               resource_original_size, 0, 0, out, out_ctype, 
-                                                       resource_size_ret);
-       }
-}
-
-
-/* 
- * Transfers a file resource from a FILE* opened for reading to a FILE* opened
- * for writing, possibly changing the compression type. 
- *
- * @in:                        The FILE* that contains the file resource.
- * @size:              The (compressed) size of the file resource.
- * @original_size:     The uncompressed size of the file resource.
- * @offset:            The offset of the file resource in the input file.
- * @input_ctype:       The compression type of the file resource in the input
- *                             file.
- * @out:               The FILE* for the output file.  The file resource is 
- *                             written at the current position of @out.
- * @output_ctype:      The compression type to which the file resource will be
- *                             converted.
- * @output_res_entry:  A pointer to a resource entry that, upon successful
- *                             return of this function,  will have the size,
- *                             original size, offset, and flags fields filled
- *                             in for the file resource written to the output
- *                             file.
- */
-static int transfer_file_resource(FILE *in, u64 size, u64 original_size, 
-                                 off_t offset, int input_ctype, FILE *out, 
-                                 int output_ctype, 
-                                 struct resource_entry *output_res_entry)
-{
-       int ret;
-
-       /* Handle zero-length files */
-       if (original_size == 0) {
-               memset(output_res_entry, 0, sizeof(*output_res_entry));
-               return 0;
-       }
-
-       /* Get current offset in the output file. */
-       output_res_entry->offset = ftello(out);
-       if (output_res_entry->offset == -1) {
-               ERROR("Failed to get output position: %m\n");
-               return WIMLIB_ERR_WRITE;
-       }
-
-       if (output_ctype == input_ctype) {
-               /* The same compression types; simply copy the resource. */
-
-               ret = copy_between_files(in, offset, out, size);
-               if (ret != 0)
-                       return ret;
-               output_res_entry->size = size;
-       } else {
-               /* Different compression types. */
-
-               if (output_ctype == WIM_COMPRESSION_TYPE_NONE) {
-                       /* Uncompress a compressed file resource */
-                       ret = uncompress_resource(in, size,
-                                               original_size, offset, 
-                                               input_ctype, out);
-                       if (ret != 0)
-                               return ret;
-                       output_res_entry->size = original_size;
-               } else {
-                       u64 new_size;
-                       /* Compress an uncompressed file resource, or compress a
-                        * compressed file resource using a different
-                        * compression type (the latter is currently unsupported
-                        * since only LZX compression is supported. */
-                       ret = recompress_resource(in, NULL, size, original_size,
-                                               offset, input_ctype, out, 
-                                               output_ctype, &new_size);
-                       if (ret != 0)
-                               return ret;
-                       output_res_entry->size = new_size;
-               }
-
-       }
-
-       output_res_entry->original_size = original_size;
-       if (output_ctype == WIM_COMPRESSION_TYPE_NONE)
-               output_res_entry->flags = 0;
-       else
-               output_res_entry->flags = WIM_RESHDR_FLAG_COMPRESSED;
-       return 0;
-}
-
-/* 
- * Writes a file resource to the output file. 
- *
- * @dentry:  The dentry for the file resource.
- * @wim_p:  A pointer to the WIMStruct.  The fields of interest to this
- *     function are the input and output file streams and the lookup table. 
- * @return zero on success, nonzero on failure. 
- */
-static int write_file_resource(struct dentry *dentry, void *wim_p)
-{
-       WIMStruct *w;
-       FILE *out;
-       FILE *in;
-       struct lookup_table_entry *lte;
-       int in_wim_ctype;
-       int out_wim_ctype;
-       int input_res_ctype;
-       struct resource_entry *input_res_entry;
-       struct resource_entry *output_res_entry;
-       u64 len;
-       int ret;
-
-       w = wim_p;
-       out = w->out_fp;
-
-       /* Directories don't need file resources. */
-       if (dentry_is_directory(dentry))
-               return 0;
-
-       /* Get the lookup entry for the file resource. */
-       lte = wim_lookup_resource(w, dentry);
-       if (!lte)
-               return 0;
-
-       /* No need to write file resources twice.  (This indicates file
-        * resources that are part of a hard link set.) */
-       if (++lte->out_refcnt != 1)
-               return 0;
-
-       out_wim_ctype = wimlib_get_compression_type(w);
-       output_res_entry = &lte->output_resource_entry;
-
-       /* Figure out if we can read the resource from the WIM file, or
-        * if we have to read it from the filesystem outside. */
-       if (lte->file_on_disk) {
-
-               /* Read from disk (uncompressed) */
-
-               len = lte->resource_entry.original_size;
-
-               in = fopen(lte->file_on_disk, "rb");
-               if (!in) {
-                       ERROR("Failed to open the file `%s': %m\n",
-                                       lte->file_on_disk);
-                       return WIMLIB_ERR_OPEN;
-               }
-
-               if (w->verbose)
-                       puts(lte->file_on_disk);
-
-               ret = transfer_file_resource(in, len, len, 0,
-                                            WIM_COMPRESSION_TYPE_NONE, out, 
-                                            out_wim_ctype, output_res_entry);
-               fclose(in);
-       } else {
-
-               /* Read from input WIM (possibly compressed) */
-
-               /* It may be a different WIM file, in the case of
-                * exporting images from one WIM file to another */
-               if (lte->other_wim_fp) {
-                       /* Different WIM file. */
-                       in = lte->other_wim_fp;
-                       in_wim_ctype = lte->other_wim_ctype;
-               } else {
-                       /* Same WIM file. */
-                       in = w->fp;
-                       in_wim_ctype = out_wim_ctype;
-               }
-               input_res_entry = &lte->resource_entry;
-               input_res_ctype = resource_compression_type(
-                                       in_wim_ctype, 
-                                       input_res_entry->flags);
-
-               ret = transfer_file_resource(in, 
-                                       input_res_entry->size,
-                                       input_res_entry->original_size, 
-                                       input_res_entry->offset,
-                                       input_res_ctype, 
-                                       out, 
-                                       out_wim_ctype,
-                                       output_res_entry);
-       }
-       return ret;
-}
-
 /* Reopens the FILE* for a WIM read-write. */
 static int reopen_rw(WIMStruct *w)
 {
@@ -599,7 +151,7 @@ WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *w, int flags)
                ret = WIMLIB_ERR_WRITE;
                goto err;
        }
-       ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp);
+       ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp, 0);
        if (ret != 0)
                goto err;
 
@@ -687,85 +239,6 @@ err:
        return ret;
 }
 
-/* Write the metadata resource for the current image. */
-int write_metadata_resource(WIMStruct *w)
-{
-       FILE *out;
-       u8 *buf;
-       u8 *p;
-       int ret;
-       off_t subdir_offset;
-       struct dentry *root;
-       struct lookup_table_entry *lte;
-       struct resource_entry *res_entry;
-       off_t metadata_offset;
-       u64 metadata_original_size;
-       u64 metadata_compressed_size;
-       int metadata_ctype;
-       u8  hash[WIM_HASH_SIZE];
-
-       DEBUG("Writing metadata resource for image %u\n", w->current_image);
-
-       out = w->out_fp;
-       root = wim_root_dentry(w);
-       metadata_ctype = wimlib_get_compression_type(w);
-       metadata_offset = ftello(out);
-       if (metadata_offset == -1)
-               return WIMLIB_ERR_WRITE;
-
-       subdir_offset = 8 + root->length + 8;
-       calculate_subdir_offsets(root, &subdir_offset);
-       metadata_original_size = subdir_offset;
-       buf = MALLOC(metadata_original_size);
-       if (!buf) {
-               ERROR("Failed to allocate %"PRIu64" bytes for "
-                               "metadata resource\n", metadata_original_size);
-               return WIMLIB_ERR_NOMEM;
-       }
-       p = buf;
-       #if 0
-       /* Write the security data. */
-       p = write_security_data(wim_security_data(w), p);
-       #else
-       p = put_u32(p, 8); /* Total length of security data. */
-       p = put_u32(p, 0); /* Number of security data entries. */
-       #endif
-
-       DEBUG("Writing dentry tree.\n");
-       p = write_dentry_tree(root, p);
-
-       /* Like file resources, the lookup table entry for a metadata resource
-        * uses for the hash code a SHA1 message digest of its uncompressed
-        * contents. */
-       sha1_buffer(buf, metadata_original_size, hash);
-
-       ret = write_resource_from_memory(buf, 
-                                        metadata_ctype,
-                                        metadata_original_size, 
-                                        out,
-                                        &metadata_compressed_size);
-       FREE(buf);
-       if (ret != 0)
-               return ret;
-
-       /* Update the lookup table entry, including the hash and output resource
-        * entry fields, for this image's metadata resource.  */
-       lte = wim_metadata_lookup_table_entry(w);
-       res_entry = &lte->output_resource_entry;
-       lte->out_refcnt++;
-       if (memcmp(hash, lte->hash, WIM_HASH_SIZE) != 0) {
-               lookup_table_unlink(w->lookup_table, lte);
-               memcpy(lte->hash, hash, WIM_HASH_SIZE);
-               lookup_table_insert(w->lookup_table, lte);
-       }
-       res_entry->original_size = metadata_original_size;
-       res_entry->offset        = metadata_offset;
-       res_entry->size          = metadata_compressed_size;
-       res_entry->flags         = WIM_RESHDR_FLAG_METADATA;
-       if (metadata_ctype != WIM_COMPRESSION_TYPE_NONE)
-               res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
-       return 0;
-}
 
 /* Write the file resources for the current image. */
 static int write_file_resources(WIMStruct *w)
@@ -798,18 +271,18 @@ int finish_write(WIMStruct *w, int image, int flags, int write_lt)
                if (lookup_table_offset == -1)
                        return WIMLIB_ERR_WRITE;
 
-               DEBUG("Writing lookup table.\n");
+               DEBUG("Writing lookup table (offset %"PRIu64")\n", lookup_table_offset);
                /* Write the lookup table. */
                ret = write_lookup_table(w->lookup_table, out);
                if (ret != 0)
                        return ret;
        }
 
-       DEBUG("Writing XML data.\n");
 
        xml_data_offset = ftello(out);
        if (xml_data_offset == -1)
                return WIMLIB_ERR_WRITE;
+       DEBUG("Writing XML data (offset %"PRIu64")\n", xml_data_offset);
 
        /* @hdr will be the header for the new WIM.  First copy all the data
         * from the header in the WIMStruct; then set all the fields that may
@@ -824,7 +297,8 @@ int finish_write(WIMStruct *w, int image, int flags, int write_lt)
        hdr.lookup_table_res_entry.original_size = hdr.lookup_table_res_entry.size;
        hdr.lookup_table_res_entry.flags         = WIM_RESHDR_FLAG_METADATA;
 
-       ret = write_xml_data(w->wim_info, image, out);
+       ret = write_xml_data(w->wim_info, image, out, 
+                            write_lt ? 0 : wim_info_get_total_bytes(w->wim_info));
        if (ret != 0)
                return ret;
 
@@ -860,7 +334,6 @@ int finish_write(WIMStruct *w, int image, int flags, int write_lt)
 
        DEBUG("Updating WIM header.\n");
 
-
        /* 
         * In the WIM header, there is room for the resource entry for a
         * metadata resource labeled as the "boot metadata".  This entry should
index ee75e17..a214f35 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1101,7 +1101,8 @@ int read_xml_data(FILE *fp, const struct resource_entry *res, u8 **xml_data_ret,
        xmlNode *root;
        int ret;
 
-       DEBUG("XML data is %"PRIu64" bytes long.\n", (u64)res->size);
+       DEBUG("XML data is %"PRIu64" bytes at offset %"PRIu64"\n", 
+                       (u64)res->size, res->offset);
 
        if (resource_is_compressed(res)) {
                ERROR("XML data is supposed to be uncompressed!\n");
@@ -1181,14 +1182,18 @@ err0:
 
 /* 
  * Writes XML data to a WIM file.
+ *
+ * If @total_bytes is non-zero, it specifies what to write to the TOTALBYTES
+ * element in the XML data.  If zero, TOTALBYTES is given the default value of
+ * the offset of the XML data.
  */
-int write_xml_data(const struct wim_info *wim_info, int image, FILE *out)
+int write_xml_data(const struct wim_info *wim_info, int image, FILE *out, 
+                  u64 total_bytes)
 {
        xmlBuffer     *buf;
        xmlTextWriter *writer;
        char          *utf16_str;
        int ret;
-       off_t total_bytes;
        int num_images;
        int i;
        const xmlChar *content;
@@ -1201,13 +1206,15 @@ int write_xml_data(const struct wim_info *wim_info, int image, FILE *out)
                         image <= wim_info->num_images));
 
        /* The contents of the <TOTALBYTES> element in the XML data, under the
-        * <WIM> element not the <IMAGE> element, is the size of the WIM file
-        * excluding the XML data and integrity table.  Which is the current
-        * offset, since the XML data goes at the end of the WIM file before the
-        * integrity table. */
-       total_bytes = ftello(out);
-       if (total_bytes == -1)
-               return WIMLIB_ERR_WRITE;
+        * <WIM> element not the <IMAGE> element, is (for non-split WIMs) the
+        * size of the WIM file excluding the XML data and integrity table,
+        * which is the current offset, since the XML data goes at the end of
+        * the WIM file before the integrity table. */
+       if (total_bytes == 0) {
+               total_bytes = ftello(out);
+               if (total_bytes == (u64)-1)
+                       return WIMLIB_ERR_WRITE;
+       }
 
        DEBUG("Creating XML buffer and text writer\n");
        buf = xmlBufferCreate();
index cf949f3..d1670f8 100644 (file)
--- a/src/xml.h
+++ b/src/xml.h
@@ -35,7 +35,8 @@ extern void print_image_info(const struct wim_info *wim_info, int image);
 extern int read_xml_data(FILE *fp, const struct resource_entry *res, 
                         u8 **xml_data_ret, struct wim_info **info_ret);
 
-extern int write_xml_data(const struct wim_info *wim_info, int image, FILE *out);
+extern int write_xml_data(const struct wim_info *wim_info, int image, FILE *out,
+                         u64 total_bytes);
 
 static inline u64 wim_info_get_total_bytes(const struct wim_info *info)
 {