From 49a63aa13cdeb4c1348697ccd92207a1a65ec7b0 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers3@gmail.com>
Date: Mon, 9 Dec 2013 00:26:22 -0600
Subject: [PATCH] Variable chunk size support (currently XPRESS only)

---
 include/wimlib.h              |  74 +++++-
 include/wimlib/compress.h     |   4 +-
 include/wimlib/header.h       |  13 +-
 include/wimlib/lookup_table.h |  14 +-
 include/wimlib/ntfs_3g.h      |   1 +
 include/wimlib/resource.h     |  12 +-
 include/wimlib/util.h         |   7 +
 include/wimlib/wim.h          |  11 +-
 include/wimlib/win32.h        |   2 +
 programs/imagex.c             |  47 ++++
 src/extract.c                 |   7 +-
 src/header.c                  |  21 +-
 src/integrity.c               |   1 +
 src/lookup_table.c            |   7 +-
 src/lz77.c                    |  31 ++-
 src/lzx-compress.c            |  17 +-
 src/metadata_resource.c       |   1 +
 src/ntfs-3g_capture.c         |  29 ++-
 src/resource.c                | 426 +++++++++++++++++++++-------------
 src/util.c                    |   3 +-
 src/wim.c                     | 116 ++++++++-
 src/win32_capture.c           |  34 ++-
 src/write.c                   | 174 +++++++++-----
 src/xml.c                     |   1 +
 src/xpress-compress.c         |  50 +++-
 25 files changed, 811 insertions(+), 292 deletions(-)

diff --git a/include/wimlib.h b/include/wimlib.h
index f4b0dc5a..82dd1afc 100644
--- a/include/wimlib.h
+++ b/include/wimlib.h
@@ -3359,15 +3359,39 @@ wimlib_set_image_descripton(WIMStruct *wim, int image,
 /**
  * @ingroup G_writing_and_overwriting_wims
  *
- * Set the compression type of a WIM to use in subsequent calls to
+ * Set the compression chunk size of a WIM to use in subsequent calls to
  * wimlib_write() or wimlib_overwrite().
  *
+ * @param wim
+ *	::WIMStruct for a WIM.
+ * @param chunk_size
+ *	The chunk size (in bytes) to set.  The valid chunk sizes are dependent
+ *	on the compression format.  The XPRESS compression format supports chunk
+ *	sizes that are powers of 2 with exponents between 15 and 26 inclusively,
+ *	whereas the LZX compression format currently only supports a chunk size
+ *	of 32768.
+ *
  * @return 0 on success; nonzero on error.
  *
+ * @retval ::WIMLIB_ERR_INVALID_CHUNK_SIZE
+ *	@p chunk_size is not a supported chunk size.
+ */
+extern int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size);
+
+/**
+ * @ingroup G_writing_and_overwriting_wims
+ *
+ * Set the compression type of a WIM to use in subsequent calls to
+ * wimlib_write() or wimlib_overwrite().
+ *
  * @param wim
  *	::WIMStruct for a WIM.
  * @param ctype
- *	The compression type to set (one of ::wimlib_compression_type).
+ *	The compression type to set (one of ::wimlib_compression_type).  If this
+ *	compression type is incompatible with the current output chunk size
+ *	(either the default or as set with wimlib_set_output_chunk_size()), the
+ *	output chunk size is reset to the default for that compression type.
  *
  * @return 0 on success; nonzero on error.
  *
@@ -3871,8 +3895,28 @@ wimlib_write_to_fd(WIMStruct *wim,
 /**
  * @ingroup G_compression
  *
- * This function is equivalent to wimlib_lzx_compress(), but instead compresses
- * the data using "XPRESS" compression.
+ * Compress a chunk of data using XPRESS compression.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * As of wimlib v1.5.4, this function can be used with @p chunk_size greater
+ * than 32768 bytes and is only limited by available memory.  However, the
+ * XPRESS format itself still caps match offsets to 65535, so if a larger chunk
+ * size is chosen, then the matching will effectively occur in a sliding window
+ * over it.
+ *
+ * @param chunk
+ * 	Uncompressed data of the chunk.
+ * @param chunk_size
+ * 	Size of the uncompressed chunk, in bytes.
+ * @param out
+ * 	Pointer to output buffer of size at least (@p chunk_size - 1) bytes.
+ *
+ * @return
+ * 	The size of the compressed data written to @p out in bytes, or 0 if the
+ * 	data could not be compressed to (@p chunk_size - 1) bytes or fewer.
  */
 extern unsigned
 wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out);
@@ -3880,8 +3924,26 @@ wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out);
 /**
  * @ingroup G_compression
  *
- * This function is equivalent to wimlib_lzx_decompress(), but instead assumes
- * the data is compressed using "XPRESS" compression.
+ * Decompresses a chunk of XPRESS-compressed data.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * @param compressed_data
+ * 	Pointer to the compressed data.
+ *
+ * @param compressed_len
+ * 	Length of the compressed data, in bytes.
+ *
+ * @param uncompressed_data
+ * 	Pointer to the buffer into which to write the uncompressed data.
+ *
+ * @param uncompressed_len
+ * 	Length of the uncompressed data.
+ *
+ * @return
+ * 	0 on success; non-zero on failure.
  */
 extern int
 wimlib_xpress_decompress(const void *compressed_data, unsigned compressed_len,
diff --git a/include/wimlib/compress.h b/include/wimlib/compress.h
index f3ce6e2d..eaa6ea0e 100644
--- a/include/wimlib/compress.h
+++ b/include/wimlib/compress.h
@@ -69,6 +69,7 @@ bitstream_put_byte(struct output_bitstream *ostream, u8 n);
 struct lz_params {
 	unsigned min_match;
 	unsigned max_match;
+	unsigned max_offset;
 	unsigned nice_match;
 	unsigned good_match;
 	unsigned max_chain_len;
@@ -85,7 +86,8 @@ lz_analyze_block(const u8 window[],
 		 lz_record_match_t record_match,
 		 lz_record_literal_t record_literal,
 		 void *record_ctx,
-		 const struct lz_params *params);
+		 const struct lz_params *params,
+		 input_idx_t prev_tab[]);
 
 extern void
 make_canonical_huffman_code(unsigned num_syms,
diff --git a/include/wimlib/header.h b/include/wimlib/header.h
index cd68486b..f7c4b7ff 100644
--- a/include/wimlib/header.h
+++ b/include/wimlib/header.h
@@ -11,11 +11,6 @@
 /* Length of the WIM header on disk.  */
 #define WIM_HEADER_DISK_SIZE 208
 
-/* Compressed resources in the WIM are divided into separated compressed chunks
- * of this size.  This value is unfortunately not configurable (at least when
- * compatibility with Microsoft's software is desired).  */
-#define WIM_CHUNK_SIZE 32768
-
 /* Version of the WIM file.  There is an older version (used for prerelease
  * versions of Windows Vista), but wimlib doesn't support it.  The differences
  * between the versions are undocumented.  */
@@ -69,9 +64,8 @@ struct wim_header_disk {
 	/* Flags for the WIM file (WIM_HDR_FLAG_*) */
 	u32 wim_flags;
 
-	/* Uncompressed chunk size of resources in the WIM.  0 if the WIM is
-	 * uncompressed.  If compressed, WIM_CHUNK_SIZE is expected (currently
-	 * the only supported value).  */
+	/* Chunk size for compressed resources in the WIM, or 0 if the WIM is
+	 * uncompressed.  */
 	u32 chunk_size;
 
 	/* Globally unique identifier for the WIM file.  Basically a bunch of
@@ -127,6 +121,9 @@ struct wim_header {
 	/* Bitwise OR of one or more of the WIM_HDR_FLAG_* defined below. */
 	u32 flags;
 
+	/* Compressed resource chunk size  */
+	u32 chunk_size;
+
 	/* A unique identifier for the WIM file. */
 	u8 guid[WIM_GID_LEN];
 
diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h
index 6a14daaa..d1b980da 100644
--- a/include/wimlib/lookup_table.h
+++ b/include/wimlib/lookup_table.h
@@ -298,16 +298,26 @@ wim_resource_size(const struct wim_lookup_table_entry *lte)
 	return lte->resource_entry.original_size;
 }
 
+static inline u32
+wim_resource_chunk_size(const struct wim_lookup_table_entry * lte)
+{
+	if (lte->resource_location == RESOURCE_IN_WIM &&
+	    lte->compression_type != WIMLIB_COMPRESSION_TYPE_NONE)
+		return lte->wim->chunk_size;
+	else
+		return 32768;
+}
+
+
 static inline u64
 wim_resource_chunks(const struct wim_lookup_table_entry *lte)
 {
-	return DIV_ROUND_UP(wim_resource_size(lte), WIM_CHUNK_SIZE);
+	return DIV_ROUND_UP(wim_resource_size(lte), wim_resource_chunk_size(lte));
 }
 
 static inline int
 wim_resource_compression_type(const struct wim_lookup_table_entry *lte)
 {
-	BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
 	return lte->compression_type;
 }
 
diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h
index 425d9dfd..fd93fa7b 100644
--- a/include/wimlib/ntfs_3g.h
+++ b/include/wimlib/ntfs_3g.h
@@ -14,6 +14,7 @@ extern int
 read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
 		      u64 size,
 		      consume_data_callback_t cb,
+		      u32 in_chunk_size,
 		      void *ctx_or_buf,
 		      int _ignored_flags);
 
diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h
index 799cb740..91b07ce8 100644
--- a/include/wimlib/resource.h
+++ b/include/wimlib/resource.h
@@ -110,7 +110,8 @@ put_resource_entry(const struct resource_entry *entry,
 extern int
 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 			  u64 size, consume_data_callback_t cb,
-			  void *ctx_or_buf, int flags, u64 offset);
+			  u32 in_chunk_size, void *ctx_or_buf,
+			  int flags, u64 offset);
 
 extern int
 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
@@ -128,14 +129,16 @@ res_entry_to_data(const struct resource_entry *res_entry,
 
 extern int
 read_resource_prefix(const struct wim_lookup_table_entry *lte,
-		     u64 size, consume_data_callback_t cb, void *ctx_or_buf,
-		     int flags);
+		     u64 size, consume_data_callback_t cb,
+		     u32 in_chunk_size, void *ctx_or_buf, int flags);
 
 /* Functions to write a resource.  */
 
 extern int
 write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd,
-		   int out_ctype, struct resource_entry *out_res_entry,
+		   int out_ctype,
+		   u32 out_chunk_size,
+		   struct resource_entry *out_res_entry,
 		   int write_resource_flags,
 		   struct wimlib_lzx_context **comp_ctx);
 
@@ -143,6 +146,7 @@ extern int
 write_wim_resource_from_buffer(const void *buf, size_t buf_size,
 			       int reshdr_flags, struct filedes *out_fd,
 			       int out_ctype,
+			       u32 out_chunk_size,
 			       struct resource_entry *out_res_entry,
 			       u8 *hash_ret, int write_resource_flags,
 			       struct wimlib_lzx_context **comp_ctx);
diff --git a/include/wimlib/util.h b/include/wimlib/util.h
index 7bc8359c..c61c1f97 100644
--- a/include/wimlib/util.h
+++ b/include/wimlib/util.h
@@ -121,6 +121,13 @@ bsr32(u32 n)
 #endif
 }
 
+static inline bool
+is_power_of_2(unsigned long n)
+{
+	return (n != 0 && (n & (n - 1)) == 0);
+
+}
+
 static inline u64
 hash_u64(u64 n)
 {
diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h
index b6cf84a7..e972950c 100644
--- a/include/wimlib/wim.h
+++ b/include/wimlib/wim.h
@@ -76,6 +76,15 @@ struct WIMStruct {
 	 * wimlib_set_output_compression_type(); otherwise is the same as
 	 * compression_type.  */
 	u8 out_compression_type : 2;
+
+	/* Uncompressed size of compressed chunks in this WIM (cached from
+	 * header).  */
+	u32 chunk_size;
+
+	/* Overridden chunk size for wimlib_overwrite() or wimlib_write().  Can
+	 * be changed by wimlib_set_output_chunk_size(); otherwise is the same
+	 * as chunk_size.  */
+	u32 out_chunk_size;
 };
 
 static inline bool wim_is_pipable(const WIMStruct *wim)
@@ -100,7 +109,7 @@ extern u32
 get_wim_hdr_cflags(int ctype);
 
 extern int
-init_wim_header(struct wim_header *hdr, int ctype);
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size);
 
 extern int
 read_wim_header(const tchar *filename, struct filedes *in_fd,
diff --git a/include/wimlib/win32.h b/include/wimlib/win32.h
index e2bdde0e..f4220127 100644
--- a/include/wimlib/win32.h
+++ b/include/wimlib/win32.h
@@ -15,6 +15,7 @@ extern int
 read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
 		       u64 size,
 		       consume_data_callback_t cb,
+		       u32 in_chunk_size,
 		       void *ctx_or_buf,
 		       int _ignored_flags);
 
@@ -22,6 +23,7 @@ extern int
 read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
 				 u64 size,
 				 consume_data_callback_t cb,
+				 u32 in_chunk_size,
 				 void *ctx_or_buf,
 				 int _ignored_flags);
 
diff --git a/programs/imagex.c b/programs/imagex.c
index 06ddeddd..d3588a9e 100644
--- a/programs/imagex.c
+++ b/programs/imagex.c
@@ -120,6 +120,7 @@ enum {
 	IMAGEX_ALLOW_OTHER_OPTION,
 	IMAGEX_BOOT_OPTION,
 	IMAGEX_CHECK_OPTION,
+	IMAGEX_CHUNK_SIZE_OPTION,
 	IMAGEX_COMMAND_OPTION,
 	IMAGEX_COMMIT_OPTION,
 	IMAGEX_COMPRESS_OPTION,
@@ -190,6 +191,7 @@ static const struct option capture_or_append_options[] = {
 	{T("nocheck"),     no_argument,       NULL, IMAGEX_NOCHECK_OPTION},
 	{T("compress"),    required_argument, NULL, IMAGEX_COMPRESS_OPTION},
 	{T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+	{T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
 	{T("config"),      required_argument, NULL, IMAGEX_CONFIG_OPTION},
 	{T("dereference"), no_argument,       NULL, IMAGEX_DEREFERENCE_OPTION},
 	{T("flags"),       required_argument, NULL, IMAGEX_FLAGS_OPTION},
@@ -286,6 +288,7 @@ static const struct option optimize_options[] = {
 	{T("recompress"),  no_argument,       NULL, IMAGEX_RECOMPRESS_OPTION},
 	{T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
 	{T("recompress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+	{T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
 	{T("threads"),     required_argument, NULL, IMAGEX_THREADS_OPTION},
 	{T("pipable"),     no_argument,       NULL, IMAGEX_PIPABLE_OPTION},
 	{T("not-pipable"), no_argument,       NULL, IMAGEX_NOT_PIPABLE_OPTION},
@@ -1273,6 +1276,19 @@ parse_num_threads(const tchar *optarg)
 	}
 }
 
+static uint32_t parse_chunk_size(const char *optarg)
+{
+       char *tmp;
+       unsigned long chunk_size = strtoul(optarg, &tmp, 10);
+       if (chunk_size >= UINT32_MAX || *tmp || tmp == optarg) {
+               imagex_error(T("Chunk size must be a non-negative integer!"));
+               return UINT32_MAX;
+       } else {
+               return chunk_size;
+       }
+}
+
+
 /*
  * Parse an option passed to an update command.
  *
@@ -1671,6 +1687,7 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
 			      WIMLIB_ADD_IMAGE_FLAG_WINCONFIG;
 	int write_flags = 0;
 	int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+	uint32_t chunk_size = UINT32_MAX;
 	const tchar *wimfile;
 	int wim_fd;
 	const tchar *name;
@@ -1731,6 +1748,11 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
 				goto out_err;
 			compression_type = WIMLIB_COMPRESSION_TYPE_LZX;
 			break;
+		case IMAGEX_CHUNK_SIZE_OPTION:
+			chunk_size = parse_chunk_size(optarg);
+			if (chunk_size == UINT32_MAX)
+				goto out_err;
+			break;
 		case IMAGEX_FLAGS_OPTION:
 			flags_element = optarg;
 			break;
@@ -1961,6 +1983,13 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
 	if (ret)
 		goto out_free_config;
 
+	/* Set chunk size if non-default.  */
+	if (chunk_size != UINT32_MAX) {
+		ret = wimlib_set_output_chunk_size(wim, chunk_size);
+		if (ret)
+			goto out_free_wim;
+	}
+
 #ifndef __WIN32__
 	/* Detect if source is regular file or block device and set NTFS volume
 	 * capture mode.  */
@@ -2486,6 +2515,8 @@ imagex_export(int argc, tchar **argv, int cmd)
 		ret = wimlib_create_new_wim(compression_type, &dest_wim);
 		if (ret)
 			goto out_free_src_wim;
+
+		wimlib_set_output_chunk_size(dest_wim, src_info.chunk_size);
 	}
 
 	image = wimlib_resolve_image(src_wim, src_image_num_or_name);
@@ -2745,6 +2776,8 @@ print_wim_information(const tchar *wimfile, const struct wimlib_wim_info *info)
 	tprintf(T("Image Count:    %d\n"), info->image_count);
 	tprintf(T("Compression:    %"TS"\n"),
 		wimlib_get_compression_type_string(info->compression_type));
+	tprintf(T("Chunk Size:     %"PRIu32" bytes\n"),
+		info->chunk_size);
 	tprintf(T("Part Number:    %d/%d\n"), info->part_number, info->total_parts);
 	tprintf(T("Boot Index:     %d\n"), info->boot_index);
 	tprintf(T("Size:           %"PRIu64" bytes\n"), info->total_bytes);
@@ -3248,6 +3281,7 @@ imagex_optimize(int argc, tchar **argv, int cmd)
 	int open_flags = WIMLIB_OPEN_FLAG_WRITE_ACCESS;
 	int write_flags = WIMLIB_WRITE_FLAG_REBUILD;
 	int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+	uint32_t chunk_size = UINT32_MAX;
 	int ret;
 	WIMStruct *wim;
 	const tchar *wimfile;
@@ -3280,6 +3314,11 @@ imagex_optimize(int argc, tchar **argv, int cmd)
 			if (ret)
 				goto out_err;
 			break;
+		case IMAGEX_CHUNK_SIZE_OPTION:
+			chunk_size = parse_chunk_size(optarg);
+			if (chunk_size == UINT32_MAX)
+				goto out_err;
+			break;
 		case IMAGEX_THREADS_OPTION:
 			num_threads = parse_num_threads(optarg);
 			if (num_threads == UINT_MAX)
@@ -3308,11 +3347,19 @@ imagex_optimize(int argc, tchar **argv, int cmd)
 		goto out;
 
 	if (compression_type != WIMLIB_COMPRESSION_TYPE_INVALID) {
+		/* Change compression type.  */
 		ret = wimlib_set_output_compression_type(wim, compression_type);
 		if (ret)
 			goto out_wimlib_free;
 	}
 
+	if (chunk_size != UINT32_MAX) {
+		/* Change chunk size.  */
+		ret = wimlib_set_output_chunk_size(wim, chunk_size);
+		if (ret)
+			goto out_wimlib_free;
+	}
+
 	old_size = file_get_size(wimfile);
 	tprintf(T("\"%"TS"\" original size: "), wimfile);
 	if (old_size == -1)
diff --git a/src/extract.c b/src/extract.c
index 656a76c9..4d5ba7e0 100644
--- a/src/extract.c
+++ b/src/extract.c
@@ -1382,8 +1382,11 @@ read_error:
 static int
 skip_pwm_stream(struct wim_lookup_table_entry *lte)
 {
-	return read_partial_wim_resource(lte, wim_resource_size(lte),
-					 NULL, NULL,
+	return read_partial_wim_resource(lte,
+					 wim_resource_size(lte),
+					 NULL,
+					 wim_resource_chunk_size(lte),
+					 NULL,
 					 WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY,
 					 0);
 }
diff --git a/src/header.c b/src/header.c
index 9492e7f2..e3f64493 100644
--- a/src/header.c
+++ b/src/header.c
@@ -125,14 +125,8 @@ read_wim_header(const tchar *filename, struct filedes *in_fd,
 	}
 
 	hdr->flags = le32_to_cpu(disk_hdr.wim_flags);
-	if (le32_to_cpu(disk_hdr.chunk_size) != WIM_CHUNK_SIZE &&
-	    (hdr->flags & WIM_HDR_FLAG_COMPRESSION)) {
-		ERROR("\"%"TS"\": Unexpected chunk size of %u! Ask the author to "
-		      "implement support for other chunk sizes.",
-		      filename, le32_to_cpu(disk_hdr.chunk_size));
-		ERROR("(Or it might just be that the WIM header is invalid.)");
-		return WIMLIB_ERR_INVALID_CHUNK_SIZE;
-	}
+
+	hdr->chunk_size = le32_to_cpu(disk_hdr.chunk_size);
 
 	memcpy(hdr->guid, disk_hdr.guid, WIM_GID_LEN);
 
@@ -188,8 +182,10 @@ write_wim_header_at_offset(const struct wim_header *hdr, struct filedes *out_fd,
 	disk_hdr.hdr_size = cpu_to_le32(sizeof(struct wim_header_disk));
 	disk_hdr.wim_version = cpu_to_le32(WIM_VERSION);
 	disk_hdr.wim_flags = cpu_to_le32(hdr->flags);
-	disk_hdr.chunk_size = cpu_to_le32((hdr->flags & WIM_HDR_FLAG_COMPRESSION) ?
-					  	WIM_CHUNK_SIZE : 0);
+	if (hdr->flags & WIM_HDR_FLAG_COMPRESSION)
+		disk_hdr.chunk_size = cpu_to_le32(hdr->chunk_size);
+	else
+		disk_hdr.chunk_size = 0;
 	memcpy(disk_hdr.guid, hdr->guid, WIM_GID_LEN);
 
 	disk_hdr.part_number = cpu_to_le16(hdr->part_number);
@@ -249,7 +245,7 @@ get_wim_hdr_cflags(int ctype)
  * Initializes the header for a WIM file.
  */
 int
-init_wim_header(struct wim_header *hdr, int ctype)
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size)
 {
 	memset(hdr, 0, sizeof(struct wim_header));
 	hdr->flags = get_wim_hdr_cflags(ctype);
@@ -257,6 +253,7 @@ init_wim_header(struct wim_header *hdr, int ctype)
 		ERROR("Invalid compression type specified (%d)", ctype);
 		return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
 	}
+	hdr->chunk_size = chunk_size;
 	hdr->total_parts = 1;
 	hdr->part_number = 1;
 	randomize_byte_array(hdr->guid, sizeof(hdr->guid));
@@ -297,7 +294,7 @@ wimlib_print_header(const WIMStruct *wim)
 		if (hdr_flags[i].flag & hdr->flags)
 			tprintf(T("    WIM_HDR_FLAG_%s is set\n"), hdr_flags[i].name);
 
-	tprintf(T("Chunk Size                  = %u\n"), WIM_CHUNK_SIZE);
+	tprintf(T("Chunk Size                  = %u\n"), wim->hdr.chunk_size);
 	tfputs (T("GUID                        = "), stdout);
 	print_byte_field(hdr->guid, WIM_GID_LEN, stdout);
 	tputchar(T('\n'));
diff --git a/src/integrity.c b/src/integrity.c
index b83b9c06..21bc2f15 100644
--- a/src/integrity.c
+++ b/src/integrity.c
@@ -365,6 +365,7 @@ write_integrity_table(WIMStruct *wim,
 					     0,
 					     &wim->out_fd,
 					     WIMLIB_COMPRESSION_TYPE_NONE,
+					     0,
 					     &wim->hdr.integrity,
 					     NULL,
 					     0,
diff --git a/src/lookup_table.c b/src/lookup_table.c
index 553cf33b..118cd065 100644
--- a/src/lookup_table.c
+++ b/src/lookup_table.c
@@ -75,8 +75,10 @@ new_lookup_table_entry(void)
 
 	lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
 	if (lte) {
-		lte->part_number  = 1;
-		lte->refcnt       = 1;
+		lte->part_number = 1;
+		lte->refcnt = 1;
+		BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0);
+		BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
 	} else {
 		ERROR("Out of memory (tried to allocate %zu bytes for "
 		      "lookup table entry)",
@@ -711,6 +713,7 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
 					     WIM_RESHDR_FLAG_METADATA,
 					     out_fd,
 					     WIMLIB_COMPRESSION_TYPE_NONE,
+					     0,
 					     out_res_entry,
 					     NULL,
 					     write_resource_flags,
diff --git a/src/lz77.c b/src/lz77.c
index b9a40174..887b8745 100644
--- a/src/lz77.c
+++ b/src/lz77.c
@@ -107,6 +107,9 @@ insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[],
  * @params:		Parameters that affect how long the search will proceed
  * 				before going with the best that has been found
  * 				so far.
+ * @min_start_pos:	If the chain reaches a match starting before this
+ *			position (including the end-of-chain 0), the search will
+ *			be terminated.
  *
  * Returns the length of the match that was found.
  */
@@ -115,7 +118,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 	      unsigned strstart, const input_idx_t prev_tab[],
 	      unsigned cur_match, unsigned prev_len,
 	      unsigned *match_start_ret,
-	      const struct lz_params *params)
+	      const struct lz_params *params,
+	      unsigned min_start_pos)
 {
 	unsigned chain_len = params->max_chain_len;
 
@@ -146,9 +150,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 		 * performance reasons.  Therefore uninitialized memory will be
 		 * accessed, and conditional jumps will be made that depend on
 		 * those values.  However the length of the match is limited to
-		 * the lookahead, so the output of deflate is not affected by
-		 * the uninitialized values.
-		 */
+		 * the lookahead, so the output of lz_analyze_block() is not
+		 * affected by the uninitialized values.  */
 
 		if (match[best_len] != scan_end
 		    || match[best_len - 1] != scan_end1
@@ -182,7 +185,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
 			scan_end1  = scan[best_len - 1];
 			scan_end   = scan[best_len];
 		}
-	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+	} while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
 	*match_start_ret = match_start;
 	return min(min(best_len, bytes_remaining), params->max_match);
 }
@@ -201,6 +204,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
  * @params:		Structure that contains parameters that affect how the
  * 				analysis proceeds (mainly how good the matches
  * 				have to be).
+ * @prev_tab:		Temporary space with at least @window_size elements.
  */
 void
 lz_analyze_block(const u8 window[],
@@ -208,7 +212,8 @@ lz_analyze_block(const u8 window[],
 		 lz_record_match_t record_match,
 		 lz_record_literal_t record_literal,
 		 void *record_ctx,
-		 const struct lz_params *params)
+		 const struct lz_params *params,
+		 input_idx_t prev_tab[])
 {
 	unsigned cur_input_pos = 0;
 	unsigned hash          = 0;
@@ -219,7 +224,7 @@ lz_analyze_block(const u8 window[],
 	unsigned match_start   = 0;
 	bool match_available = false;
 	input_idx_t hash_tab[HASH_SIZE];
-	input_idx_t prev_tab[window_size];
+	unsigned min_start_pos = 1;
 
 	ZERO_ARRAY(hash_tab);
 
@@ -245,7 +250,14 @@ lz_analyze_block(const u8 window[],
 		prev_start = match_start;
 		match_len = params->min_match - 1;
 
-		if (hash_head != 0 && prev_len < params->max_lazy_match) {
+		if (cur_input_pos > params->max_offset)
+			min_start_pos = cur_input_pos - params->max_offset;
+		else
+			min_start_pos = 1;
+
+		if (hash_head >= min_start_pos &&
+		    prev_len < params->max_lazy_match)
+		{
 			/* To simplify the code, we prevent matches with the
 			 * string of window index 0 (in particular we have to
 			 * avoid a match of the string with itself at the start
@@ -254,7 +266,8 @@ lz_analyze_block(const u8 window[],
 						  window_size - cur_input_pos,
 						  cur_input_pos, prev_tab,
 						  hash_head, prev_len,
-						  &match_start, params);
+						  &match_start, params,
+						  min_start_pos);
 
 			if (match_len == params->min_match &&
 			     cur_input_pos - match_start > params->too_far)
diff --git a/src/lzx-compress.c b/src/lzx-compress.c
index 9874a7b9..6e1a0d9c 100644
--- a/src/lzx-compress.c
+++ b/src/lzx-compress.c
@@ -2174,6 +2174,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
 		 * aren't worth choosing when using greedy or lazy parsing.  */
 		.min_match      = 3,
 		.max_match      = LZX_MAX_MATCH_LEN,
+		.max_offset	= 32768,
 		.good_match     = LZX_MAX_MATCH_LEN,
 		.nice_match     = LZX_MAX_MATCH_LEN,
 		.max_chain_len  = LZX_MAX_MATCH_LEN,
@@ -2187,12 +2188,16 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
 	record_ctx.matches = ctx->chosen_matches;
 
 	/* Determine series of matches/literals to output.  */
-	lz_analyze_block(ctx->window,
-			 ctx->window_size,
-			 lzx_record_match,
-			 lzx_record_literal,
-			 &record_ctx,
-			 &lzx_lz_params);
+	{
+		input_idx_t prev_tab[ctx->window_size];
+		lz_analyze_block(ctx->window,
+				 ctx->window_size,
+				 lzx_record_match,
+				 lzx_record_literal,
+				 &record_ctx,
+				 &lzx_lz_params,
+				 prev_tab);
+	}
 
 
 	/* Set up block specification.  */
diff --git a/src/metadata_resource.c b/src/metadata_resource.c
index d69ad82c..5ee29664 100644
--- a/src/metadata_resource.c
+++ b/src/metadata_resource.c
@@ -297,6 +297,7 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags)
 	ret = write_wim_resource_from_buffer(buf, len, WIM_RESHDR_FLAG_METADATA,
 					     &wim->out_fd,
 					     wim->out_compression_type,
+					     wim->out_chunk_size,
 					     &imd->metadata_lte->output_resource_entry,
 					     imd->metadata_lte->hash,
 					     write_resource_flags,
diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c
index c674cca9..8d5d4c63 100644
--- a/src/ntfs-3g_capture.c
+++ b/src/ntfs-3g_capture.c
@@ -78,6 +78,7 @@ int
 read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
 		      u64 size,
 		      consume_data_callback_t cb,
+		      u32 in_chunk_size,
 		      void *ctx_or_buf,
 		      int _ignored_flags)
 {
@@ -88,7 +89,9 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
 	s64 pos;
 	s64 bytes_remaining;
 	void *out_buf;
+	bool out_buf_malloced;
 	int ret;
+	size_t stack_max = 32768;
 
  	ni = ntfs_pathname_to_inode(vol, NULL, loc->path);
 	if (!ni) {
@@ -103,30 +106,44 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
 		goto out_close_ntfs_inode;
 	}
 
-	if (cb)
-		out_buf = alloca(WIM_CHUNK_SIZE);
-	else
+	out_buf_malloced = false;
+	if (cb) {
+		if (in_chunk_size <= stack_max) {
+			out_buf = alloca(in_chunk_size);
+		} else {
+			out_buf = MALLOC(in_chunk_size);
+			if (out_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_close_ntfs_attr;
+			}
+			out_buf_malloced = true;
+		}
+	} else {
 		out_buf = ctx_or_buf;
+	}
 	pos = (loc->is_reparse_point) ? 8 : 0;
 	bytes_remaining = size;
 	while (bytes_remaining) {
-		s64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
+		s64 to_read = min(bytes_remaining, in_chunk_size);
 		if (ntfs_attr_pread(na, pos, to_read, out_buf) != to_read) {
 			ERROR_WITH_ERRNO("Error reading \"%"TS"\"", loc->path);
 			ret = WIMLIB_ERR_NTFS_3G;
-			goto out_close_ntfs_attr;
+			goto out_free_memory;
 		}
 		pos += to_read;
 		bytes_remaining -= to_read;
 		if (cb) {
 			ret = cb(out_buf, to_read, ctx_or_buf);
 			if (ret)
-				goto out_close_ntfs_attr;
+				goto out_free_memory;
 		} else {
 			out_buf += to_read;
 		}
 	}
 	ret = 0;
+out_free_memory:
+	if (out_buf_malloced)
+		FREE(out_buf);
 out_close_ntfs_attr:
 	ntfs_attr_close(na);
 out_close_ntfs_inode:
diff --git a/src/resource.c b/src/resource.c
index 001ea024..092669e1 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -96,10 +96,15 @@ typedef int (*decompress_func_t)(const void *, unsigned, void *, unsigned);
 static decompress_func_t
 get_decompress_func(int ctype)
 {
-	if (ctype == WIMLIB_COMPRESSION_TYPE_LZX)
+	switch (ctype) {
+	case WIMLIB_COMPRESSION_TYPE_LZX:
 		return wimlib_lzx_decompress;
-	else
+	case WIMLIB_COMPRESSION_TYPE_XPRESS:
 		return wimlib_xpress_decompress;
+	default:
+		wimlib_assert(0);
+		return NULL;
+	}
 }
 
 /*
@@ -122,12 +127,19 @@ get_decompress_func(int ctype)
  *	stream and chunk headers.
  */
 static int
-read_compressed_resource(const struct wim_lookup_table_entry *lte,
-			 u64 size, consume_data_callback_t cb,
-			 void *ctx_or_buf, int flags, u64 offset)
+read_compressed_resource(const struct wim_lookup_table_entry * const lte,
+			 u64 size, const consume_data_callback_t cb,
+			 const u32 in_chunk_size, void * const ctx_or_buf,
+			 const int flags, const u64 offset)
 {
 	int ret;
 
+	const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+	const u32 orig_chunk_order = bsr32(orig_chunk_size);
+
+	wimlib_assert(is_power_of_2(orig_chunk_size));
+	wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
 	/* Currently, reading raw compressed chunks is only guaranteed to work
 	 * correctly when the full resource is requested.  Furthermore, in such
 	 * cases the requested size is specified as the compressed size, but
@@ -136,6 +148,7 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 	if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 		wimlib_assert(offset == 0);
 		wimlib_assert(size == lte->resource_entry.size);
+		wimlib_assert(wim_resource_chunk_size(lte) == in_chunk_size);
 		size = wim_resource_size(lte);
 	}
 
@@ -145,35 +158,45 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 	if (size == 0)
 		return 0;
 
+	u64 *chunk_offsets = NULL;
+	u8 *out_buf = NULL;
+	u8 *tmp_buf = NULL;
+	void *compressed_buf = NULL;
+	bool chunk_offsets_malloced = false;
+	bool out_buf_malloced = false;
+	bool tmp_buf_malloced = false;
+	bool compressed_buf_malloced = false;
+	const size_t stack_max = 32768;
+
 	/* Get the appropriate decompression function.  */
-	decompress_func_t decompress =
+	const decompress_func_t decompress =
 			get_decompress_func(wim_resource_compression_type(lte));
 
 	/* Get the file descriptor for the WIM.  */
-	struct filedes *in_fd = &lte->wim->in_fd;
+	struct filedes * const in_fd = &lte->wim->in_fd;
 
 	/* Calculate the number of chunks the resource is divided into.  */
-	u64 num_chunks = wim_resource_chunks(lte);
+	const u64 num_chunks = wim_resource_chunks(lte);
 
 	/* Calculate the number of entries in the chunk table; it's one less
 	 * than the number of chunks, since the first chunk has no entry.  */
-	u64 num_chunk_entries = num_chunks - 1;
+	const u64 num_chunk_entries = num_chunks - 1;
 
 	/* Calculate the 0-based index of the chunk at which the read starts.
 	 */
-	u64 start_chunk = offset / WIM_CHUNK_SIZE;
+	const u64 start_chunk = offset >> orig_chunk_order;
 
 	/* Calculate the offset, within the start chunk, of the first byte of
 	 * the read.  */
-	u64 start_offset_in_chunk = offset % WIM_CHUNK_SIZE;
+	const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
 
 	/* Calculate the index of the chunk that contains the last byte of the
 	 * read.  */
-	u64 end_chunk = (offset + size - 1) / WIM_CHUNK_SIZE;
+	const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
 
 	/* Calculate the offset, within the end chunk, of the last byte of the
 	 * read.  */
-	u64 end_offset_in_chunk = (offset + size - 1) % WIM_CHUNK_SIZE;
+	const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
 
 	/* Calculate the number of chunk entries are actually needed to read the
 	 * requested part of the resource.  Include an entry for the first chunk
@@ -181,28 +204,25 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 	 * account that if the last chunk required for the read is not the last
 	 * chunk of the resource, an extra chunk entry is needed so that the
 	 * compressed size of the last chunk of the read can be determined.  */
-	u64 num_alloc_chunk_entries = end_chunk - start_chunk + 1;
-	if (end_chunk != num_chunks - 1)
-		num_alloc_chunk_entries++;
+	const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
+					    1 + (end_chunk != num_chunks - 1);
 
 	/* Set the size of each chunk table entry based on the resource's
 	 * uncompressed size.  */
-	u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
+	const u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
 
 	/* Calculate the size, in bytes, of the full chunk table.  */
-	u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
+	const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
 
 	/* Allocate a buffer to hold a subset of the chunk table.  It will only
 	 * contain offsets for the chunks that are actually needed for this
 	 * read.  For speed, allocate the buffer on the stack unless it's too
 	 * large.  */
-	u64 *chunk_offsets;
-	bool chunk_offsets_malloced;
-	if (num_alloc_chunk_entries < 1024) {
+	if (num_alloc_chunk_entries <= stack_max) {
 		chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
 		chunk_offsets_malloced = false;
 	} else {
-		chunk_offsets = malloc(num_alloc_chunk_entries * sizeof(u64));
+		chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
 		if (!chunk_offsets) {
 			ERROR("Failed to allocate chunk table "
 			      "with %"PRIu64" entries", num_alloc_chunk_entries);
@@ -217,40 +237,38 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 		chunk_offsets[0] = 0;
 
 	/* Calculate the index of the first needed entry in the chunk table.  */
-	u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
+	const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
 
 	/* Calculate the number of entries that need to be read from the chunk
 	 * table.  */
-	u64 num_needed_chunk_entries = (start_chunk == 0) ?
+	const u64 num_needed_chunk_entries = (start_chunk == 0) ?
 				num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
 
 	/* Calculate the number of bytes of data that need to be read from the
 	 * chunk table.  */
-	size_t chunk_table_needed_size =
+	const size_t chunk_table_needed_size =
 				num_needed_chunk_entries * chunk_entry_size;
 	if ((u64)chunk_table_needed_size !=
 	    num_needed_chunk_entries * chunk_entry_size)
 	{
 		ERROR("Compressed read request too large to fit into memory!");
 		ret = WIMLIB_ERR_NOMEM;
-		goto out_free_chunk_offsets;
+		goto out_free_memory;
 	}
 
 	/* Calculate the byte offset, in the WIM file, of the first chunk table
 	 * entry to read.  Take into account that if the WIM file is in the
 	 * special "pipable" format, then the chunk table is at the end of the
 	 * resource, not the beginning.  */
-	u64 file_offset_of_needed_chunk_entries =
-			lte->resource_entry.offset + (start_table_idx *
-						      chunk_entry_size);
-	if (lte->is_pipable)
-		file_offset_of_needed_chunk_entries += lte->resource_entry.size -
-						       chunk_table_size;
+	const u64 file_offset_of_needed_chunk_entries =
+		lte->resource_entry.offset
+		+ (start_table_idx * chunk_entry_size)
+		+ (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
 
 	/* Read the needed chunk table entries into the end of the chunk_offsets
 	 * buffer.  */
-	void *chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
-				chunk_table_needed_size;
+	void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
+				      chunk_table_needed_size;
 	ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
 			 file_offset_of_needed_chunk_entries);
 	if (ret)
@@ -266,12 +284,9 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 	{
 		typedef le64 __attribute__((may_alias)) aliased_le64_t;
 		typedef le32 __attribute__((may_alias)) aliased_le32_t;
-		u64 *chunk_offsets_p = chunk_offsets;
+		u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
 		u64 i;
 
-		if (start_chunk == 0)
-			chunk_offsets_p++;
-
 		if (chunk_entry_size == 4) {
 			aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
 			for (i = 0; i < num_needed_chunk_entries; i++)
@@ -283,32 +298,74 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 		}
 	}
 
-	/* Calculate file offset of the first chunk that needs to be read.  N.B.
-	 * if the resource is pipable, the entries in the chunk table do *not*
-	 * include the chunk headers.  */
+	/* Calculate file offset of the first chunk that needs to be read.
+	 * Note: if the resource is pipable, the entries in the chunk table do
+	 * *not* include the chunk headers.  */
 	u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
 	if (!lte->is_pipable)
 		cur_read_offset += chunk_table_size;
 	else
-		cur_read_offset += start_chunk *
-				   sizeof(struct pwm_chunk_hdr);
+		cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
 
 	/* If using a callback function, allocate a temporary buffer that will
 	 * be used to pass data to it.  If writing directly to a buffer instead,
 	 * arrange to write data directly into it.  */
-	u8 *out_p;
-	if (cb)
-		out_p = alloca(WIM_CHUNK_SIZE);
-	else
-		out_p = ctx_or_buf;
+	size_t out_buf_size;
+	u8 *out_buf_end, *out_p;
+	if (cb) {
+		out_buf_size = max(in_chunk_size, orig_chunk_size);
+		if (out_buf_size <= stack_max) {
+			out_buf = alloca(out_buf_size);
+		} else {
+			out_buf = MALLOC(out_buf_size);
+			if (out_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_free_memory;
+			}
+			out_buf_malloced = true;
+		}
+	} else {
+		out_buf_size = size;
+		out_buf = ctx_or_buf;
+	}
+	out_buf_end = out_buf + out_buf_size;
+	out_p = out_buf;
 
 	/* Unless the raw compressed data was requested, allocate a temporary
 	 * buffer for reading compressed chunks, each of which can be at most
-	 * WIM_CHUNK_SIZE - 1 bytes.  This excludes compressed chunks that are a
-	 * full WIM_CHUNK_SIZE bytes, which are handled separately.  */
-	void *compressed_buf;
-	if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
-		compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
+	 * orig_chunk_size - 1 bytes.  This excludes compressed chunks that are
+	 * a full orig_chunk_size bytes, which are actually stored uncompressed.
+	 */
+	if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
+		if (orig_chunk_size - 1 <= stack_max) {
+			compressed_buf = alloca(orig_chunk_size - 1);
+		} else {
+			compressed_buf = MALLOC(orig_chunk_size - 1);
+			if (compressed_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_free_memory;
+			}
+			compressed_buf_malloced = true;
+		}
+	}
+
+	/* Allocate yet another temporary buffer, this one for reading partial
+	 * chunks.  */
+	if (start_offset_in_chunk != 0 ||
+	    (end_offset_in_chunk != orig_chunk_size - 1 &&
+	     offset + size != wim_resource_size(lte)))
+	{
+		if (orig_chunk_size <= stack_max) {
+			tmp_buf = alloca(orig_chunk_size);
+		} else {
+			tmp_buf = MALLOC(orig_chunk_size);
+			if (tmp_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_free_memory;
+			}
+			tmp_buf_malloced = true;
+		}
+	}
 
 	/* Read, and possibly decompress, each needed chunk, either writing the
 	 * data directly into the @ctx_or_buf buffer or passing it to the @cb
@@ -321,15 +378,15 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 
 		/* Calculate the sizes of the compressed chunk and of the
 		 * uncompressed chunk.  */
-		unsigned compressed_chunk_size;
-		unsigned uncompressed_chunk_size;
+		u32 compressed_chunk_size;
+		u32 uncompressed_chunk_size;
 		if (i != num_chunks - 1) {
 			/* Not the last chunk.  Compressed size is given by
 			 * difference of chunk table entries; uncompressed size
-			 * is always 32768 bytes.  */
+			 * is always the WIM chunk size.  */
 			compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
 						chunk_offsets[i - start_chunk];
-			uncompressed_chunk_size = WIM_CHUNK_SIZE;
+			uncompressed_chunk_size = orig_chunk_size;
 		} else {
 			/* Last chunk.  Compressed size is the remaining size in
 			 * the compressed resource; uncompressed size is the
@@ -341,18 +398,18 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 				compressed_chunk_size -= num_chunks *
 							 sizeof(struct pwm_chunk_hdr);
 
-			if (wim_resource_size(lte) % WIM_CHUNK_SIZE == 0)
-				uncompressed_chunk_size = WIM_CHUNK_SIZE;
+			if ((wim_resource_size(lte) & (orig_chunk_size - 1)) == 0)
+				uncompressed_chunk_size = orig_chunk_size;
 			else
-				uncompressed_chunk_size = wim_resource_size(lte) %
-							  WIM_CHUNK_SIZE;
+				uncompressed_chunk_size = wim_resource_size(lte) &
+							  (orig_chunk_size - 1);
 		}
 
 		/* Calculate how much of this chunk needs to be read.  */
 
-		unsigned partial_chunk_size;
-		u64 start_offset = 0;
-		u64 end_offset = WIM_CHUNK_SIZE - 1;
+		u32 partial_chunk_size;
+		u32 start_offset = 0;
+		u32 end_offset = orig_chunk_size - 1;
 
 		if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 			partial_chunk_size = compressed_chunk_size;
@@ -371,13 +428,14 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 		{
 			/* Chunk stored uncompressed, or reading raw chunk data.  */
 			ret = full_pread(in_fd,
-					 cb ? out_p + start_offset : out_p,
+					 out_p,
 					 partial_chunk_size,
 					 cur_read_offset + start_offset);
 			if (ret)
 				goto read_error;
 		} else {
 			/* Compressed chunk and not doing raw read.  */
+			u8 *target;
 
 			/* Read the compressed data into compressed_buf.  */
 			ret = full_pread(in_fd,
@@ -387,64 +445,72 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
 			if (ret)
 				goto read_error;
 
-			/* For partial chunks and when writing directly to a
-			 * buffer, we must buffer the uncompressed data because
-			 * we don't need all of it.  */
-			if (partial_chunk_size != uncompressed_chunk_size &&
-			    cb == NULL)
-			{
-				u8 uncompressed_buf[uncompressed_chunk_size];
-
-				ret = (*decompress)(compressed_buf,
-						    compressed_chunk_size,
-						    uncompressed_buf,
-						    uncompressed_chunk_size);
-				if (ret) {
-					ERROR("Failed to decompress data.");
-					ret = WIMLIB_ERR_DECOMPRESSION;
-					errno = EINVAL;
-					goto out_free_chunk_offsets;
-				}
-				memcpy(out_p, uncompressed_buf + start_offset,
-				       partial_chunk_size);
-			} else {
-				ret = (*decompress)(compressed_buf,
-						    compressed_chunk_size,
-						    out_p,
-						    uncompressed_chunk_size);
-				if (ret) {
-					ERROR("Failed to decompress data.");
-					ret = WIMLIB_ERR_DECOMPRESSION;
-					errno = EINVAL;
-					goto out_free_chunk_offsets;
-				}
+			/* For partial chunks we must buffer the uncompressed
+			 * data because we don't need all of it.  */
+			if (partial_chunk_size == uncompressed_chunk_size)
+				target = out_p;
+			else
+				target = tmp_buf;
+
+			/* Decompress the chunk.  */
+			ret = (*decompress)(compressed_buf,
+					    compressed_chunk_size,
+					    target,
+					    uncompressed_chunk_size);
+			if (ret) {
+				ERROR("Failed to decompress data.");
+				ret = WIMLIB_ERR_DECOMPRESSION;
+				errno = EINVAL;
+				goto out_free_memory;
 			}
+			if (partial_chunk_size != uncompressed_chunk_size)
+				memcpy(out_p, tmp_buf + start_offset,
+				       partial_chunk_size);
 		}
+
+		out_p += partial_chunk_size;
+
 		if (cb) {
 			/* Feed the data to the callback function.  */
-			ret = cb(out_p + start_offset,
-				 partial_chunk_size, ctx_or_buf);
-			if (ret)
-				goto out_free_chunk_offsets;
-		} else {
-			/* No callback function provided; we are writing
-			 * directly to a buffer.  Advance the pointer into this
-			 * buffer by the number of uncompressed bytes that were
-			 * written.  */
-			out_p += partial_chunk_size;
+			wimlib_assert(offset == 0);
+
+			if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
+				ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
+				if (ret)
+					goto out_free_memory;
+				out_p = out_buf;
+
+			} else if (i == end_chunk || out_p == out_buf_end) {
+				size_t bytes_sent;
+				const u8 *p;
+
+				for (p = out_buf; p != out_p; p += bytes_sent) {
+					bytes_sent = min(in_chunk_size, out_p - p);
+					ret = cb(p, bytes_sent, ctx_or_buf);
+					if (ret)
+						goto out_free_memory;
+				}
+				out_p = out_buf;
+			}
 		}
 		cur_read_offset += compressed_chunk_size;
 	}
 
 	ret = 0;
-out_free_chunk_offsets:
+out_free_memory:
 	if (chunk_offsets_malloced)
 		FREE(chunk_offsets);
+	if (out_buf_malloced)
+		FREE(out_buf);
+	if (compressed_buf_malloced)
+		FREE(compressed_buf);
+	if (tmp_buf_malloced)
+		FREE(tmp_buf);
 	return ret;
 
 read_error:
 	ERROR_WITH_ERRNO("Error reading compressed file resource");
-	goto out_free_chunk_offsets;
+	goto out_free_memory;
 }
 
 /* Skip over the chunk table at the end of pipable, compressed resource being
@@ -473,13 +539,26 @@ skip_chunk_table(const struct wim_lookup_table_entry *lte,
 static int
 read_pipable_resource(const struct wim_lookup_table_entry *lte,
 		      u64 size, consume_data_callback_t cb,
-		      void *ctx_or_buf, int flags, u64 offset)
+		      u32 in_chunk_size, void *ctx_or_buf,
+		      int flags, u64 offset)
 {
 	struct filedes *in_fd;
 	decompress_func_t decompress;
 	int ret;
-	u8 chunk[WIM_CHUNK_SIZE];
-	u8 cchunk[WIM_CHUNK_SIZE - 1];
+	const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+	u8 cchunk[orig_chunk_size - 1];
+
+	size_t out_buf_size;
+	u8 *out_buf, *out_buf_end, *out_p;
+	if (cb) {
+		out_buf_size = max(in_chunk_size, orig_chunk_size);
+		out_buf = alloca(out_buf_size);
+	} else {
+		out_buf_size = size;
+		out_buf = ctx_or_buf;
+	}
+	out_buf_end = out_buf + out_buf_size;
+	out_p = out_buf;
 
 	/* Get pointers to appropriate decompression function and the input file
 	 * descriptor.  */
@@ -495,15 +574,13 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
 	wimlib_assert(size == wim_resource_size(lte));
 	wimlib_assert(in_fd->offset == lte->resource_entry.offset);
 
-	for (offset = 0; offset < size; offset += WIM_CHUNK_SIZE) {
+	u32 chunk_usize;
+	for (offset = 0; offset < size; offset += chunk_usize) {
 		struct pwm_chunk_hdr chunk_hdr;
-		u32 chunk_size;
-		u32 cchunk_size;
-		u8 *res_chunk;
-		u32 res_chunk_size;
+		u32 chunk_csize;
 
 		/* Calculate uncompressed size of next chunk.  */
-		chunk_size = min(WIM_CHUNK_SIZE, size - offset);
+		chunk_usize = min(orig_chunk_size, size - offset);
 
 		/* Read the compressed size of the next chunk from the chunk
 		 * header.  */
@@ -511,16 +588,16 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
 		if (ret)
 			goto read_error;
 
-		cchunk_size = le32_to_cpu(chunk_hdr.compressed_size);
+		chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
 
-		if (cchunk_size > WIM_CHUNK_SIZE) {
+		if (chunk_csize > orig_chunk_size) {
 			errno = EINVAL;
 			ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
 			goto invalid;
 		}
 
 		/* Read chunk data.  */
-		ret = full_read(in_fd, cchunk, cchunk_size);
+		ret = full_read(in_fd, cchunk, chunk_csize);
 		if (ret)
 			goto read_error;
 
@@ -530,29 +607,34 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
 		/* Decompress chunk if needed.  Uncompressed size same
 		 * as compressed size means the chunk is uncompressed.
 		 */
-		res_chunk_size = chunk_size;
-		if (cchunk_size == chunk_size) {
-			res_chunk = cchunk;
+		if (chunk_csize == chunk_usize) {
+			memcpy(out_p, cchunk, chunk_usize);
 		} else {
-			ret = (*decompress)(cchunk, cchunk_size,
-					    chunk, chunk_size);
+			ret = (*decompress)(cchunk, chunk_csize,
+					    out_p, chunk_usize);
 			if (ret) {
 				errno = EINVAL;
 				ret = WIMLIB_ERR_DECOMPRESSION;
 				goto invalid;
 			}
-			res_chunk = chunk;
 		}
+		out_p += chunk_usize;
 
 		/* Feed the uncompressed data into the callback function or copy
 		 * it into the provided buffer.  */
-		if (cb) {
-			ret = cb(res_chunk, res_chunk_size, ctx_or_buf);
-			if (ret)
-				return ret;
-		} else {
-			ctx_or_buf = mempcpy(ctx_or_buf, res_chunk,
-					     res_chunk_size);
+		if (cb && (out_p == out_buf_end ||
+			   offset + chunk_usize == size))
+		{
+			size_t bytes_sent;
+			const u8 *p;
+
+			for (p = out_buf; p != out_p; p += bytes_sent) {
+				bytes_sent = min(in_chunk_size, out_p - p);
+				ret = cb(p, bytes_sent, ctx_or_buf);
+				if (ret)
+					return ret;
+			}
+			out_p = out_buf;
 		}
 	}
 
@@ -631,6 +713,7 @@ invalid:
 int
 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 			  u64 size, consume_data_callback_t cb,
+			  u32 in_chunk_size,
 			  void *ctx_or_buf, int flags, u64 offset)
 {
 	struct filedes *in_fd;
@@ -640,6 +723,13 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 	 * somewhere else.  */
 	wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
 
+	/* If a callback was specified, in_chunk_size must be a power of 2 (and
+	 * not 0).  */
+	wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
+	/* If a callback was specified, offset must be zero.  */
+	wimlib_assert(cb == NULL || offset == 0);
+
 	/* Retrieve input file descriptor for the WIM file.  */
 	in_fd = &lte->wim->in_fd;
 
@@ -678,10 +768,9 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 			}
 		} else if (cb) {
 			/* Send data to callback function */
-			u8 buf[min(WIM_CHUNK_SIZE, size)];
+			u8 buf[min(in_chunk_size, size)];
 			while (size) {
-				size_t bytes_to_read = min(WIM_CHUNK_SIZE,
-							   size);
+				size_t bytes_to_read = min(in_chunk_size, size);
 				ret = full_pread(in_fd, buf, bytes_to_read,
 						 offset);
 				if (ret)
@@ -702,11 +791,13 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 	} else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
 		/* Reading compressed, pipable resource from pipe.  */
 		ret = read_pipable_resource(lte, size, cb,
+					    in_chunk_size,
 					    ctx_or_buf, flags, offset);
 	} else {
 		/* Reading compressed, possibly pipable resource from seekable
 		 * file.  */
 		ret = read_compressed_resource(lte, size, cb,
+					       in_chunk_size,
 					       ctx_or_buf, flags, offset);
 	}
 	goto out;
@@ -722,17 +813,19 @@ int
 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
 				   size_t size, u64 offset, void *buf)
 {
-	return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
+	return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
 }
 
 static int
 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
 			 u64 size,
 			 consume_data_callback_t cb,
+			 u32 in_chunk_size,
 			 void *ctx_or_buf,
 			 int flags)
 {
-	return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
+	return read_partial_wim_resource(lte, size, cb, in_chunk_size,
+					 ctx_or_buf, flags, 0);
 }
 
 
@@ -741,6 +834,7 @@ static int
 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
 			 u64 size,
 			 consume_data_callback_t cb,
+			 u32 in_chunk_size,
 			 void *ctx_or_buf,
 			 int _ignored_flags)
 {
@@ -748,6 +842,9 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
 	int ret;
 	struct filedes fd;
 	int raw_fd;
+	u8 *out_buf;
+	bool out_buf_malloced;
+	const size_t stack_max = 32768;
 
 	DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
 	      size, lte->file_on_disk);
@@ -758,16 +855,27 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
 		return WIMLIB_ERR_OPEN;
 	}
 	filedes_init(&fd, raw_fd);
+	out_buf_malloced = false;
 	if (cb) {
 		/* Send data to callback function */
-		u8 buf[min(WIM_CHUNK_SIZE, size)];
+		if (in_chunk_size <= stack_max) {
+			out_buf = alloca(in_chunk_size);
+		} else {
+			out_buf = MALLOC(in_chunk_size);
+			if (out_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_close;
+			}
+			out_buf_malloced = true;
+		}
+
 		size_t bytes_to_read;
 		while (size) {
-			bytes_to_read = min(WIM_CHUNK_SIZE, size);
-			ret = full_read(&fd, buf, bytes_to_read);
+			bytes_to_read = min(in_chunk_size, size);
+			ret = full_read(&fd, out_buf, bytes_to_read);
 			if (ret)
 				goto read_error;
-			ret = cb(buf, bytes_to_read, ctx_or_buf);
+			ret = cb(out_buf, bytes_to_read, ctx_or_buf);
 			if (ret)
 				goto out_close;
 			size -= bytes_to_read;
@@ -785,6 +893,8 @@ read_error:
 	ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
 out_close:
 	filedes_close(&fd);
+	if (out_buf_malloced)
+		FREE(out_buf);
 	return ret;
 }
 #endif /* !__WIN32__ */
@@ -792,22 +902,23 @@ out_close:
 static int
 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
 		   u64 size, consume_data_callback_t cb,
+		   u32 in_chunk_size,
 		   void *ctx_or_buf, int _ignored_flags)
 {
-	const void *inbuf = lte->attached_buffer;
-	int ret;
 
 	if (cb) {
-		while (size) {
-			size_t chunk_size = min(WIM_CHUNK_SIZE, size);
-			ret = cb(inbuf, chunk_size, ctx_or_buf);
+		int ret;
+		u32 chunk_size;
+
+		for (u64 offset = 0; offset < size; offset += chunk_size) {
+			chunk_size = min(in_chunk_size, size - offset);
+			ret = cb((const u8*)lte->attached_buffer + offset,
+				 chunk_size, ctx_or_buf);
 			if (ret)
 				return ret;
-			size -= chunk_size;
-			inbuf += chunk_size;
 		}
 	} else {
-		memcpy(ctx_or_buf, inbuf, size);
+		memcpy(ctx_or_buf, lte->attached_buffer, size);
 	}
 	return 0;
 }
@@ -815,6 +926,7 @@ read_buffer_prefix(const struct wim_lookup_table_entry *lte,
 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
 					      u64 size,
 					      consume_data_callback_t cb,
+					      u32 in_chunk_size,
 					      void *ctx_or_buf,
 					      int flags);
 
@@ -837,8 +949,8 @@ typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entr
  */
 int
 read_resource_prefix(const struct wim_lookup_table_entry *lte,
-		     u64 size, consume_data_callback_t cb, void *ctx_or_buf,
-		     int flags)
+		     u64 size, consume_data_callback_t cb, u32 in_chunk_size,
+		     void *ctx_or_buf, int flags)
 {
 	static const read_resource_prefix_handler_t handlers[] = {
 		[RESOURCE_IN_WIM]             = read_wim_resource_prefix,
@@ -860,14 +972,15 @@ read_resource_prefix(const struct wim_lookup_table_entry *lte,
 	};
 	wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
 		      && handlers[lte->resource_location] != NULL);
-	return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
+	wimlib_assert(cb == NULL || in_chunk_size > 0);
+	return handlers[lte->resource_location](lte, size, cb, in_chunk_size, ctx_or_buf, flags);
 }
 
 int
 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
 			    void *buf)
 {
-	return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
+	return read_resource_prefix(lte, wim_resource_size(lte), NULL, 0, buf, 0);
 }
 
 int
@@ -957,6 +1070,7 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
 		sha1_init(&ctx.sha_ctx);
 		ret = read_resource_prefix(lte, size,
 					   extract_chunk_sha1_wrapper,
+					   wim_resource_chunk_size(lte),
 					   &ctx, 0);
 		if (ret == 0) {
 			u8 hash[SHA1_HASH_SIZE];
@@ -975,6 +1089,7 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
 	} else {
 		/* Don't do SHA1 */
 		ret = read_resource_prefix(lte, size, extract_chunk,
+					   wim_resource_chunk_size(lte),
 					   extract_chunk_arg, 0);
 	}
 	return ret;
@@ -1014,7 +1129,8 @@ sha1_resource(struct wim_lookup_table_entry *lte)
 
 	sha1_init(&sha_ctx);
 	ret = read_resource_prefix(lte, wim_resource_size(lte),
-				   sha1_chunk, &sha_ctx, 0);
+				   sha1_chunk, wim_resource_chunk_size(lte),
+				   &sha_ctx, 0);
 	if (ret == 0)
 		sha1_final(lte->hash, &sha_ctx);
 	return ret;
diff --git a/src/util.c b/src/util.c
index fd14e1fe..262fa1e1 100644
--- a/src/util.c
+++ b/src/util.c
@@ -303,8 +303,7 @@ static const tchar *error_strings[] = {
 	[WIMLIB_ERR_INVALID_CAPTURE_CONFIG]
 		= T("The capture configuration string was invalid"),
 	[WIMLIB_ERR_INVALID_CHUNK_SIZE]
-		= T("The WIM is compressed but does not have a chunk "
-			"size of 32768"),
+		= T("The WIM chunk size was invalid"),
 	[WIMLIB_ERR_INVALID_COMPRESSION_TYPE]
 		= T("The WIM is compressed, but is not marked as having LZX or "
 			"XPRESS compression"),
diff --git a/src/wim.c b/src/wim.c
index 2a6ac129..63459751 100644
--- a/src/wim.c
+++ b/src/wim.c
@@ -75,6 +75,62 @@ new_wim_struct(void)
 	return wim;
 }
 
+static bool
+wim_chunk_size_valid(u32 chunk_size, int ctype)	/* true iff chunk_size may be used with compression type ctype */
+{
+	u32 order;
+
+	/* Chunk size is meaningless for uncompressed WIMs --- any value is
+	 * okay.  */
+	if (ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+		return true;
+
+	/* Chunk size must be a nonzero power of 2.  */
+	if (chunk_size == 0)	/* rejected before bsr32() is called */
+		return false;
+	order = bsr32(chunk_size);	/* presumably the index of the highest set bit --- confirm */
+	if (chunk_size != 1U << order)	/* not an exact power of 2 */
+		return false;
+
+	/* Order	Size
+	 * =====	====
+	 * 15		32768
+	 * 16		65536
+	 * 17		131072
+	 * 18		262144
+	 * 19		524288
+	 * 20		1048576
+	 * 21		2097152
+	 * 22		4194304
+	 * 23		8388608
+	 * 24		16777216
+	 * 25		33554432
+	 * 26		67108864
+	 */
+	switch (ctype) {
+	case WIMLIB_COMPRESSION_TYPE_LZX:
+		/* TODO: Allow other chunk sizes when supported by the LZX
+		 * compressor and decompressor.  */
+		return order == 15;
+
+	case WIMLIB_COMPRESSION_TYPE_XPRESS:
+		/* WIMGAPI (Windows 7) didn't seem to support XPRESS chunk size
+		 * below 32768 bytes, but larger power-of-two sizes appear to be
+		 * supported.  67108864 was the largest size that worked.
+		 * (Note, however, that the offsets of XPRESS matches are still
+		 * limited to 65535 bytes even when a much larger chunk size is
+		 * used!)  */
+		return order >= 15 && order <= 26;
+	}
+	return false;	/* any other compression type: no chunk size is accepted */
+}
+
+static u32
+wim_default_chunk_size(int ctype)
+{
+	return 32768;	/* same default for every compression type; ctype is currently unused */
+}
+
 /*
  * Calls a function on images in the WIM.  If @image is WIMLIB_ALL_IMAGES, @visitor
  * is called on the WIM once for each image, with each image selected as the
@@ -127,7 +183,7 @@ wimlib_create_new_wim(int ctype, WIMStruct **wim_ret)
 	if (!wim)
 		return WIMLIB_ERR_NOMEM;
 
-	ret = init_wim_header(&wim->hdr, ctype);
+	ret = init_wim_header(&wim->hdr, ctype, wim_default_chunk_size(ctype));
 	if (ret != 0)
 		goto out_free;
 
@@ -140,6 +196,8 @@ wimlib_create_new_wim(int ctype, WIMStruct **wim_ret)
 	wim->refcnts_ok = 1;
 	wim->compression_type = ctype;
 	wim->out_compression_type = ctype;
+	wim->chunk_size = wim->hdr.chunk_size;
+	wim->out_chunk_size = wim->hdr.chunk_size;
 	*wim_ret = wim;
 	return 0;
 out_free:
@@ -294,7 +352,7 @@ wimlib_get_wim_info(WIMStruct *wim, struct wimlib_wim_info *info)
 	info->image_count = wim->hdr.image_count;
 	info->boot_index = wim->hdr.boot_idx;
 	info->wim_version = WIM_VERSION;
-	info->chunk_size = WIM_CHUNK_SIZE;
+	info->chunk_size = wim->hdr.chunk_size;
 	info->part_number = wim->hdr.part_number;
 	info->total_parts = wim->hdr.total_parts;
 	info->compression_type = wim->compression_type;
@@ -357,6 +415,50 @@ wimlib_set_wim_info(WIMStruct *wim, const struct wimlib_wim_info *info, int whic
 	return 0;
 }
 
+/* API function documented in wimlib.h.  Returns 0 on success, else WIMLIB_ERR_INVALID_PARAM.  */
+WIMLIBAPI int
+wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
+{
+	switch (ctype) {
+	case WIMLIB_COMPRESSION_TYPE_INVALID:
+		break;	/* leaves the switch and reaches the INVALID_PARAM return below */
+	case WIMLIB_COMPRESSION_TYPE_NONE:
+	case WIMLIB_COMPRESSION_TYPE_LZX:
+	case WIMLIB_COMPRESSION_TYPE_XPRESS:
+		wim->out_compression_type = ctype;
+
+		/* Reset the chunk size to the default if it's no longer valid
+		 * for the compression type just stored.  */
+		if (!wim_chunk_size_valid(wim->out_chunk_size,
+					  wim->out_compression_type))
+			wim->out_chunk_size = wim_default_chunk_size(wim->out_compression_type);
+		return 0;
+	}
+	return WIMLIB_ERR_INVALID_PARAM;	/* INVALID, or any value not listed in the switch */
+}
+
+/* API function documented in wimlib.h.  Returns 0, or WIMLIB_ERR_INVALID_CHUNK_SIZE if rejected.  */
+WIMLIBAPI int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size)
+{
+	if (!wim_chunk_size_valid(chunk_size, wim->out_compression_type)) {
+		ERROR("Invalid chunk size (%"PRIu32" bytes) "
+		      "for compression type %"TS"!",
+		      chunk_size,
+		      wimlib_get_compression_type_string(wim->out_compression_type));
+		switch (wim->out_compression_type) {	/* hint at what would have been accepted */
+		case WIMLIB_COMPRESSION_TYPE_XPRESS:
+			ERROR("Valid chunk sizes for XPRESS are 32768, 65536, 131072, ..., 67108864.");
+			break;
+		case WIMLIB_COMPRESSION_TYPE_LZX:	/* wim_chunk_size_valid() accepts only order 15 for LZX */
+			ERROR("The only valid chunk size for LZX is 32768.");
+			break;
+		}
+		return WIMLIB_ERR_INVALID_CHUNK_SIZE;	/* wim->out_chunk_size is left unchanged */
+	}
+	wim->out_chunk_size = chunk_size;
+	return 0;
+}
+
 static int
 do_open_wim(const tchar *filename, struct filedes *fd_ret)
 {
@@ -485,6 +587,16 @@ begin_read(WIMStruct *wim, const void *wim_filename_or_fd,
 	}
 	wim->out_compression_type = wim->compression_type;
 
+	/* Check and cache the chunk size.  */
+	wim->chunk_size = wim->out_chunk_size = wim->hdr.chunk_size;
+	if (!wim_chunk_size_valid(wim->chunk_size, wim->compression_type)) {
+		ERROR("Invalid chunk size (%"PRIu32" bytes) "
+		      "for compression type %"TS"!",
+		      wim->chunk_size,
+		      wimlib_get_compression_type_string(wim->compression_type));
+		return WIMLIB_ERR_INVALID_CHUNK_SIZE;
+	}
+
 	if (open_flags & WIMLIB_OPEN_FLAG_CHECK_INTEGRITY) {
 		ret = check_wim_integrity(wim, progress_func);
 		if (ret == WIM_INTEGRITY_NONEXISTENT) {
diff --git a/src/win32_capture.c b/src/win32_capture.c
index 308c0677..f940179c 100644
--- a/src/win32_capture.c
+++ b/src/win32_capture.c
@@ -58,12 +58,15 @@ int
 read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
 		       u64 size,
 		       consume_data_callback_t cb,
+		       u32 in_chunk_size,
 		       void *ctx_or_buf,
 		       int _ignored_flags)
 {
 	int ret = 0;
 	void *out_buf;
+	bool out_buf_malloced;
 	u64 bytes_remaining;
+	const size_t stack_max = 32768;
 
 	HANDLE hFile = win32_open_existing_file(lte->file_on_disk,
 						FILE_READ_DATA);
@@ -73,16 +76,27 @@ read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
 		return WIMLIB_ERR_OPEN;
 	}
 
-	if (cb)
-		out_buf = alloca(WIM_CHUNK_SIZE);
-	else
+	out_buf_malloced = false;
+	if (cb) {
+		if (in_chunk_size <= stack_max) {
+			out_buf = alloca(in_chunk_size);
+		} else {
+			out_buf = MALLOC(in_chunk_size);
+			if (out_buf == NULL) {
+				ret = WIMLIB_ERR_NOMEM;
+				goto out_close_handle;
+			}
+			out_buf_malloced = true;
+		}
+	} else {
 		out_buf = ctx_or_buf;
+	}
 
 	bytes_remaining = size;
 	while (bytes_remaining) {
 		DWORD bytesToRead, bytesRead;
 
-		bytesToRead = min(WIM_CHUNK_SIZE, bytes_remaining);
+		bytesToRead = min(in_chunk_size, bytes_remaining);
 		if (!ReadFile(hFile, out_buf, bytesToRead, &bytesRead, NULL) ||
 		    bytesRead != bytesToRead)
 		{
@@ -101,6 +115,9 @@ read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
 			out_buf += bytesRead;
 		}
 	}
+	if (out_buf_malloced)
+		FREE(out_buf);
+out_close_handle:
 	CloseHandle(hFile);
 	return ret;
 }
@@ -112,6 +129,7 @@ struct win32_encrypted_read_ctx {
 	void *buf;
 	size_t buf_filled;
 	u64 bytes_remaining;
+	u32 in_chunk_size;
 };
 
 static DWORD WINAPI
@@ -119,6 +137,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
 {
 	const void *data = _data;
 	struct win32_encrypted_read_ctx *ctx = _ctx;
+	u32 in_chunk_size = ctx->in_chunk_size;
 	int ret;
 
 	DEBUG("len = %lu", len);
@@ -130,7 +149,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
 					     len);
 		while (bytes_to_buffer) {
 			size_t bytes_to_copy_to_buf =
-				min(bytes_to_buffer, WIM_CHUNK_SIZE - ctx->buf_filled);
+				min(bytes_to_buffer, in_chunk_size - ctx->buf_filled);
 
 			memcpy(ctx->buf + ctx->buf_filled, data,
 			       bytes_to_copy_to_buf);
@@ -138,7 +157,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
 			data += bytes_to_copy_to_buf;
 			bytes_to_buffer -= bytes_to_copy_to_buf;
 
-			if (ctx->buf_filled == WIM_CHUNK_SIZE ||
+			if (ctx->buf_filled == in_chunk_size ||
 			    ctx->buf_filled == ctx->bytes_remaining)
 			{
 				ret = (*ctx->read_prefix_cb)(ctx->buf,
@@ -168,6 +187,7 @@ int
 read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
 				 u64 size,
 				 consume_data_callback_t cb,
+				 u32 in_chunk_size,
 				 void *ctx_or_buf,
 				 int _ignored_flags)
 {
@@ -183,7 +203,7 @@ read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
 	export_ctx.read_prefix_ctx_or_buf = ctx_or_buf;
 	export_ctx.wimlib_err_code = 0;
 	if (cb) {
-		export_ctx.buf = MALLOC(WIM_CHUNK_SIZE);
+		export_ctx.buf = MALLOC(in_chunk_size);
 		if (!export_ctx.buf)
 			return WIMLIB_ERR_NOMEM;
 	} else {
diff --git a/src/write.c b/src/write.c
index a127a99b..8554d1ab 100644
--- a/src/write.c
+++ b/src/write.c
@@ -68,6 +68,30 @@
 #  include <sys/uio.h> /* for `struct iovec' */
 #endif
 
+/* Return true if the resource is compressed and its compressed data can be
+ * reused: compression type and chunk size match and RECOMPRESS is not set.  */
+static bool
+can_raw_copy(const struct wim_lookup_table_entry *lte,
+	     int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+	return (!(write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)
+		&& out_ctype == wim_resource_compression_type(lte)
+		&& out_chunk_size == wim_resource_chunk_size(lte)
+		&& out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
+}
+
+/* Tell whether the stream has to be (re)compressed for the given output
+ * parameters; this is never the case when the output is uncompressed.  */
+static bool
+must_compress_stream(const struct wim_lookup_table_entry *lte,
+		     int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+		return false;
+	return (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) ||
+	    !can_raw_copy(lte, write_resource_flags, out_ctype, out_chunk_size);
+}
+
 static unsigned
 compress_chunk(const void * uncompressed_data,
 	       unsigned uncompressed_len,
@@ -115,6 +139,7 @@ struct chunk_table {
 static int
 begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
 			     struct filedes *out_fd,
+			     u32 out_chunk_size,
 			     struct chunk_table **chunk_tab_ret,
 			     int resource_flags)
 {
@@ -126,7 +151,7 @@ begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
 	int ret;
 
 	size = wim_resource_size(lte);
-	num_chunks = wim_resource_chunks(lte);
+	num_chunks = DIV_ROUND_UP(size, out_chunk_size);
 	bytes_per_chunk_entry = (size > (1ULL << 32)) ? 8 : 4;
 	alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
 	chunk_tab = CALLOC(1, alloc_size);
@@ -273,6 +298,7 @@ finalize_and_check_sha1(SHA_CTX *sha_ctx, struct wim_lookup_table_entry *lte)
 
 struct write_resource_ctx {
 	int out_ctype;
+	u32 out_chunk_size;
 	struct wimlib_lzx_context *comp_ctx;
 	struct chunk_table *chunk_tab;
 	struct filedes *out_fd;
@@ -288,6 +314,10 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
 	const void *out_chunk;
 	unsigned out_chunk_size;
 	int ret;
+	void *compressed_chunk = NULL;
+	unsigned compressed_size;
+	bool compressed_chunk_malloced = false;
+	size_t stack_max = 32768;
 
 	if (ctx->doing_sha)
 		sha1_update(&ctx->sha_ctx, chunk, chunk_size);
@@ -295,11 +325,16 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
 	out_chunk = chunk;
 	out_chunk_size = chunk_size;
 	if (ctx->out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
-		void *compressed_chunk;
-		unsigned compressed_size;
 
 		/* Compress the chunk.  */
-		compressed_chunk = alloca(chunk_size);
+		if (chunk_size <= stack_max) {
+			compressed_chunk = alloca(chunk_size);
+		} else {
+			compressed_chunk = MALLOC(chunk_size);
+			if (compressed_chunk == NULL)
+				return WIMLIB_ERR_NOMEM;
+			compressed_chunk_malloced = true;
+		}
 
 		compressed_size = compress_chunk(chunk, chunk_size,
 						 compressed_chunk,
@@ -335,11 +370,15 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
 	ret = full_write(ctx->out_fd, out_chunk, out_chunk_size);
 	if (ret)
 		goto error;
-	return 0;
+
+out_free_memory:
+	if (compressed_chunk_malloced)
+		FREE(compressed_chunk);
+	return ret;
 
 error:
 	ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
-	return ret;
+	goto out_free_memory;
 }
 
 /*
@@ -381,12 +420,14 @@ error:
 int
 write_wim_resource(struct wim_lookup_table_entry *lte,
 		   struct filedes *out_fd, int out_ctype,
+		   u32 out_chunk_size,
 		   struct resource_entry *out_res_entry,
 		   int resource_flags,
 		   struct wimlib_lzx_context **comp_ctx)
 {
 	struct write_resource_ctx write_ctx;
 	off_t res_start_offset;
+	u32 in_chunk_size;
 	u64 read_size;
 	int ret;
 
@@ -402,11 +443,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
 	 * desired other than no compression, we can simply copy the compressed
 	 * data without recompressing it.  This also means we must skip
 	 * calculating the SHA1, as we never will see the uncompressed data.  */
-	if (lte->resource_location == RESOURCE_IN_WIM &&
-	    out_ctype == wim_resource_compression_type(lte) &&
-	    out_ctype != WIMLIB_COMPRESSION_TYPE_NONE &&
-	    !(resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS))
-	{
+	if (can_raw_copy(lte, resource_flags, out_ctype, out_chunk_size)) {
 		/* Normally we can request a RAW_FULL read, but if we're reading
 		 * from a pipable resource and writing a non-pipable resource or
 		 * vice versa, then a RAW_CHUNKS read needs to be requested so
@@ -418,22 +455,23 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
 			resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_FULL;
 		else
 			resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS;
-		write_ctx.doing_sha = false;
 		read_size = lte->resource_entry.size;
+		write_ctx.doing_sha = false;
 	} else {
 		write_ctx.doing_sha = true;
 		sha1_init(&write_ctx.sha_ctx);
 		read_size = lte->resource_entry.original_size;
 	}
 
-
 	/* If the output resource is to be compressed, initialize the chunk
 	 * table and set the function to use for chunk compression.  Exceptions:
 	 * no compression function is needed if doing a raw copy; also, no chunk
 	 * table is needed if doing a *full* (not per-chunk) raw copy.  */
 	write_ctx.out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
+	write_ctx.out_chunk_size = out_chunk_size;
 	write_ctx.chunk_tab = NULL;
 	if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+		wimlib_assert(out_chunk_size > 0);
 		if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW)) {
 			write_ctx.out_ctype = out_ctype;
 			if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
@@ -445,6 +483,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
 		}
 		if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)) {
 			ret = begin_wim_resource_chunk_tab(lte, out_fd,
+							   out_chunk_size,
 							   &write_ctx.chunk_tab,
 							   resource_flags);
 			if (ret)
@@ -469,8 +508,13 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
 	write_ctx.out_fd = out_fd;
 	write_ctx.resource_flags = resource_flags;
 try_write_again:
+	if (write_ctx.out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+		in_chunk_size = wim_resource_chunk_size(lte);
+	else
+		in_chunk_size = out_chunk_size;
 	ret = read_resource_prefix(lte, read_size,
-				   write_resource_cb, &write_ctx, resource_flags);
+				   write_resource_cb,
+				   in_chunk_size, &write_ctx, resource_flags);
 	if (ret)
 		goto out_free_chunk_tab;
 
@@ -553,6 +597,7 @@ int
 write_wim_resource_from_buffer(const void *buf, size_t buf_size,
 			       int reshdr_flags, struct filedes *out_fd,
 			       int out_ctype,
+			       u32 out_chunk_size,
 			       struct resource_entry *out_res_entry,
 			       u8 *hash_ret, int write_resource_flags,
 			       struct wimlib_lzx_context **comp_ctx)
@@ -566,6 +611,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size,
 	lte.attached_buffer              = (void*)buf;
 	lte.resource_entry.original_size = buf_size;
 	lte.resource_entry.flags         = reshdr_flags;
+	lte.compression_type		 = WIMLIB_COMPRESSION_TYPE_NONE;
 
 	if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
 		sha1_buffer(buf, buf_size, lte.hash);
@@ -574,8 +620,8 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size,
 		lte.unhashed = 1;
 	}
 
-	ret = write_wim_resource(&lte, out_fd, out_ctype, out_res_entry,
-				 write_resource_flags, comp_ctx);
+	ret = write_wim_resource(&lte, out_fd, out_ctype, out_chunk_size,
+				 out_res_entry, write_resource_flags, comp_ctx);
 	if (ret)
 		return ret;
 	if (hash_ret)
@@ -684,6 +730,7 @@ struct compressor_thread_params {
 
 struct message {
 	struct wim_lookup_table_entry *lte;
+	u32 out_chunk_size;
 	u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
 	u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
 	unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
@@ -802,6 +849,7 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data,
 struct serial_write_stream_ctx {
 	struct filedes *out_fd;
 	int out_ctype;
+	u32 out_chunk_size;
 	struct wimlib_lzx_context **comp_ctx;
 	int write_resource_flags;
 };
@@ -811,7 +859,9 @@ serial_write_stream(struct wim_lookup_table_entry *lte, void *_ctx)
 {
 	struct serial_write_stream_ctx *ctx = _ctx;
 	return write_wim_resource(lte, ctx->out_fd,
-				  ctx->out_ctype, &lte->output_resource_entry,
+				  ctx->out_ctype,
+				  ctx->out_chunk_size,
+				  &lte->output_resource_entry,
 				  ctx->write_resource_flags,
 				  ctx->comp_ctx);
 }
@@ -911,6 +961,7 @@ do_write_stream_list_serial(struct list_head *stream_list,
 			    struct wim_lookup_table *lookup_table,
 			    struct filedes *out_fd,
 			    int out_ctype,
+			    u32 out_chunk_size,
 			    struct wimlib_lzx_context **comp_ctx,
 			    int write_resource_flags,
 			    struct write_streams_progress_data *progress_data)
@@ -918,6 +969,7 @@ do_write_stream_list_serial(struct list_head *stream_list,
 	struct serial_write_stream_ctx ctx = {
 		.out_fd = out_fd,
 		.out_ctype = out_ctype,
+		.out_chunk_size = out_chunk_size,
 		.write_resource_flags = write_resource_flags,
 		.comp_ctx = comp_ctx,
 	};
@@ -945,6 +997,7 @@ write_stream_list_serial(struct list_head *stream_list,
 			 struct wim_lookup_table *lookup_table,
 			 struct filedes *out_fd,
 			 int out_ctype,
+			 u32 out_chunk_size,
 			 struct wimlib_lzx_context **comp_ctx,
 			 int write_resource_flags,
 			 struct write_streams_progress_data *progress_data)
@@ -961,6 +1014,7 @@ write_stream_list_serial(struct list_head *stream_list,
 					   lookup_table,
 					   out_fd,
 					   out_ctype,
+					   out_chunk_size,
 					   comp_ctx,
 					   write_resource_flags,
 					   progress_data);
@@ -1011,6 +1065,7 @@ struct main_writer_thread_ctx {
 	struct filedes *out_fd;
 	off_t res_start_offset;
 	int out_ctype;
+	u32 out_chunk_size;
 	struct wimlib_lzx_context **comp_ctx;
 	int write_resource_flags;
 	struct shared_queue *res_to_compress_queue;
@@ -1034,11 +1089,12 @@ struct main_writer_thread_ctx {
 };
 
 static int
-init_message(struct message *msg)
+init_message(struct message *msg, u32 out_chunk_size)
 {
+	msg->out_chunk_size = out_chunk_size;
 	for (size_t i = 0; i < MAX_CHUNKS_PER_MSG; i++) {
-		msg->compressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
-		msg->uncompressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
+		msg->compressed_chunks[i] = MALLOC(out_chunk_size);
+		msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
 		if (msg->compressed_chunks[i] == NULL ||
 		    msg->uncompressed_chunks[i] == NULL)
 			return WIMLIB_ERR_NOMEM;
@@ -1066,7 +1122,7 @@ free_messages(struct message *msgs, size_t num_messages)
 }
 
 static struct message *
-allocate_messages(size_t num_messages)
+allocate_messages(size_t num_messages, u32 out_chunk_size)
 {
 	struct message *msgs;
 
@@ -1074,7 +1130,7 @@ allocate_messages(size_t num_messages)
 	if (!msgs)
 		return NULL;
 	for (size_t i = 0; i < num_messages; i++) {
-		if (init_message(&msgs[i])) {
+		if (init_message(&msgs[i], out_chunk_size)) {
 			free_messages(msgs, num_messages);
 			return NULL;
 		}
@@ -1096,7 +1152,7 @@ main_writer_thread_init_ctx(struct main_writer_thread_ctx *ctx)
 {
 	/* Pre-allocate all the buffers that will be needed to do the chunk
 	 * compression. */
-	ctx->msgs = allocate_messages(ctx->num_messages);
+	ctx->msgs = allocate_messages(ctx->num_messages, ctx->out_chunk_size);
 	if (!ctx->msgs)
 		return WIMLIB_ERR_NOMEM;
 
@@ -1182,6 +1238,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
 			 * it if needed.  */
 			ret = begin_wim_resource_chunk_tab(cur_lte,
 							   ctx->out_fd,
+							   ctx->out_chunk_size,
 							   &ctx->cur_chunk_tab,
 							   ctx->write_resource_flags);
 			if (ret)
@@ -1232,6 +1289,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
 				ret = write_wim_resource(cur_lte,
 							 ctx->out_fd,
 							 WIMLIB_COMPRESSION_TYPE_NONE,
+							 0,
 							 &cur_lte->output_resource_entry,
 							 ctx->write_resource_flags,
 							 ctx->comp_ctx);
@@ -1273,6 +1331,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
 								  ctx->lookup_table,
 								  ctx->out_fd,
 								  ctx->out_ctype,
+								  ctx->out_chunk_size,
 								  ctx->comp_ctx,
 								  ctx->write_resource_flags,
 								  ctx->progress_data);
@@ -1372,6 +1431,7 @@ main_writer_thread_finish(void *_ctx)
 					   ctx->lookup_table,
 					   ctx->out_fd,
 					   ctx->out_ctype,
+					   ctx->out_chunk_size,
 					   ctx->comp_ctx,
 					   ctx->write_resource_flags,
 					   ctx->progress_data);
@@ -1389,12 +1449,14 @@ submit_stream_for_compression(struct wim_lookup_table_entry *lte,
 	 * when @lte is already hashed. */
 	sha1_init(&ctx->next_sha_ctx);
 	ctx->next_chunk = 0;
-	ctx->next_num_chunks = wim_resource_chunks(lte);
+	ctx->next_num_chunks = DIV_ROUND_UP(wim_resource_size(lte),
+					    ctx->out_chunk_size);
 	ctx->next_lte = lte;
 	INIT_LIST_HEAD(&lte->msg_list);
 	list_add_tail(&lte->being_compressed_list, &ctx->outstanding_streams);
 	ret = read_resource_prefix(lte, wim_resource_size(lte),
-				   main_writer_thread_cb, ctx, 0);
+				   main_writer_thread_cb,
+				   ctx->out_chunk_size, ctx, 0);
 	if (ret)
 		return ret;
 	wimlib_assert(ctx->next_chunk == ctx->next_num_chunks);
@@ -1408,10 +1470,8 @@ main_thread_process_next_stream(struct wim_lookup_table_entry *lte, void *_ctx)
 	int ret;
 
 	if (wim_resource_size(lte) < 1000 ||
-	    ctx->out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
-	    (lte->resource_location == RESOURCE_IN_WIM &&
-	     !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) &&
-	     lte->wim->compression_type == ctx->out_ctype))
+	    !must_compress_stream(lte, ctx->write_resource_flags,
+				  ctx->out_ctype, ctx->out_chunk_size))
 	{
 		/* Stream is too small or isn't being compressed.  Process it by
 		 * the main thread when we have a chance.  We can't necessarily
@@ -1443,15 +1503,15 @@ get_default_num_threads(void)
  * create the number of threads requested.
  *
  * High level description of the algorithm for writing compressed streams in
- * parallel:  We perform compression on chunks of size WIM_CHUNK_SIZE bytes
- * rather than on full files.  The currently executing thread becomes the main
- * thread and is entirely in charge of reading the data to compress (which may
- * be in any location understood by the resource code--- such as in an external
- * file being captured, or in another WIM file from which an image is being
- * exported) and actually writing the compressed data to the output file.
- * Additional threads are "compressor threads" and all execute the
- * compressor_thread_proc, where they repeatedly retrieve buffers of data from
- * the main thread, compress them, and hand them back to the main thread.
+ * parallel:  We perform compression on chunks rather than on full files.  The
+ * currently executing thread becomes the main thread and is entirely in charge
+ * of reading the data to compress (which may be in any location understood by
+ * the resource code--- such as in an external file being captured, or in
+ * another WIM file from which an image is being exported) and actually writing
+ * the compressed data to the output file.  Additional threads are "compressor
+ * threads" and all execute the compressor_thread_proc, where they repeatedly
+ * retrieve buffers of data from the main thread, compress them, and hand them
+ * back to the main thread.
  *
  * Certain streams, such as streams that do not need to be compressed (e.g.
  * input compression type same as output compression type) or streams of very
@@ -1468,6 +1528,7 @@ write_stream_list_parallel(struct list_head *stream_list,
 			   struct wim_lookup_table *lookup_table,
 			   struct filedes *out_fd,
 			   int out_ctype,
+			   u32 out_chunk_size,
 			   struct wimlib_lzx_context **comp_ctx,
 			   int write_resource_flags,
 			   struct write_streams_progress_data *progress_data,
@@ -1559,6 +1620,7 @@ write_stream_list_parallel(struct list_head *stream_list,
 	ctx.lookup_table          = lookup_table;
 	ctx.out_fd                = out_fd;
 	ctx.out_ctype             = out_ctype;
+	ctx.out_chunk_size	  = out_chunk_size;
 	ctx.comp_ctx		  = comp_ctx;
 	ctx.res_to_compress_queue = &res_to_compress_queue;
 	ctx.compressed_res_queue  = &compressed_res_queue;
@@ -1612,6 +1674,7 @@ out_serial_quiet:
 					lookup_table,
 					out_fd,
 					out_ctype,
+					out_chunk_size,
 					comp_ctx,
 					write_resource_flags,
 					progress_data);
@@ -1627,6 +1690,7 @@ static int
 write_stream_list(struct list_head *stream_list,
 		  struct wim_lookup_table *lookup_table,
 		  struct filedes *out_fd, int out_ctype,
+		  u32 out_chunk_size,
 		  struct wimlib_lzx_context **comp_ctx,
 		  int write_flags,
 		  unsigned num_threads, wimlib_progress_func_t progress_func)
@@ -1662,12 +1726,9 @@ write_stream_list(struct list_head *stream_list,
 	list_for_each_entry(lte, stream_list, write_streams_list) {
 		num_streams++;
 		total_bytes += wim_resource_size(lte);
-		if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
-		       && (wim_resource_compression_type(lte) != out_ctype ||
-			   (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)))
-		{
+		if (must_compress_stream(lte, write_resource_flags,
+					 out_ctype, out_chunk_size))
 			total_compression_bytes += wim_resource_size(lte);
-		}
 		if (lte->resource_location == RESOURCE_IN_WIM) {
 			if (prev_wim_part != lte->wim) {
 				prev_wim_part = lte->wim;
@@ -1697,6 +1758,7 @@ write_stream_list(struct list_head *stream_list,
 						 lookup_table,
 						 out_fd,
 						 out_ctype,
+						 out_chunk_size,
 						 comp_ctx,
 						 write_resource_flags,
 						 &progress_data,
@@ -1707,6 +1769,7 @@ write_stream_list(struct list_head *stream_list,
 					       lookup_table,
 					       out_fd,
 					       out_ctype,
+					       out_chunk_size,
 					       comp_ctx,
 					       write_resource_flags,
 					       &progress_data);
@@ -2030,6 +2093,7 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags,
 				 wim->lookup_table,
 				 &wim->out_fd,
 				 wim->out_compression_type,
+				 wim->out_chunk_size,
 				 &wim->lzx_context,
 				 write_flags,
 				 num_threads,
@@ -2090,6 +2154,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags,
 			ret = write_wim_resource(imd->metadata_lte,
 						 &wim->out_fd,
 						 wim->out_compression_type,
+						 wim->out_chunk_size,
 						 &imd->metadata_lte->output_resource_entry,
 						 write_resource_flags,
 						 &wim->lzx_context);
@@ -2447,22 +2512,6 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags,
 	 * finish_write().  */
 }
 
-/* API function documented in wimlib.h  */
-WIMLIBAPI int
-wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
-{
-	switch (ctype) {
-	case WIMLIB_COMPRESSION_TYPE_INVALID:
-		break;
-	case WIMLIB_COMPRESSION_TYPE_NONE:
-	case WIMLIB_COMPRESSION_TYPE_LZX:
-	case WIMLIB_COMPRESSION_TYPE_XPRESS:
-		wim->out_compression_type = ctype;
-		return 0;
-	}
-	return WIMLIB_ERR_INVALID_PARAM;
-}
-
 /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file
  * descriptor.  */
 int
@@ -2592,6 +2641,9 @@ write_wim_part(WIMStruct *wim,
 	if (wim->compression_type != wim->out_compression_type)
 		wim->hdr.flags = get_wim_hdr_cflags(wim->out_compression_type);
 
+	/* Set the header's chunk size to the output chunk size.  */
+	wim->hdr.chunk_size = wim->out_chunk_size;
+
 	/* Use GUID if specified; otherwise generate a new one.  */
 	if (guid)
 		memcpy(wim->hdr.guid, guid, WIMLIB_GUID_LEN);
@@ -2944,6 +2996,7 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags,
 				wim->lookup_table,
 				&wim->out_fd,
 				wim->compression_type,
+				wim->chunk_size,
 				&wim->lzx_context,
 				write_flags,
 				num_threads,
@@ -3066,7 +3119,8 @@ wimlib_overwrite(WIMStruct *wim, int write_flags,
 	    && !(write_flags & (WIMLIB_WRITE_FLAG_REBUILD |
 				WIMLIB_WRITE_FLAG_PIPABLE))
 	    && !(wim_is_pipable(wim))
-	    && wim->compression_type == wim->out_compression_type)
+	    && wim->compression_type == wim->out_compression_type
+	    && wim->chunk_size == wim->out_chunk_size)
 	{
 		ret = overwrite_wim_inplace(wim, write_flags, num_threads,
 					    progress_func);
diff --git a/src/xml.c b/src/xml.c
index deeedb50..cd335e07 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -1522,6 +1522,7 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
 					     WIM_RESHDR_FLAG_METADATA,
 					     &wim->out_fd,
 					     WIMLIB_COMPRESSION_TYPE_NONE,
+					     0,
 					     out_res_entry,
 					     NULL,
 					     write_resource_flags,
diff --git a/src/xpress-compress.c b/src/xpress-compress.c
index 5a314b69..518a0e3b 100644
--- a/src/xpress-compress.c
+++ b/src/xpress-compress.c
@@ -36,6 +36,10 @@
 #include "wimlib/util.h"
 #include "wimlib/xpress.h"
 
+#ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+#endif
+
 #include <string.h>
 
 /* Intermediate XPRESS match/literal representation.  */
@@ -132,6 +136,7 @@ xpress_record_match(unsigned len, unsigned offset, void *_ctx)
 static const struct lz_params xpress_lz_params = {
 	.min_match      = XPRESS_MIN_MATCH_LEN,
 	.max_match      = XPRESS_MAX_MATCH_LEN,
+	.max_offset	= XPRESS_MAX_OFFSET,
 	.good_match	= 16,
 	.nice_match     = 32,
 	.max_chain_len  = 16,
@@ -149,13 +154,17 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
 	struct output_bitstream ostream;
 
 	struct xpress_record_ctx record_ctx;
-	struct xpress_match matches[uncompressed_len];
-	u8 udata[uncompressed_len + 8];
+
+	struct xpress_match *matches;
+	input_idx_t *prev_tab;
+	u8 *udata;
+
 	u16 codewords[XPRESS_NUM_SYMBOLS];
 	u8 lens[XPRESS_NUM_SYMBOLS];
 	input_idx_t num_matches;
 	input_idx_t compressed_len;
 	input_idx_t i;
+	const size_t stack_max = 65536;
 
 	/* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
 	 * impossible to compress 256 bytes or less of data to less than the
@@ -169,6 +178,21 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
 	if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
 		return 0;
 
+	if (uncompressed_len <= stack_max) {
+		matches = alloca(uncompressed_len * sizeof(matches[0]));
+		udata = alloca(uncompressed_len + 8);
+		prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0]));
+	} else {
+		matches = MALLOC(uncompressed_len * sizeof(matches[0]));
+		udata = MALLOC(uncompressed_len + 8);
+		prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0]));
+		if (matches == NULL || udata == NULL || prev_tab == NULL) {
+			WARNING("Failed to allocate memory for compression...");
+			compressed_len = 0;
+			goto out_free;
+		}
+	}
+
 	/* Copy the data to a temporary buffer, but only to avoid
 	 * inconsequential accesses of uninitialized memory in
 	 * lz_analyze_block().  */
@@ -183,7 +207,8 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
 			 xpress_record_match,
 			 xpress_record_literal,
 			 &record_ctx,
-			 &xpress_lz_params);
+			 &xpress_lz_params,
+			 prev_tab);
 
 	num_matches = (record_ctx.matches - matches);
 
@@ -206,8 +231,10 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
 
 	/* Flush any pending data and get the length of the compressed data.  */
 	compressed_len = flush_output_bitstream(&ostream);
-	if (compressed_len == ~(input_idx_t)0)
-		return 0;
+	if (compressed_len == ~(input_idx_t)0) {
+		compressed_len = 0;
+		goto out_free;
+	}
 	compressed_len += XPRESS_NUM_SYMBOLS / 2;
 
 #if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1
@@ -218,15 +245,24 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
 		ERROR("Failed to decompress data we "
 		      "compressed using XPRESS algorithm");
 		wimlib_assert(0);
-		return 0;
+		compressed_len = 0;
+		goto out_free;
 	}
 
 	if (memcmp(uncompressed_data, udata, uncompressed_len)) {
 		ERROR("Data we compressed using XPRESS algorithm "
 		      "didn't decompress to original");
 		wimlib_assert(0);
-		return 0;
+		compressed_len = 0;
+		goto out_free;
 	}
 #endif
+
+out_free:
+	if (uncompressed_len > stack_max) {
+		FREE(matches);
+		FREE(udata);
+		FREE(prev_tab);
+	}
 	return compressed_len;
 }
-- 
2.43.0