Variable chunk size support (currently XPRESS only)

author Eric Biggers <ebiggers3@gmail.com>

Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)

committer Eric Biggers <ebiggers3@gmail.com>

Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)
author Eric Biggers <ebiggers3@gmail.com>
Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)
diff --git a/include/wimlib.h b/include/wimlib.h

index f4b0dc5acdc1762de1ab6ac190d31a2f6a805f61..82dd1afcea633d7f42342a325e80af55ad75d3e6 100644 (file)
--- a/include/wimlib.h
+++ b/include/wimlib.h
@@ -3359,15 +3359,39 @@ wimlib_set_image_descripton(WIMStruct *wim, int image,
  /**
   * @ingroup G_writing_and_overwriting_wims
   *
- * Set the compression type of a WIM to use in subsequent calls to
+ * Set the compression chunk size of a WIM to use in subsequent calls to
   * wimlib_write() or wimlib_overwrite().
   *
+ * @param wim
+ *     ::WIMStruct for a WIM.
+ * @param chunk_size
+ *     The chunk size (in bytes) to set.  The valid chunk sizes are dependent
+ *     on the compression format.  The XPRESS compression format supports chunk
+ *     sizes that are powers of 2 with exponents between 15 and 26 inclusive,
+ *     whereas the LZX compression format currently only supports a chunk size
+ *     of 32768.
+ *
   * @return 0 on success; nonzero on error.
   *
+ * @retval ::WIMLIB_ERR_INVALID_CHUNK_SIZE
+ *     @p chunk_size is not a supported chunk size.
+ */
+extern int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size);
+
+/**
+ * @ingroup G_writing_and_overwriting_wims
+ *
+ * Set the compression type of a WIM to use in subsequent calls to
+ * wimlib_write() or wimlib_overwrite().
+ *
   * @param wim
   *     ::WIMStruct for a WIM.
   * @param ctype
- *     The compression type to set (one of ::wimlib_compression_type).
+ *     The compression type to set (one of ::wimlib_compression_type).  If this
+ *     compression type is incompatible with the current output chunk size
+ *     (either the default or as set with wimlib_set_output_chunk_size()), the
+ *     output chunk size is reset to the default for that compression type.
   *
   * @return 0 on success; nonzero on error.
   *
@@ -3871,8 +3895,28 @@ wimlib_write_to_fd(WIMStruct *wim,
  /**
   * @ingroup G_compression
   *
- * This function is equivalent to wimlib_lzx_compress(), but instead compresses
- * the data using "XPRESS" compression.
+ * Compress a chunk of data using XPRESS compression.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * As of wimlib v1.5.4, this function can be used with @p chunk_size greater
+ * than 32768 bytes and is only limited by available memory.  However, the
+ * XPRESS format itself still caps match offsets to 65535, so if a larger chunk
+ * size is chosen, then the matching will effectively occur in a sliding window
+ * over it.
+ *
+ * @param chunk
+ *     Uncompressed data of the chunk.
+ * @param chunk_size
+ *     Size of the uncompressed chunk, in bytes.
+ * @param out
+ *     Pointer to output buffer of size at least (@p chunk_size - 1) bytes.
+ *
+ * @return
+ *     The size of the compressed data written to @p out in bytes, or 0 if the
+ *     data could not be compressed to (@p chunk_size - 1) bytes or fewer.
   */
  extern unsigned
  wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out);
@@ -3880,8 +3924,26 @@ wimlib_xpress_compress(const void *chunk, unsigned chunk_size, void *out);
  /**
   * @ingroup G_compression
   *
- * This function is equivalent to wimlib_lzx_decompress(), but instead assumes
- * the data is compressed using "XPRESS" compression.
+ * Decompresses a chunk of XPRESS-compressed data.
+ *
+ * This function is exported for convenience only and should only be used by
+ * library clients looking to make use of wimlib's compression code for another
+ * purpose.
+ *
+ * @param compressed_data
+ *     Pointer to the compressed data.
+ *
+ * @param compressed_len
+ *     Length of the compressed data, in bytes.
+ *
+ * @param uncompressed_data
+ *     Pointer to the buffer into which to write the uncompressed data.
+ *
+ * @param uncompressed_len
+ *     Length of the uncompressed data.
+ *
+ * @return
+ *     0 on success; non-zero on failure.
   */
  extern int
  wimlib_xpress_decompress(const void *compressed_data, unsigned compressed_len,
diff --git a/include/wimlib/compress.h b/include/wimlib/compress.h

index f3ce6e2deb834921589d14593084c5829819e3b5..eaa6ea0e1afca354c0f1f0b34110618f910f37d4 100644 (file)
--- a/include/wimlib/compress.h
+++ b/include/wimlib/compress.h
@@ -69,6 +69,7 @@ bitstream_put_byte(struct output_bitstream *ostream, u8 n);
  struct lz_params {
         unsigned min_match;
         unsigned max_match;
+       unsigned max_offset;
         unsigned nice_match;
         unsigned good_match;
         unsigned max_chain_len;
@@ -85,7 +86,8 @@ lz_analyze_block(const u8 window[],
                  lz_record_match_t record_match,
                  lz_record_literal_t record_literal,
                  void *record_ctx,
-                const struct lz_params *params);
+                const struct lz_params *params,
+                input_idx_t prev_tab[]);
  
  extern void
  make_canonical_huffman_code(unsigned num_syms,
diff --git a/include/wimlib/header.h b/include/wimlib/header.h

index cd68486b0d9f118f90b8c82c04443d6ddad2db03..f7c4b7fff1818844637474a8a4c1452b818febcc 100644 (file)
--- a/include/wimlib/header.h
+++ b/include/wimlib/header.h
@@ -11,11 +11,6 @@
  /* Length of the WIM header on disk.  */
  #define WIM_HEADER_DISK_SIZE 208
  
-/* Compressed resources in the WIM are divided into separated compressed chunks
- * of this size.  This value is unfortunately not configurable (at least when
- * compatibility with Microsoft's software is desired).  */
-#define WIM_CHUNK_SIZE 32768
-
  /* Version of the WIM file.  There is an older version (used for prerelease
   * versions of Windows Vista), but wimlib doesn't support it.  The differences
   * between the versions are undocumented.  */
@@ -69,9 +64,8 @@ struct wim_header_disk {
         /* Flags for the WIM file (WIM_HDR_FLAG_*) */
         u32 wim_flags;
  
-       /* Uncompressed chunk size of resources in the WIM.  0 if the WIM is
-        * uncompressed.  If compressed, WIM_CHUNK_SIZE is expected (currently
-        * the only supported value).  */
+       /* Chunk size for compressed resources in the WIM, or 0 if the WIM is
+        * uncompressed.  */
         u32 chunk_size;
  
         /* Globally unique identifier for the WIM file.  Basically a bunch of
@@ -127,6 +121,9 @@ struct wim_header {
         /* Bitwise OR of one or more of the WIM_HDR_FLAG_* defined below. */
         u32 flags;
  
+       /* Compressed resource chunk size  */
+       u32 chunk_size;
+
         /* A unique identifier for the WIM file. */
         u8 guid[WIM_GID_LEN];
  
diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h

index 6a14daaa549c9f0c967c656ae7f6d9ad4318855e..d1b980dafe240f4ae18c33c05443ed9f09c98b9f 100644 (file)
--- a/include/wimlib/lookup_table.h
+++ b/include/wimlib/lookup_table.h
@@ -298,16 +298,26 @@ wim_resource_size(const struct wim_lookup_table_entry *lte)
         return lte->resource_entry.original_size;
  }
  
+static inline u32
+wim_resource_chunk_size(const struct wim_lookup_table_entry * lte)
+{
+       if (lte->resource_location == RESOURCE_IN_WIM &&
+           lte->compression_type != WIMLIB_COMPRESSION_TYPE_NONE)
+               return lte->wim->chunk_size;
+       else
+               return 32768;
+}
+
+
  static inline u64
  wim_resource_chunks(const struct wim_lookup_table_entry *lte)
  {
-       return DIV_ROUND_UP(wim_resource_size(lte), WIM_CHUNK_SIZE);
+       return DIV_ROUND_UP(wim_resource_size(lte), wim_resource_chunk_size(lte));
  }
  
  static inline int
  wim_resource_compression_type(const struct wim_lookup_table_entry *lte)
  {
-       BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
         return lte->compression_type;
  }
  
diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h

index 425d9dfdf567b35f4ffdfd86a6e1f31418fcabe8..fd93fa7b8ae7d2e595b0c5e9533910060668df81 100644 (file)
--- a/include/wimlib/ntfs_3g.h
+++ b/include/wimlib/ntfs_3g.h
@@ -14,6 +14,7 @@ extern int
  read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
                       u64 size,
                       consume_data_callback_t cb,
+                     u32 in_chunk_size,
                       void *ctx_or_buf,
                       int _ignored_flags);
  
diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h

index 799cb74088b659196a631869d75b49d4df6a8437..91b07ce88d50eb2be899146e3502e482c22b3d15 100644 (file)
--- a/include/wimlib/resource.h
+++ b/include/wimlib/resource.h
@@ -110,7 +110,8 @@ put_resource_entry(const struct resource_entry *entry,
  extern int
  read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                           u64 size, consume_data_callback_t cb,
-                         void *ctx_or_buf, int flags, u64 offset);
+                         u32 in_chunk_size, void *ctx_or_buf,
+                         int flags, u64 offset);
  
  extern int
  read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
@@ -128,14 +129,16 @@ res_entry_to_data(const struct resource_entry *res_entry,
  
  extern int
  read_resource_prefix(const struct wim_lookup_table_entry *lte,
-                    u64 size, consume_data_callback_t cb, void *ctx_or_buf,
-                    int flags);
+                    u64 size, consume_data_callback_t cb,
+                    u32 in_chunk_size, void *ctx_or_buf, int flags);
  
  /* Functions to write a resource.  */
  
  extern int
  write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd,
-                  int out_ctype, struct resource_entry *out_res_entry,
+                  int out_ctype,
+                  u32 out_chunk_size,
+                  struct resource_entry *out_res_entry,
                    int write_resource_flags,
                    struct wimlib_lzx_context **comp_ctx);
  
@@ -143,6 +146,7 @@ extern int
  write_wim_resource_from_buffer(const void *buf, size_t buf_size,
                                int reshdr_flags, struct filedes *out_fd,
                                int out_ctype,
+                              u32 out_chunk_size,
                                struct resource_entry *out_res_entry,
                                u8 *hash_ret, int write_resource_flags,
                                struct wimlib_lzx_context **comp_ctx);
diff --git a/include/wimlib/util.h b/include/wimlib/util.h

index 7bc8359cf61f76f7e07a400c316fb150e61a5d9f..c61c1f9751bb15317c516b92b4f15633ce533522 100644 (file)
--- a/include/wimlib/util.h
+++ b/include/wimlib/util.h
@@ -121,6 +121,13 @@ bsr32(u32 n)
  #endif
  }
  
+static inline bool
+is_power_of_2(unsigned long n)
+{
+       return (n != 0 && (n & (n - 1)) == 0);
+
+}
+
  static inline u64
  hash_u64(u64 n)
  {
diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h

index b6cf84a7f94b69161ae6190da885e30a28afa766..e972950cf720b563013ba78721c337944b29c0a5 100644 (file)
--- a/include/wimlib/wim.h
+++ b/include/wimlib/wim.h
@@ -76,6 +76,15 @@ struct WIMStruct {
          * wimlib_set_output_compression_type(); otherwise is the same as
          * compression_type.  */
         u8 out_compression_type : 2;
+
+       /* Uncompressed size of compressed chunks in this WIM (cached from
+        * header).  */
+       u32 chunk_size;
+
+       /* Overridden chunk size for wimlib_overwrite() or wimlib_write().  Can
+        * be changed by wimlib_set_output_chunk_size(); otherwise is the same
+        * as chunk_size.  */
+       u32 out_chunk_size;
  };
  
  static inline bool wim_is_pipable(const WIMStruct *wim)
@@ -100,7 +109,7 @@ extern u32
  get_wim_hdr_cflags(int ctype);
  
  extern int
-init_wim_header(struct wim_header *hdr, int ctype);
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size);
  
  extern int
  read_wim_header(const tchar *filename, struct filedes *in_fd,
diff --git a/include/wimlib/win32.h b/include/wimlib/win32.h

index e2bdde0e55b76f36de5e536eb638260b6ca857af..f42201272c07eb34ba3a84f4df86cd8239fa38f8 100644 (file)
--- a/include/wimlib/win32.h
+++ b/include/wimlib/win32.h
@@ -15,6 +15,7 @@ extern int
  read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
                        u64 size,
                        consume_data_callback_t cb,
+                      u32 in_chunk_size,
                        void *ctx_or_buf,
                        int _ignored_flags);
  
@@ -22,6 +23,7 @@ extern int
  read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
                                  u64 size,
                                  consume_data_callback_t cb,
+                                u32 in_chunk_size,
                                  void *ctx_or_buf,
                                  int _ignored_flags);
  
diff --git a/programs/imagex.c b/programs/imagex.c

index 06ddeddd4060730ad083fcff690629d413583ffe..d3588a9e5175f94fc10b393b31d8b0df6fa73b09 100644 (file)
--- a/programs/imagex.c
+++ b/programs/imagex.c
@@ -120,6 +120,7 @@ enum {
         IMAGEX_ALLOW_OTHER_OPTION,
         IMAGEX_BOOT_OPTION,
         IMAGEX_CHECK_OPTION,
+       IMAGEX_CHUNK_SIZE_OPTION,
         IMAGEX_COMMAND_OPTION,
         IMAGEX_COMMIT_OPTION,
         IMAGEX_COMPRESS_OPTION,
@@ -190,6 +191,7 @@ static const struct option capture_or_append_options[] = {
         {T("nocheck"),     no_argument,       NULL, IMAGEX_NOCHECK_OPTION},
         {T("compress"),    required_argument, NULL, IMAGEX_COMPRESS_OPTION},
         {T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+       {T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
         {T("config"),      required_argument, NULL, IMAGEX_CONFIG_OPTION},
         {T("dereference"), no_argument,       NULL, IMAGEX_DEREFERENCE_OPTION},
         {T("flags"),       required_argument, NULL, IMAGEX_FLAGS_OPTION},
@@ -286,6 +288,7 @@ static const struct option optimize_options[] = {
         {T("recompress"),  no_argument,       NULL, IMAGEX_RECOMPRESS_OPTION},
         {T("compress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
         {T("recompress-slow"), no_argument,     NULL, IMAGEX_COMPRESS_SLOW_OPTION},
+       {T("chunk-size"),  required_argument, NULL, IMAGEX_CHUNK_SIZE_OPTION},
         {T("threads"),     required_argument, NULL, IMAGEX_THREADS_OPTION},
         {T("pipable"),     no_argument,       NULL, IMAGEX_PIPABLE_OPTION},
         {T("not-pipable"), no_argument,       NULL, IMAGEX_NOT_PIPABLE_OPTION},
@@ -1273,6 +1276,19 @@ parse_num_threads(const tchar *optarg)
         }
  }
  
+static uint32_t parse_chunk_size(const char *optarg)
+{
+       char *tmp;
+       unsigned long chunk_size = strtoul(optarg, &tmp, 10);
+       if (chunk_size >= UINT32_MAX || *tmp || tmp == optarg) {
+               imagex_error(T("Chunk size must be a non-negative integer!"));
+               return UINT32_MAX;
+       } else {
+               return chunk_size;
+       }
+}
+
+
  /*
   * Parse an option passed to an update command.
   *
@@ -1671,6 +1687,7 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                               WIMLIB_ADD_IMAGE_FLAG_WINCONFIG;
         int write_flags = 0;
         int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+       uint32_t chunk_size = UINT32_MAX;
         const tchar *wimfile;
         int wim_fd;
         const tchar *name;
@@ -1731,6 +1748,11 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
                                 goto out_err;
                         compression_type = WIMLIB_COMPRESSION_TYPE_LZX;
                         break;
+               case IMAGEX_CHUNK_SIZE_OPTION:
+                       chunk_size = parse_chunk_size(optarg);
+                       if (chunk_size == UINT32_MAX)
+                               goto out_err;
+                       break;
                 case IMAGEX_FLAGS_OPTION:
                         flags_element = optarg;
                         break;
@@ -1961,6 +1983,13 @@ imagex_capture_or_append(int argc, tchar **argv, int cmd)
         if (ret)
                 goto out_free_config;
  
+       /* Set chunk size if non-default.  */
+       if (chunk_size != UINT32_MAX) {
+               ret = wimlib_set_output_chunk_size(wim, chunk_size);
+               if (ret)
+                       goto out_free_wim;
+       }
+
  #ifndef __WIN32__
         /* Detect if source is regular file or block device and set NTFS volume
          * capture mode.  */
@@ -2486,6 +2515,8 @@ imagex_export(int argc, tchar **argv, int cmd)
                 ret = wimlib_create_new_wim(compression_type, &dest_wim);
                 if (ret)
                         goto out_free_src_wim;
+
+               wimlib_set_output_chunk_size(dest_wim, src_info.chunk_size);
         }
  
         image = wimlib_resolve_image(src_wim, src_image_num_or_name);
@@ -2745,6 +2776,8 @@ print_wim_information(const tchar *wimfile, const struct wimlib_wim_info *info)
         tprintf(T("Image Count:    %d\n"), info->image_count);
         tprintf(T("Compression:    %"TS"\n"),
                 wimlib_get_compression_type_string(info->compression_type));
+       tprintf(T("Chunk Size:     %"PRIu32" bytes\n"),
+               info->chunk_size);
         tprintf(T("Part Number:    %d/%d\n"), info->part_number, info->total_parts);
         tprintf(T("Boot Index:     %d\n"), info->boot_index);
         tprintf(T("Size:           %"PRIu64" bytes\n"), info->total_bytes);
@@ -3248,6 +3281,7 @@ imagex_optimize(int argc, tchar **argv, int cmd)
         int open_flags = WIMLIB_OPEN_FLAG_WRITE_ACCESS;
         int write_flags = WIMLIB_WRITE_FLAG_REBUILD;
         int compression_type = WIMLIB_COMPRESSION_TYPE_INVALID;
+       uint32_t chunk_size = UINT32_MAX;
         int ret;
         WIMStruct *wim;
         const tchar *wimfile;
@@ -3280,6 +3314,11 @@ imagex_optimize(int argc, tchar **argv, int cmd)
                         if (ret)
                                 goto out_err;
                         break;
+               case IMAGEX_CHUNK_SIZE_OPTION:
+                       chunk_size = parse_chunk_size(optarg);
+                       if (chunk_size == UINT32_MAX)
+                               goto out_err;
+                       break;
                 case IMAGEX_THREADS_OPTION:
                         num_threads = parse_num_threads(optarg);
                         if (num_threads == UINT_MAX)
@@ -3308,11 +3347,19 @@ imagex_optimize(int argc, tchar **argv, int cmd)
                 goto out;
  
         if (compression_type != WIMLIB_COMPRESSION_TYPE_INVALID) {
+               /* Change compression type.  */
                 ret = wimlib_set_output_compression_type(wim, compression_type);
                 if (ret)
                         goto out_wimlib_free;
         }
  
+       if (chunk_size != UINT32_MAX) {
+               /* Change chunk size.  */
+               ret = wimlib_set_output_chunk_size(wim, chunk_size);
+               if (ret)
+                       goto out_wimlib_free;
+       }
+
         old_size = file_get_size(wimfile);
         tprintf(T("\"%"TS"\" original size: "), wimfile);
         if (old_size == -1)
diff --git a/src/extract.c b/src/extract.c

index 656a76c9487e9cdd509015c04c8d1727f0b83e11..4d5ba7e0b7cbfa852829d1a4762c015b72137cdd 100644 (file)
--- a/src/extract.c
+++ b/src/extract.c
@@ -1382,8 +1382,11 @@ read_error:
  static int
  skip_pwm_stream(struct wim_lookup_table_entry *lte)
  {
-       return read_partial_wim_resource(lte, wim_resource_size(lte),
-                                        NULL, NULL,
+       return read_partial_wim_resource(lte,
+                                        wim_resource_size(lte),
+                                        NULL,
+                                        wim_resource_chunk_size(lte),
+                                        NULL,
                                          WIMLIB_READ_RESOURCE_FLAG_SEEK_ONLY,
                                          0);
  }
diff --git a/src/header.c b/src/header.c

index 9492e7f24d268d03d14601fa4d2f419abb9239eb..e3f64493f979707ba74a75af1fccb81566e24179 100644 (file)
--- a/src/header.c
+++ b/src/header.c
@@ -125,14 +125,8 @@ read_wim_header(const tchar *filename, struct filedes *in_fd,
         }
  
         hdr->flags = le32_to_cpu(disk_hdr.wim_flags);
-       if (le32_to_cpu(disk_hdr.chunk_size) != WIM_CHUNK_SIZE &&
-           (hdr->flags & WIM_HDR_FLAG_COMPRESSION)) {
-               ERROR("\"%"TS"\": Unexpected chunk size of %u! Ask the author to "
-                     "implement support for other chunk sizes.",
-                     filename, le32_to_cpu(disk_hdr.chunk_size));
-               ERROR("(Or it might just be that the WIM header is invalid.)");
-               return WIMLIB_ERR_INVALID_CHUNK_SIZE;
-       }
+
+       hdr->chunk_size = le32_to_cpu(disk_hdr.chunk_size);
  
         memcpy(hdr->guid, disk_hdr.guid, WIM_GID_LEN);
  
@@ -188,8 +182,10 @@ write_wim_header_at_offset(const struct wim_header *hdr, struct filedes *out_fd,
         disk_hdr.hdr_size = cpu_to_le32(sizeof(struct wim_header_disk));
         disk_hdr.wim_version = cpu_to_le32(WIM_VERSION);
         disk_hdr.wim_flags = cpu_to_le32(hdr->flags);
-       disk_hdr.chunk_size = cpu_to_le32((hdr->flags & WIM_HDR_FLAG_COMPRESSION) ?
-                                               WIM_CHUNK_SIZE : 0);
+       if (hdr->flags & WIM_HDR_FLAG_COMPRESSION)
+               disk_hdr.chunk_size = cpu_to_le32(hdr->chunk_size);
+       else
+               disk_hdr.chunk_size = 0;
         memcpy(disk_hdr.guid, hdr->guid, WIM_GID_LEN);
  
         disk_hdr.part_number = cpu_to_le16(hdr->part_number);
@@ -249,7 +245,7 @@ get_wim_hdr_cflags(int ctype)
   * Initializes the header for a WIM file.
   */
  int
-init_wim_header(struct wim_header *hdr, int ctype)
+init_wim_header(struct wim_header *hdr, int ctype, u32 chunk_size)
  {
         memset(hdr, 0, sizeof(struct wim_header));
         hdr->flags = get_wim_hdr_cflags(ctype);
@@ -257,6 +253,7 @@ init_wim_header(struct wim_header *hdr, int ctype)
                 ERROR("Invalid compression type specified (%d)", ctype);
                 return WIMLIB_ERR_INVALID_COMPRESSION_TYPE;
         }
+       hdr->chunk_size = chunk_size;
         hdr->total_parts = 1;
         hdr->part_number = 1;
         randomize_byte_array(hdr->guid, sizeof(hdr->guid));
@@ -297,7 +294,7 @@ wimlib_print_header(const WIMStruct *wim)
                 if (hdr_flags[i].flag & hdr->flags)
                         tprintf(T("    WIM_HDR_FLAG_%s is set\n"), hdr_flags[i].name);
  
-       tprintf(T("Chunk Size                  = %u\n"), WIM_CHUNK_SIZE);
+       tprintf(T("Chunk Size                  = %u\n"), wim->hdr.chunk_size);
         tfputs (T("GUID                        = "), stdout);
         print_byte_field(hdr->guid, WIM_GID_LEN, stdout);
         tputchar(T('\n'));
diff --git a/src/integrity.c b/src/integrity.c

index b83b9c06279b642cbcd7e8631cc806d47a603eef..21bc2f151ff314dea0fbfe3f241b8ee8aed1c3e4 100644 (file)
--- a/src/integrity.c
+++ b/src/integrity.c
@@ -365,6 +365,7 @@ write_integrity_table(WIMStruct *wim,
                                              0,
                                              &wim->out_fd,
                                              WIMLIB_COMPRESSION_TYPE_NONE,
+                                            0,
                                              &wim->hdr.integrity,
                                              NULL,
                                              0,
diff --git a/src/lookup_table.c b/src/lookup_table.c

index 553cf33b1c413f4dbfa430291b8439b85abec869..118cd06558dac6a2cb57a1b5b35aafe42112c219 100644 (file)
--- a/src/lookup_table.c
+++ b/src/lookup_table.c
@@ -75,8 +75,10 @@ new_lookup_table_entry(void)
  
         lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
         if (lte) {
-               lte->part_number  = 1;
-               lte->refcnt       = 1;
+               lte->part_number = 1;
+               lte->refcnt = 1;
+               BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0);
+               BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
         } else {
                 ERROR("Out of memory (tried to allocate %zu bytes for "
                       "lookup table entry)",
@@ -711,6 +713,7 @@ write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
                                              WIM_RESHDR_FLAG_METADATA,
                                              out_fd,
                                              WIMLIB_COMPRESSION_TYPE_NONE,
+                                            0,
                                              out_res_entry,
                                              NULL,
                                              write_resource_flags,
diff --git a/src/lz77.c b/src/lz77.c

index b9a40174393460e36491ecd97428feeb6795abb9..887b874506857251b2bda6c92809c3d008777222 100644 (file)
--- a/src/lz77.c
+++ b/src/lz77.c
@@ -107,6 +107,9 @@ insert_string(input_idx_t hash_tab[], input_idx_t prev_tab[],
   * @params:            Parameters that affect how long the search will proceed
   *                             before going with the best that has been found
   *                             so far.
+ * @min_start_pos:     If the chain reaches a match starting before this
+ *                     position (including the end-of-chain 0), the search will
+ *                     be terminated.
   *
   * Returns the length of the match that was found.
   */
@@ -115,7 +118,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
               unsigned strstart, const input_idx_t prev_tab[],
               unsigned cur_match, unsigned prev_len,
               unsigned *match_start_ret,
-             const struct lz_params *params)
+             const struct lz_params *params,
+             unsigned min_start_pos)
  {
         unsigned chain_len = params->max_chain_len;
  
@@ -146,9 +150,8 @@ longest_match(const u8 window[], unsigned bytes_remaining,
                  * performance reasons.  Therefore uninitialized memory will be
                  * accessed, and conditional jumps will be made that depend on
                  * those values.  However the length of the match is limited to
-                * the lookahead, so the output of deflate is not affected by
-                * the uninitialized values.
-                */
+                * the lookahead, so the output of lz_analyze_block() is not
+                * affected by the uninitialized values.  */
  
                 if (match[best_len] != scan_end
                     || match[best_len - 1] != scan_end1
@@ -182,7 +185,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
                         scan_end1  = scan[best_len - 1];
                         scan_end   = scan[best_len];
                 }
-       } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) != 0);
+       } while (--chain_len != 0 && (cur_match = prev_tab[cur_match]) >= min_start_pos);
         *match_start_ret = match_start;
         return min(min(best_len, bytes_remaining), params->max_match);
  }
@@ -201,6 +204,7 @@ longest_match(const u8 window[], unsigned bytes_remaining,
   * @params:            Structure that contains parameters that affect how the
   *                             analysis proceeds (mainly how good the matches
   *                             have to be).
+ * @prev_tab:          Temporary space containing at least @window_size elements.
   */
  void
  lz_analyze_block(const u8 window[],
@@ -208,7 +212,8 @@ lz_analyze_block(const u8 window[],
                  lz_record_match_t record_match,
                  lz_record_literal_t record_literal,
                  void *record_ctx,
-                const struct lz_params *params)
+                const struct lz_params *params,
+                input_idx_t prev_tab[])
  {
         unsigned cur_input_pos = 0;
         unsigned hash          = 0;
@@ -219,7 +224,7 @@ lz_analyze_block(const u8 window[],
         unsigned match_start   = 0;
         bool match_available = false;
         input_idx_t hash_tab[HASH_SIZE];
-       input_idx_t prev_tab[window_size];
+       unsigned min_start_pos = 1;
  
         ZERO_ARRAY(hash_tab);
  
@@ -245,7 +250,14 @@ lz_analyze_block(const u8 window[],
                 prev_start = match_start;
                 match_len = params->min_match - 1;
  
-               if (hash_head != 0 && prev_len < params->max_lazy_match) {
+               if (cur_input_pos > params->max_offset)
+                       min_start_pos = cur_input_pos - params->max_offset;
+               else
+                       min_start_pos = 1;
+
+               if (hash_head >= min_start_pos &&
+                   prev_len < params->max_lazy_match)
+               {
                         /* To simplify the code, we prevent matches with the
                          * string of window index 0 (in particular we have to
                          * avoid a match of the string with itself at the start
@@ -254,7 +266,8 @@ lz_analyze_block(const u8 window[],
                                                   window_size - cur_input_pos,
                                                   cur_input_pos, prev_tab,
                                                   hash_head, prev_len,
-                                                 &match_start, params);
+                                                 &match_start, params,
+                                                 min_start_pos);
  
                         if (match_len == params->min_match &&
                              cur_input_pos - match_start > params->too_far)
diff --git a/src/lzx-compress.c b/src/lzx-compress.c

index 9874a7b949ccb97ed80215baaa6cd751e81e9228..6e1a0d9c6cd873737afc179831411d3e93079c63 100644 (file)
--- a/src/lzx-compress.c
+++ b/src/lzx-compress.c
@@ -2174,6 +2174,7 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
                  * aren't worth choosing when using greedy or lazy parsing.  */
                 .min_match      = 3,
                 .max_match      = LZX_MAX_MATCH_LEN,
+               .max_offset     = 32768,
                 .good_match     = LZX_MAX_MATCH_LEN,
                 .nice_match     = LZX_MAX_MATCH_LEN,
                 .max_chain_len  = LZX_MAX_MATCH_LEN,
@@ -2187,12 +2188,16 @@ lzx_prepare_block_fast(struct lzx_compressor * ctx)
         record_ctx.matches = ctx->chosen_matches;
  
         /* Determine series of matches/literals to output.  */
-       lz_analyze_block(ctx->window,
-                        ctx->window_size,
-                        lzx_record_match,
-                        lzx_record_literal,
-                        &record_ctx,
-                        &lzx_lz_params);
+       {
+               input_idx_t prev_tab[ctx->window_size];
+               lz_analyze_block(ctx->window,
+                                ctx->window_size,
+                                lzx_record_match,
+                                lzx_record_literal,
+                                &record_ctx,
+                                &lzx_lz_params,
+                                prev_tab);
+       }
  
  
         /* Set up block specification.  */
diff --git a/src/metadata_resource.c b/src/metadata_resource.c

index d69ad82c15676beb7efc87ec649c6ee0aebe4557..5ee296642fb70651b5a9c609d8571099bb968d40 100644 (file)
--- a/src/metadata_resource.c
+++ b/src/metadata_resource.c
@@ -297,6 +297,7 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags)
         ret = write_wim_resource_from_buffer(buf, len, WIM_RESHDR_FLAG_METADATA,
                                              &wim->out_fd,
                                              wim->out_compression_type,
+                                            wim->out_chunk_size,
                                              &imd->metadata_lte->output_resource_entry,
                                              imd->metadata_lte->hash,
                                              write_resource_flags,
diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c

index c674cca9729db575d79da5183c4e61920f25d861..8d5d4c63b31aa8a863924f23c7c0bf93e1a29be8 100644 (file)
--- a/src/ntfs-3g_capture.c
+++ b/src/ntfs-3g_capture.c
@@ -78,6 +78,7 @@ int
  read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
                       u64 size,
                       consume_data_callback_t cb,
+                     u32 in_chunk_size,
                       void *ctx_or_buf,
                       int _ignored_flags)
  {
@@ -88,7 +89,9 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
         s64 pos;
         s64 bytes_remaining;
         void *out_buf;
+       bool out_buf_malloced;
         int ret;
+       size_t stack_max = 32768;
  
         ni = ntfs_pathname_to_inode(vol, NULL, loc->path);
         if (!ni) {
@@ -103,30 +106,44 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
                 goto out_close_ntfs_inode;
         }
  
-       if (cb)
-               out_buf = alloca(WIM_CHUNK_SIZE);
-       else
+       out_buf_malloced = false;
+       if (cb) {
+               if (in_chunk_size <= stack_max) {
+                       out_buf = alloca(in_chunk_size);
+               } else {
+                       out_buf = MALLOC(in_chunk_size);
+                       if (out_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_close_ntfs_attr;
+                       }
+                       out_buf_malloced = true;
+               }
+       } else {
                 out_buf = ctx_or_buf;
+       }
         pos = (loc->is_reparse_point) ? 8 : 0;
         bytes_remaining = size;
         while (bytes_remaining) {
-               s64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
+               s64 to_read = min(bytes_remaining, in_chunk_size);
                 if (ntfs_attr_pread(na, pos, to_read, out_buf) != to_read) {
                         ERROR_WITH_ERRNO("Error reading \"%"TS"\"", loc->path);
                         ret = WIMLIB_ERR_NTFS_3G;
-                       goto out_close_ntfs_attr;
+                       goto out_free_memory;
                 }
                 pos += to_read;
                 bytes_remaining -= to_read;
                 if (cb) {
                         ret = cb(out_buf, to_read, ctx_or_buf);
                         if (ret)
-                               goto out_close_ntfs_attr;
+                               goto out_free_memory;
                 } else {
                         out_buf += to_read;
                 }
         }
         ret = 0;
+out_free_memory:
+       if (out_buf_malloced)
+               FREE(out_buf);
  out_close_ntfs_attr:
         ntfs_attr_close(na);
  out_close_ntfs_inode:
diff --git a/src/resource.c b/src/resource.c

index 001ea02431daf72f36d1ddedad32a8a5d50a24de..092669e16a7f278935c9819c4108d87e979d82cc 100644 (file)
--- a/src/resource.c
+++ b/src/resource.c
@@ -96,10 +96,15 @@ typedef int (*decompress_func_t)(const void *, unsigned, void *, unsigned);
  static decompress_func_t
  get_decompress_func(int ctype)
  {
-       if (ctype == WIMLIB_COMPRESSION_TYPE_LZX)
+       switch (ctype) {
+       case WIMLIB_COMPRESSION_TYPE_LZX:
                 return wimlib_lzx_decompress;
-       else
+       case WIMLIB_COMPRESSION_TYPE_XPRESS:
                 return wimlib_xpress_decompress;
+       default:
+               wimlib_assert(0);
+               return NULL;
+       }
  }
  
  /*
@@ -122,12 +127,19 @@ get_decompress_func(int ctype)
   *     stream and chunk headers.
   */
  static int
-read_compressed_resource(const struct wim_lookup_table_entry *lte,
-                        u64 size, consume_data_callback_t cb,
-                        void *ctx_or_buf, int flags, u64 offset)
+read_compressed_resource(const struct wim_lookup_table_entry * const lte,
+                        u64 size, const consume_data_callback_t cb,
+                        const u32 in_chunk_size, void * const ctx_or_buf,
+                        const int flags, const u64 offset)
  {
         int ret;
  
+       const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+       const u32 orig_chunk_order = bsr32(orig_chunk_size);
+
+       wimlib_assert(is_power_of_2(orig_chunk_size));
+       wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
         /* Currently, reading raw compressed chunks is only guaranteed to work
          * correctly when the full resource is requested.  Furthermore, in such
          * cases the requested size is specified as the compressed size, but
@@ -136,6 +148,7 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
                 wimlib_assert(offset == 0);
                 wimlib_assert(size == lte->resource_entry.size);
+               wimlib_assert(wim_resource_chunk_size(lte) == in_chunk_size);
                 size = wim_resource_size(lte);
         }
  
@@ -145,35 +158,45 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
         if (size == 0)
                 return 0;
  
+       u64 *chunk_offsets = NULL;
+       u8 *out_buf = NULL;
+       u8 *tmp_buf = NULL;
+       void *compressed_buf = NULL;
+       bool chunk_offsets_malloced = false;
+       bool out_buf_malloced = false;
+       bool tmp_buf_malloced = false;
+       bool compressed_buf_malloced = false;
+       const size_t stack_max = 32768;
+
         /* Get the appropriate decompression function.  */
-       decompress_func_t decompress =
+       const decompress_func_t decompress =
                         get_decompress_func(wim_resource_compression_type(lte));
  
         /* Get the file descriptor for the WIM.  */
-       struct filedes *in_fd = &lte->wim->in_fd;
+       struct filedes * const in_fd = &lte->wim->in_fd;
  
         /* Calculate the number of chunks the resource is divided into.  */
-       u64 num_chunks = wim_resource_chunks(lte);
+       const u64 num_chunks = wim_resource_chunks(lte);
  
         /* Calculate the number of entries in the chunk table; it's one less
          * than the number of chunks, since the first chunk has no entry.  */
-       u64 num_chunk_entries = num_chunks - 1;
+       const u64 num_chunk_entries = num_chunks - 1;
  
         /* Calculate the 0-based index of the chunk at which the read starts.
          */
-       u64 start_chunk = offset / WIM_CHUNK_SIZE;
+       const u64 start_chunk = offset >> orig_chunk_order;
  
         /* Calculate the offset, within the start chunk, of the first byte of
          * the read.  */
-       u64 start_offset_in_chunk = offset % WIM_CHUNK_SIZE;
+       const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
  
         /* Calculate the index of the chunk that contains the last byte of the
          * read.  */
-       u64 end_chunk = (offset + size - 1) / WIM_CHUNK_SIZE;
+       const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
  
         /* Calculate the offset, within the end chunk, of the last byte of the
          * read.  */
-       u64 end_offset_in_chunk = (offset + size - 1) % WIM_CHUNK_SIZE;
+       const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
  
         /* Calculate the number of chunk entries are actually needed to read the
          * requested part of the resource.  Include an entry for the first chunk
@@ -181,28 +204,25 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
          * account that if the last chunk required for the read is not the last
          * chunk of the resource, an extra chunk entry is needed so that the
          * compressed size of the last chunk of the read can be determined.  */
-       u64 num_alloc_chunk_entries = end_chunk - start_chunk + 1;
-       if (end_chunk != num_chunks - 1)
-               num_alloc_chunk_entries++;
+       const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
+                                           1 + (end_chunk != num_chunks - 1);
  
         /* Set the size of each chunk table entry based on the resource's
          * uncompressed size.  */
-       u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
+       const u64 chunk_entry_size = (wim_resource_size(lte) > ((u64)1 << 32)) ? 8 : 4;
  
         /* Calculate the size, in bytes, of the full chunk table.  */
-       u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
+       const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
  
         /* Allocate a buffer to hold a subset of the chunk table.  It will only
          * contain offsets for the chunks that are actually needed for this
          * read.  For speed, allocate the buffer on the stack unless it's too
          * large.  */
-       u64 *chunk_offsets;
-       bool chunk_offsets_malloced;
-       if (num_alloc_chunk_entries < 1024) {
+       if (num_alloc_chunk_entries <= stack_max / sizeof(u64)) { /* stack_max is a byte limit */
                 chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
                 chunk_offsets_malloced = false;
         } else {
-               chunk_offsets = malloc(num_alloc_chunk_entries * sizeof(u64));
+               chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
                 if (!chunk_offsets) {
                         ERROR("Failed to allocate chunk table "
                               "with %"PRIu64" entries", num_alloc_chunk_entries);
@@ -217,40 +237,38 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
                 chunk_offsets[0] = 0;
  
         /* Calculate the index of the first needed entry in the chunk table.  */
-       u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
+       const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
  
         /* Calculate the number of entries that need to be read from the chunk
          * table.  */
-       u64 num_needed_chunk_entries = (start_chunk == 0) ?
+       const u64 num_needed_chunk_entries = (start_chunk == 0) ?
                                 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
  
         /* Calculate the number of bytes of data that need to be read from the
          * chunk table.  */
-       size_t chunk_table_needed_size =
+       const size_t chunk_table_needed_size =
                                 num_needed_chunk_entries * chunk_entry_size;
         if ((u64)chunk_table_needed_size !=
             num_needed_chunk_entries * chunk_entry_size)
         {
                 ERROR("Compressed read request too large to fit into memory!");
                 ret = WIMLIB_ERR_NOMEM;
-               goto out_free_chunk_offsets;
+               goto out_free_memory;
         }
  
         /* Calculate the byte offset, in the WIM file, of the first chunk table
          * entry to read.  Take into account that if the WIM file is in the
          * special "pipable" format, then the chunk table is at the end of the
          * resource, not the beginning.  */
-       u64 file_offset_of_needed_chunk_entries =
-                       lte->resource_entry.offset + (start_table_idx *
-                                                     chunk_entry_size);
-       if (lte->is_pipable)
-               file_offset_of_needed_chunk_entries += lte->resource_entry.size -
-                                                      chunk_table_size;
+       const u64 file_offset_of_needed_chunk_entries =
+               lte->resource_entry.offset
+               + (start_table_idx * chunk_entry_size)
+               + (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
  
         /* Read the needed chunk table entries into the end of the chunk_offsets
          * buffer.  */
-       void *chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
-                               chunk_table_needed_size;
+       void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
+                                     chunk_table_needed_size;
         ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
                          file_offset_of_needed_chunk_entries);
         if (ret)
@@ -266,12 +284,9 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
         {
                 typedef le64 __attribute__((may_alias)) aliased_le64_t;
                 typedef le32 __attribute__((may_alias)) aliased_le32_t;
-               u64 *chunk_offsets_p = chunk_offsets;
+               u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
                 u64 i;
  
-               if (start_chunk == 0)
-                       chunk_offsets_p++;
-
                 if (chunk_entry_size == 4) {
                         aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
                         for (i = 0; i < num_needed_chunk_entries; i++)
@@ -283,32 +298,74 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
                 }
         }
  
-       /* Calculate file offset of the first chunk that needs to be read.  N.B.
-        * if the resource is pipable, the entries in the chunk table do *not*
-        * include the chunk headers.  */
+       /* Calculate file offset of the first chunk that needs to be read.
+        * Note: if the resource is pipable, the entries in the chunk table do
+        * *not* include the chunk headers.  */
         u64 cur_read_offset = lte->resource_entry.offset + chunk_offsets[0];
         if (!lte->is_pipable)
                 cur_read_offset += chunk_table_size;
         else
-               cur_read_offset += start_chunk *
-                                  sizeof(struct pwm_chunk_hdr);
+               cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
  
         /* If using a callback function, allocate a temporary buffer that will
          * be used to pass data to it.  If writing directly to a buffer instead,
          * arrange to write data directly into it.  */
-       u8 *out_p;
-       if (cb)
-               out_p = alloca(WIM_CHUNK_SIZE);
-       else
-               out_p = ctx_or_buf;
+       size_t out_buf_size;
+       u8 *out_buf_end, *out_p;
+       if (cb) {
+               out_buf_size = max(in_chunk_size, orig_chunk_size);
+               if (out_buf_size <= stack_max) {
+                       out_buf = alloca(out_buf_size);
+               } else {
+                       out_buf = MALLOC(out_buf_size);
+                       if (out_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_memory;
+                       }
+                       out_buf_malloced = true;
+               }
+       } else {
+               out_buf_size = size;
+               out_buf = ctx_or_buf;
+       }
+       out_buf_end = out_buf + out_buf_size;
+       out_p = out_buf;
  
         /* Unless the raw compressed data was requested, allocate a temporary
          * buffer for reading compressed chunks, each of which can be at most
-        * WIM_CHUNK_SIZE - 1 bytes.  This excludes compressed chunks that are a
-        * full WIM_CHUNK_SIZE bytes, which are handled separately.  */
-       void *compressed_buf;
-       if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
-               compressed_buf = alloca(WIM_CHUNK_SIZE - 1);
+        * orig_chunk_size - 1 bytes.  This excludes compressed chunks that are
+        * a full orig_chunk_size bytes, which are actually stored uncompressed.
+        */
+       if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
+               if (orig_chunk_size - 1 <= stack_max) {
+                       compressed_buf = alloca(orig_chunk_size - 1);
+               } else {
+                       compressed_buf = MALLOC(orig_chunk_size - 1);
+                       if (compressed_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_memory;
+                       }
+                       compressed_buf_malloced = true;
+               }
+       }
+
+       /* Allocate yet another temporary buffer, this one for reading partial
+        * chunks.  */
+       if (start_offset_in_chunk != 0 ||
+           (end_offset_in_chunk != orig_chunk_size - 1 &&
+            offset + size != wim_resource_size(lte)))
+       {
+               if (orig_chunk_size <= stack_max) {
+                       tmp_buf = alloca(orig_chunk_size);
+               } else {
+                       tmp_buf = MALLOC(orig_chunk_size);
+                       if (tmp_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_free_memory;
+                       }
+                       tmp_buf_malloced = true;
+               }
+       }
  
         /* Read, and possibly decompress, each needed chunk, either writing the
          * data directly into the @ctx_or_buf buffer or passing it to the @cb
@@ -321,15 +378,15 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
  
                 /* Calculate the sizes of the compressed chunk and of the
                  * uncompressed chunk.  */
-               unsigned compressed_chunk_size;
-               unsigned uncompressed_chunk_size;
+               u32 compressed_chunk_size;
+               u32 uncompressed_chunk_size;
                 if (i != num_chunks - 1) {
                         /* Not the last chunk.  Compressed size is given by
                          * difference of chunk table entries; uncompressed size
-                        * is always 32768 bytes.  */
+                        * is always the WIM chunk size.  */
                         compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
                                                 chunk_offsets[i - start_chunk];
-                       uncompressed_chunk_size = WIM_CHUNK_SIZE;
+                       uncompressed_chunk_size = orig_chunk_size;
                 } else {
                         /* Last chunk.  Compressed size is the remaining size in
                          * the compressed resource; uncompressed size is the
@@ -341,18 +398,18 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
                                 compressed_chunk_size -= num_chunks *
                                                          sizeof(struct pwm_chunk_hdr);
  
-                       if (wim_resource_size(lte) % WIM_CHUNK_SIZE == 0)
-                               uncompressed_chunk_size = WIM_CHUNK_SIZE;
+                       if ((wim_resource_size(lte) & (orig_chunk_size - 1)) == 0)
+                               uncompressed_chunk_size = orig_chunk_size;
                         else
-                               uncompressed_chunk_size = wim_resource_size(lte) %
-                                                         WIM_CHUNK_SIZE;
+                               uncompressed_chunk_size = wim_resource_size(lte) &
+                                                         (orig_chunk_size - 1);
                 }
  
                 /* Calculate how much of this chunk needs to be read.  */
  
-               unsigned partial_chunk_size;
-               u64 start_offset = 0;
-               u64 end_offset = WIM_CHUNK_SIZE - 1;
+               u32 partial_chunk_size;
+               u32 start_offset = 0;
+               u32 end_offset = orig_chunk_size - 1;
  
                 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
                         partial_chunk_size = compressed_chunk_size;
@@ -371,13 +428,14 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
                 {
                         /* Chunk stored uncompressed, or reading raw chunk data.  */
                         ret = full_pread(in_fd,
-                                        cb ? out_p + start_offset : out_p,
+                                        out_p,
                                          partial_chunk_size,
                                          cur_read_offset + start_offset);
                         if (ret)
                                 goto read_error;
                 } else {
                         /* Compressed chunk and not doing raw read.  */
+                       u8 *target;
  
                         /* Read the compressed data into compressed_buf.  */
                         ret = full_pread(in_fd,
@@ -387,64 +445,72 @@ read_compressed_resource(const struct wim_lookup_table_entry *lte,
                         if (ret)
                                 goto read_error;
  
-                       /* For partial chunks and when writing directly to a
-                        * buffer, we must buffer the uncompressed data because
-                        * we don't need all of it.  */
-                       if (partial_chunk_size != uncompressed_chunk_size &&
-                           cb == NULL)
-                       {
-                               u8 uncompressed_buf[uncompressed_chunk_size];
-
-                               ret = (*decompress)(compressed_buf,
-                                                   compressed_chunk_size,
-                                                   uncompressed_buf,
-                                                   uncompressed_chunk_size);
-                               if (ret) {
-                                       ERROR("Failed to decompress data.");
-                                       ret = WIMLIB_ERR_DECOMPRESSION;
-                                       errno = EINVAL;
-                                       goto out_free_chunk_offsets;
-                               }
-                               memcpy(out_p, uncompressed_buf + start_offset,
-                                      partial_chunk_size);
-                       } else {
-                               ret = (*decompress)(compressed_buf,
-                                                   compressed_chunk_size,
-                                                   out_p,
-                                                   uncompressed_chunk_size);
-                               if (ret) {
-                                       ERROR("Failed to decompress data.");
-                                       ret = WIMLIB_ERR_DECOMPRESSION;
-                                       errno = EINVAL;
-                                       goto out_free_chunk_offsets;
-                               }
+                       /* For partial chunks we must buffer the uncompressed
+                        * data because we don't need all of it.  */
+                       if (partial_chunk_size == uncompressed_chunk_size)
+                               target = out_p;
+                       else
+                               target = tmp_buf;
+
+                       /* Decompress the chunk.  */
+                       ret = (*decompress)(compressed_buf,
+                                           compressed_chunk_size,
+                                           target,
+                                           uncompressed_chunk_size);
+                       if (ret) {
+                               ERROR("Failed to decompress data.");
+                               ret = WIMLIB_ERR_DECOMPRESSION;
+                               errno = EINVAL;
+                               goto out_free_memory;
                         }
+                       if (partial_chunk_size != uncompressed_chunk_size)
+                               memcpy(out_p, tmp_buf + start_offset,
+                                      partial_chunk_size);
                 }
+
+               out_p += partial_chunk_size;
+
                 if (cb) {
                         /* Feed the data to the callback function.  */
-                       ret = cb(out_p + start_offset,
-                                partial_chunk_size, ctx_or_buf);
-                       if (ret)
-                               goto out_free_chunk_offsets;
-               } else {
-                       /* No callback function provided; we are writing
-                        * directly to a buffer.  Advance the pointer into this
-                        * buffer by the number of uncompressed bytes that were
-                        * written.  */
-                       out_p += partial_chunk_size;
+                       wimlib_assert(offset == 0);
+
+                       if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
+                               ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
+                               if (ret)
+                                       goto out_free_memory;
+                               out_p = out_buf;
+
+                       } else if (i == end_chunk || out_p == out_buf_end) {
+                               size_t bytes_sent;
+                               const u8 *p;
+
+                               for (p = out_buf; p != out_p; p += bytes_sent) {
+                                       bytes_sent = min(in_chunk_size, out_p - p);
+                                       ret = cb(p, bytes_sent, ctx_or_buf);
+                                       if (ret)
+                                               goto out_free_memory;
+                               }
+                               out_p = out_buf;
+                       }
                 }
                 cur_read_offset += compressed_chunk_size;
         }
  
         ret = 0;
-out_free_chunk_offsets:
+out_free_memory:
         if (chunk_offsets_malloced)
                 FREE(chunk_offsets);
+       if (out_buf_malloced)
+               FREE(out_buf);
+       if (compressed_buf_malloced)
+               FREE(compressed_buf);
+       if (tmp_buf_malloced)
+               FREE(tmp_buf);
         return ret;
  
  read_error:
         ERROR_WITH_ERRNO("Error reading compressed file resource");
-       goto out_free_chunk_offsets;
+       goto out_free_memory;
  }
  
  /* Skip over the chunk table at the end of pipable, compressed resource being
@@ -473,13 +539,26 @@ skip_chunk_table(const struct wim_lookup_table_entry *lte,
  static int
  read_pipable_resource(const struct wim_lookup_table_entry *lte,
                       u64 size, consume_data_callback_t cb,
-                     void *ctx_or_buf, int flags, u64 offset)
+                     u32 in_chunk_size, void *ctx_or_buf,
+                     int flags, u64 offset)
  {
         struct filedes *in_fd;
         decompress_func_t decompress;
         int ret;
-       u8 chunk[WIM_CHUNK_SIZE];
-       u8 cchunk[WIM_CHUNK_SIZE - 1];
+       const u32 orig_chunk_size = wim_resource_chunk_size(lte);
+       u8 cchunk[orig_chunk_size]; /* full-size chunks may be stored uncompressed */
+
+       size_t out_buf_size;
+       u8 *out_buf, *out_buf_end, *out_p;
+       if (cb) {
+               out_buf_size = max(in_chunk_size, orig_chunk_size);
+               out_buf = alloca(out_buf_size);
+       } else {
+               out_buf_size = size;
+               out_buf = ctx_or_buf;
+       }
+       out_buf_end = out_buf + out_buf_size;
+       out_p = out_buf;
  
         /* Get pointers to appropriate decompression function and the input file
          * descriptor.  */
@@ -495,15 +574,13 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
         wimlib_assert(size == wim_resource_size(lte));
         wimlib_assert(in_fd->offset == lte->resource_entry.offset);
  
-       for (offset = 0; offset < size; offset += WIM_CHUNK_SIZE) {
+       u32 chunk_usize;
+       for (offset = 0; offset < size; offset += chunk_usize) {
                 struct pwm_chunk_hdr chunk_hdr;
-               u32 chunk_size;
-               u32 cchunk_size;
-               u8 *res_chunk;
-               u32 res_chunk_size;
+               u32 chunk_csize;
  
                 /* Calculate uncompressed size of next chunk.  */
-               chunk_size = min(WIM_CHUNK_SIZE, size - offset);
+               chunk_usize = min(orig_chunk_size, size - offset);
  
                 /* Read the compressed size of the next chunk from the chunk
                  * header.  */
@@ -511,16 +588,16 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
                 if (ret)
                         goto read_error;
  
-               cchunk_size = le32_to_cpu(chunk_hdr.compressed_size);
+               chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
  
-               if (cchunk_size > WIM_CHUNK_SIZE) {
+               if (chunk_csize > orig_chunk_size) {
                         errno = EINVAL;
                         ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                         goto invalid;
                 }
  
                 /* Read chunk data.  */
-               ret = full_read(in_fd, cchunk, cchunk_size);
+               ret = full_read(in_fd, cchunk, chunk_csize);
                 if (ret)
                         goto read_error;
  
@@ -530,29 +607,34 @@ read_pipable_resource(const struct wim_lookup_table_entry *lte,
                 /* Decompress chunk if needed.  Uncompressed size same
                  * as compressed size means the chunk is uncompressed.
                  */
-               res_chunk_size = chunk_size;
-               if (cchunk_size == chunk_size) {
-                       res_chunk = cchunk;
+               if (chunk_csize == chunk_usize) {
+                       memcpy(out_p, cchunk, chunk_usize);
                 } else {
-                       ret = (*decompress)(cchunk, cchunk_size,
-                                           chunk, chunk_size);
+                       ret = (*decompress)(cchunk, chunk_csize,
+                                           out_p, chunk_usize);
                         if (ret) {
                                 errno = EINVAL;
                                 ret = WIMLIB_ERR_DECOMPRESSION;
                                 goto invalid;
                         }
-                       res_chunk = chunk;
                 }
+               out_p += chunk_usize;
  
                 /* Feed the uncompressed data into the callback function or copy
                  * it into the provided buffer.  */
-               if (cb) {
-                       ret = cb(res_chunk, res_chunk_size, ctx_or_buf);
-                       if (ret)
-                               return ret;
-               } else {
-                       ctx_or_buf = mempcpy(ctx_or_buf, res_chunk,
-                                            res_chunk_size);
+               if (cb && (out_p == out_buf_end ||
+                          offset + chunk_usize == size))
+               {
+                       size_t bytes_sent;
+                       const u8 *p;
+
+                       for (p = out_buf; p != out_p; p += bytes_sent) {
+                               bytes_sent = min(in_chunk_size, out_p - p);
+                               ret = cb(p, bytes_sent, ctx_or_buf);
+                               if (ret)
+                                       return ret;
+                       }
+                       out_p = out_buf;
                 }
         }
  
@@ -631,6 +713,7 @@ invalid:
  int
  read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                           u64 size, consume_data_callback_t cb,
+                         u32 in_chunk_size,
                           void *ctx_or_buf, int flags, u64 offset)
  {
         struct filedes *in_fd;
@@ -640,6 +723,13 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
          * somewhere else.  */
         wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
  
+       /* If a callback was specified, in_chunk_size must be a power of 2 (and
+        * not 0).  */
+       wimlib_assert(cb == NULL || is_power_of_2(in_chunk_size));
+
+       /* If a callback was specified, offset must be zero.  */
+       wimlib_assert(cb == NULL || offset == 0);
+
         /* Retrieve input file descriptor for the WIM file.  */
         in_fd = &lte->wim->in_fd;
  
@@ -678,10 +768,9 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                         }
                 } else if (cb) {
                         /* Send data to callback function */
-                       u8 buf[min(WIM_CHUNK_SIZE, size)];
+                       u8 buf[min(in_chunk_size, size)];
                         while (size) {
-                               size_t bytes_to_read = min(WIM_CHUNK_SIZE,
-                                                          size);
+                               size_t bytes_to_read = min(in_chunk_size, size);
                                 ret = full_pread(in_fd, buf, bytes_to_read,
                                                  offset);
                                 if (ret)
@@ -702,11 +791,13 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
         } else if (lte->is_pipable && !filedes_is_seekable(in_fd)) {
                 /* Reading compressed, pipable resource from pipe.  */
                 ret = read_pipable_resource(lte, size, cb,
+                                           in_chunk_size,
                                             ctx_or_buf, flags, offset);
         } else {
                 /* Reading compressed, possibly pipable resource from seekable
                  * file.  */
                 ret = read_compressed_resource(lte, size, cb,
+                                              in_chunk_size,
                                                ctx_or_buf, flags, offset);
         }
         goto out;
@@ -722,17 +813,19 @@ int
  read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
                                    size_t size, u64 offset, void *buf)
  {
-       return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
+       return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
  }
  
  static int
  read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
                          u64 size,
                          consume_data_callback_t cb,
+                        u32 in_chunk_size,
                          void *ctx_or_buf,
                          int flags)
  {
-       return read_partial_wim_resource(lte, size, cb, ctx_or_buf, flags, 0);
+       return read_partial_wim_resource(lte, size, cb, in_chunk_size,
+                                        ctx_or_buf, flags, 0);
  }
  
  
@@ -741,6 +834,7 @@ static int
  read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
                          u64 size,
                          consume_data_callback_t cb,
+                        u32 in_chunk_size,
                          void *ctx_or_buf,
                          int _ignored_flags)
  {
@@ -748,6 +842,9 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
         int ret;
         struct filedes fd;
         int raw_fd;
+       u8 *out_buf;
+       bool out_buf_malloced;
+       const size_t stack_max = 32768;
  
         DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"",
               size, lte->file_on_disk);
@@ -758,16 +855,27 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
                 return WIMLIB_ERR_OPEN;
         }
         filedes_init(&fd, raw_fd);
+       out_buf_malloced = false;
         if (cb) {
                 /* Send data to callback function */
-               u8 buf[min(WIM_CHUNK_SIZE, size)];
+               if (in_chunk_size <= stack_max) {
+                       out_buf = alloca(in_chunk_size);
+               } else {
+                       out_buf = MALLOC(in_chunk_size);
+                       if (out_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_close;
+                       }
+                       out_buf_malloced = true;
+               }
+
                 size_t bytes_to_read;
                 while (size) {
-                       bytes_to_read = min(WIM_CHUNK_SIZE, size);
-                       ret = full_read(&fd, buf, bytes_to_read);
+                       bytes_to_read = min(in_chunk_size, size);
+                       ret = full_read(&fd, out_buf, bytes_to_read);
                         if (ret)
                                 goto read_error;
-                       ret = cb(buf, bytes_to_read, ctx_or_buf);
+                       ret = cb(out_buf, bytes_to_read, ctx_or_buf);
                         if (ret)
                                 goto out_close;
                         size -= bytes_to_read;
@@ -785,6 +893,8 @@ read_error:
         ERROR_WITH_ERRNO("Error reading \"%"TS"\"", filename);
  out_close:
         filedes_close(&fd);
+       if (out_buf_malloced)
+               FREE(out_buf);
         return ret;
  }
  #endif /* !__WIN32__ */
@@ -792,22 +902,23 @@ out_close:
  static int
  read_buffer_prefix(const struct wim_lookup_table_entry *lte,
                    u64 size, consume_data_callback_t cb,
+                  u32 in_chunk_size,
                    void *ctx_or_buf, int _ignored_flags)
  {
-       const void *inbuf = lte->attached_buffer;
-       int ret;
  
         if (cb) {
-               while (size) {
-                       size_t chunk_size = min(WIM_CHUNK_SIZE, size);
-                       ret = cb(inbuf, chunk_size, ctx_or_buf);
+               int ret;
+               u32 chunk_size;
+
+               for (u64 offset = 0; offset < size; offset += chunk_size) {
+                       chunk_size = min(in_chunk_size, size - offset);
+                       ret = cb((const u8*)lte->attached_buffer + offset,
+                                chunk_size, ctx_or_buf);
                         if (ret)
                                 return ret;
-                       size -= chunk_size;
-                       inbuf += chunk_size;
                 }
         } else {
-               memcpy(ctx_or_buf, inbuf, size);
+               memcpy(ctx_or_buf, lte->attached_buffer, size);
         }
         return 0;
  }
@@ -815,6 +926,7 @@ read_buffer_prefix(const struct wim_lookup_table_entry *lte,
  typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
                                               u64 size,
                                               consume_data_callback_t cb,
+                                             u32 in_chunk_size,
                                               void *ctx_or_buf,
                                               int flags);
  
@@ -837,8 +949,8 @@ typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entr
   */
  int
  read_resource_prefix(const struct wim_lookup_table_entry *lte,
-                    u64 size, consume_data_callback_t cb, void *ctx_or_buf,
-                    int flags)
+                    u64 size, consume_data_callback_t cb, u32 in_chunk_size,
+                    void *ctx_or_buf, int flags)
  {
         static const read_resource_prefix_handler_t handlers[] = {
                 [RESOURCE_IN_WIM]             = read_wim_resource_prefix,
@@ -860,14 +972,15 @@ read_resource_prefix(const struct wim_lookup_table_entry *lte,
         };
         wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
                       && handlers[lte->resource_location] != NULL);
-       return handlers[lte->resource_location](lte, size, cb, ctx_or_buf, flags);
+       wimlib_assert(cb == NULL || in_chunk_size > 0);
+       return handlers[lte->resource_location](lte, size, cb, in_chunk_size, ctx_or_buf, flags);
  }
  
  int
  read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
                             void *buf)
  {
-       return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
+       return read_resource_prefix(lte, wim_resource_size(lte), NULL, 0, buf, 0);
  }
  
  int
@@ -957,6 +1070,7 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
                 sha1_init(&ctx.sha_ctx);
                 ret = read_resource_prefix(lte, size,
                                            extract_chunk_sha1_wrapper,
+                                          wim_resource_chunk_size(lte),
                                            &ctx, 0);
                 if (ret == 0) {
                         u8 hash[SHA1_HASH_SIZE];
@@ -975,6 +1089,7 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
         } else {
                 /* Don't do SHA1 */
                 ret = read_resource_prefix(lte, size, extract_chunk,
+                                          wim_resource_chunk_size(lte),
                                            extract_chunk_arg, 0);
         }
         return ret;
@@ -1014,7 +1129,8 @@ sha1_resource(struct wim_lookup_table_entry *lte)
  
         sha1_init(&sha_ctx);
         ret = read_resource_prefix(lte, wim_resource_size(lte),
-                                  sha1_chunk, &sha_ctx, 0);
+                                  sha1_chunk, wim_resource_chunk_size(lte),
+                                  &sha_ctx, 0);
         if (ret == 0)
                 sha1_final(lte->hash, &sha_ctx);
         return ret;
diff --git a/src/util.c b/src/util.c

index fd14e1fedeb1c8200bd2e32ef28f40feffec662a..262fa1e1fa65d96c186d7753279beda6c72a6efa 100644 (file)
--- a/src/util.c
+++ b/src/util.c
@@ -303,8 +303,7 @@ static const tchar *error_strings[] = {
         [WIMLIB_ERR_INVALID_CAPTURE_CONFIG]
                 = T("The capture configuration string was invalid"),
         [WIMLIB_ERR_INVALID_CHUNK_SIZE]
-               = T("The WIM is compressed but does not have a chunk "
-                       "size of 32768"),
+               = T("The WIM chunk size was invalid"),
         [WIMLIB_ERR_INVALID_COMPRESSION_TYPE]
                 = T("The WIM is compressed, but is not marked as having LZX or "
                         "XPRESS compression"),
diff --git a/src/wim.c b/src/wim.c

index 2a6ac129d644eb4b437544171e117f25f81cc00a..63459751eff443c74ccb06e30c7ba1059573aff8 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -75,6 +75,62 @@ new_wim_struct(void)
         return wim;
  }
  
+/* Return true if @chunk_size may be used together with compression type
+ * @ctype, false otherwise.  */
+static bool
+wim_chunk_size_valid(u32 chunk_size, int ctype)
+{
+       u32 log2_size;
+
+       /* Uncompressed WIMs never look at the chunk size, so anything goes.  */
+       if (ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+               return true;
+
+       /* Compressed WIMs need a nonzero power of 2.  bsr32() is presumably
+        * the index of the highest set bit (confirm against its definition);
+        * shifting 1 back up by that amount reproduces @chunk_size only when
+        * exactly one bit is set.  */
+       if (chunk_size == 0)
+               return false;
+       log2_size = bsr32(chunk_size);
+       if ((1U << log2_size) != chunk_size)
+               return false;
+
+       /* Order        Size
+        * =====        ====
+        * 15           32768
+        * 16           65536
+        * 17           131072
+        * 18           262144
+        * 19           524288
+        * 20           1048576
+        * 21           2097152
+        * 22           4194304
+        * 23           8388608
+        * 24           16777216
+        * 25           33554432
+        * 26           67108864
+        */
+       if (ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
+               /* TODO: Allow other chunk sizes when supported by the LZX
+                * compressor and decompressor.  */
+               return log2_size == 15;
+       }
+
+       if (ctype == WIMLIB_COMPRESSION_TYPE_XPRESS) {
+               /* WIMGAPI (Windows 7) didn't seem to support XPRESS chunk size
+                * below 32768 bytes, but larger power-of-two sizes appear to be
+                * supported.  67108864 was the largest size that worked.
+                * (Note, however, that the offsets of XPRESS matches are still
+                * limited to 65535 bytes even when a much larger chunk size is
+                * used!)  */
+               return 15 <= log2_size && log2_size <= 26;
+       }
+
+       /* Unknown compression type: no chunk size is acceptable.  */
+       return false;
+}
+
+/* Return the chunk size to use when none has been chosen explicitly: it seeds
+ * the header of a newly created WIM and replaces an output chunk size that is
+ * invalid for a newly selected compression type.  @ctype is currently ignored;
+ * every compression type gets 32768 bytes.  */
+static u32
+wim_default_chunk_size(int ctype)
+{
+       const u32 default_size = 32768;
+
+       return default_size;
+}
+
  /*
   * Calls a function on images in the WIM.  If @image is WIMLIB_ALL_IMAGES, @visitor
   * is called on the WIM once for each image, with each image selected as the
@@ -127,7 +183,7 @@ wimlib_create_new_wim(int ctype, WIMStruct **wim_ret)
         if (!wim)
                 return WIMLIB_ERR_NOMEM;
  
-       ret = init_wim_header(&wim->hdr, ctype);
+       ret = init_wim_header(&wim->hdr, ctype, wim_default_chunk_size(ctype));
         if (ret != 0)
                 goto out_free;
  
@@ -140,6 +196,8 @@ wimlib_create_new_wim(int ctype, WIMStruct **wim_ret)
         wim->refcnts_ok = 1;
         wim->compression_type = ctype;
         wim->out_compression_type = ctype;
+       wim->chunk_size = wim->hdr.chunk_size;
+       wim->out_chunk_size = wim->hdr.chunk_size;
         *wim_ret = wim;
         return 0;
  out_free:
@@ -294,7 +352,7 @@ wimlib_get_wim_info(WIMStruct *wim, struct wimlib_wim_info *info)
         info->image_count = wim->hdr.image_count;
         info->boot_index = wim->hdr.boot_idx;
         info->wim_version = WIM_VERSION;
-       info->chunk_size = WIM_CHUNK_SIZE;
+       info->chunk_size = wim->hdr.chunk_size;
         info->part_number = wim->hdr.part_number;
         info->total_parts = wim->hdr.total_parts;
         info->compression_type = wim->compression_type;
@@ -357,6 +415,50 @@ wimlib_set_wim_info(WIMStruct *wim, const struct wimlib_wim_info *info, int whic
         return 0;
  }
  
+/* API function documented in wimlib.h  */
+WIMLIBAPI int
+wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
+{
+       /* Only the three real compression types are accepted.
+        * WIMLIB_COMPRESSION_TYPE_INVALID and any unrecognized value are
+        * rejected before the WIMStruct is touched.  */
+       if (ctype != WIMLIB_COMPRESSION_TYPE_NONE &&
+           ctype != WIMLIB_COMPRESSION_TYPE_LZX &&
+           ctype != WIMLIB_COMPRESSION_TYPE_XPRESS)
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       wim->out_compression_type = ctype;
+
+       /* The output chunk size selected earlier may not be usable with the
+        * new compression type; if so, fall back to that type's default.  */
+       if (!wim_chunk_size_valid(wim->out_chunk_size,
+                                 wim->out_compression_type))
+               wim->out_chunk_size = wim_default_chunk_size(wim->out_compression_type);
+
+       return 0;
+}
+
+/* API function documented in wimlib.h
+ *
+ * Sets the chunk size used when @wim is subsequently written.  @chunk_size is
+ * checked with wim_chunk_size_valid() against the WIM's current output
+ * compression type.  If it is rejected, an error is logged, the stored output
+ * chunk size is left unchanged and WIMLIB_ERR_INVALID_CHUNK_SIZE is returned.
+ * Returns 0 on success.  */
+WIMLIBAPI int
+wimlib_set_output_chunk_size(WIMStruct *wim, uint32_t chunk_size)
+{
+       if (!wim_chunk_size_valid(chunk_size, wim->out_compression_type)) {
+               ERROR("Invalid chunk size (%"PRIu32" bytes) "
+                     "for compression type %"TS"!",
+                     chunk_size,
+                     wimlib_get_compression_type_string(wim->out_compression_type));
+               /* Tell the user what would have been accepted.  These hints
+                * must mirror the ranges in wim_chunk_size_valid(): XPRESS
+                * takes powers of 2 from 32768 to 67108864, LZX only 32768.  */
+               switch (wim->out_compression_type) {
+               case WIMLIB_COMPRESSION_TYPE_XPRESS:
+                       ERROR("Valid chunk sizes for XPRESS are 32768, 65536, 131072, ..., 67108864.");
+                       break;
+               case WIMLIB_COMPRESSION_TYPE_LZX:
+                       ERROR("The only valid chunk size for LZX is 32768.");
+                       break;
+               }
+               return WIMLIB_ERR_INVALID_CHUNK_SIZE;
+       }
+       wim->out_chunk_size = chunk_size;
+       return 0;
+}
+
  static int
  do_open_wim(const tchar *filename, struct filedes *fd_ret)
  {
@@ -485,6 +587,16 @@ begin_read(WIMStruct *wim, const void *wim_filename_or_fd,
         }
         wim->out_compression_type = wim->compression_type;
  
+       /* Check and cache the chunk size.  */
+       wim->chunk_size = wim->out_chunk_size = wim->hdr.chunk_size;
+       if (!wim_chunk_size_valid(wim->chunk_size, wim->compression_type)) {
+               ERROR("Invalid chunk size (%"PRIu32" bytes) "
+                     "for compression type %"TS"!",
+                     wim->chunk_size,
+                     wimlib_get_compression_type_string(wim->compression_type));
+               return WIMLIB_ERR_INVALID_CHUNK_SIZE;
+       }
+
         if (open_flags & WIMLIB_OPEN_FLAG_CHECK_INTEGRITY) {
                 ret = check_wim_integrity(wim, progress_func);
                 if (ret == WIM_INTEGRITY_NONEXISTENT) {
diff --git a/src/win32_capture.c b/src/win32_capture.c

index 308c0677ccb90b02c0a3b6f7eb31a51e7bc9558a..f940179cd58f2aed786ed4637d5985ff5bb4050b 100644 (file)
--- a/src/win32_capture.c
+++ b/src/win32_capture.c
@@ -58,12 +58,15 @@ int
  read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
                        u64 size,
                        consume_data_callback_t cb,
+                      u32 in_chunk_size,
                        void *ctx_or_buf,
                        int _ignored_flags)
  {
         int ret = 0;
         void *out_buf;
+       bool out_buf_malloced;
         u64 bytes_remaining;
+       const size_t stack_max = 32768;
  
         HANDLE hFile = win32_open_existing_file(lte->file_on_disk,
                                                 FILE_READ_DATA);
@@ -73,16 +76,27 @@ read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
                 return WIMLIB_ERR_OPEN;
         }
  
-       if (cb)
-               out_buf = alloca(WIM_CHUNK_SIZE);
-       else
+       out_buf_malloced = false;
+       if (cb) {
+               if (in_chunk_size <= stack_max) {
+                       out_buf = alloca(in_chunk_size);
+               } else {
+                       out_buf = MALLOC(in_chunk_size);
+                       if (out_buf == NULL) {
+                               ret = WIMLIB_ERR_NOMEM;
+                               goto out_close_handle;
+                       }
+                       out_buf_malloced = true;
+               }
+       } else {
                 out_buf = ctx_or_buf;
+       }
  
         bytes_remaining = size;
         while (bytes_remaining) {
                 DWORD bytesToRead, bytesRead;
  
-               bytesToRead = min(WIM_CHUNK_SIZE, bytes_remaining);
+               bytesToRead = min(in_chunk_size, bytes_remaining);
                 if (!ReadFile(hFile, out_buf, bytesToRead, &bytesRead, NULL) ||
                     bytesRead != bytesToRead)
                 {
@@ -101,6 +115,9 @@ read_win32_file_prefix(const struct wim_lookup_table_entry *lte,
                         out_buf += bytesRead;
                 }
         }
+       if (out_buf_malloced)
+               FREE(out_buf);
+out_close_handle:
         CloseHandle(hFile);
         return ret;
  }
@@ -112,6 +129,7 @@ struct win32_encrypted_read_ctx {
         void *buf;
         size_t buf_filled;
         u64 bytes_remaining;
+       u32 in_chunk_size;
  };
  
  static DWORD WINAPI
@@ -119,6 +137,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
  {
         const void *data = _data;
         struct win32_encrypted_read_ctx *ctx = _ctx;
+       u32 in_chunk_size = ctx->in_chunk_size;
         int ret;
  
         DEBUG("len = %lu", len);
@@ -130,7 +149,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
                                              len);
                 while (bytes_to_buffer) {
                         size_t bytes_to_copy_to_buf =
-                               min(bytes_to_buffer, WIM_CHUNK_SIZE - ctx->buf_filled);
+                               min(bytes_to_buffer, in_chunk_size - ctx->buf_filled);
  
                         memcpy(ctx->buf + ctx->buf_filled, data,
                                bytes_to_copy_to_buf);
@@ -138,7 +157,7 @@ win32_encrypted_export_cb(unsigned char *_data, void *_ctx, unsigned long len)
                         data += bytes_to_copy_to_buf;
                         bytes_to_buffer -= bytes_to_copy_to_buf;
  
-                       if (ctx->buf_filled == WIM_CHUNK_SIZE ||
+                       if (ctx->buf_filled == in_chunk_size ||
                             ctx->buf_filled == ctx->bytes_remaining)
                         {
                                 ret = (*ctx->read_prefix_cb)(ctx->buf,
@@ -168,6 +187,7 @@ int
  read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
                                  u64 size,
                                  consume_data_callback_t cb,
+                                u32 in_chunk_size,
                                  void *ctx_or_buf,
                                  int _ignored_flags)
  {
@@ -183,7 +203,7 @@ read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
         export_ctx.read_prefix_ctx_or_buf = ctx_or_buf;
         export_ctx.wimlib_err_code = 0;
         if (cb) {
-               export_ctx.buf = MALLOC(WIM_CHUNK_SIZE);
+               export_ctx.buf = MALLOC(in_chunk_size);
                 if (!export_ctx.buf)
                         return WIMLIB_ERR_NOMEM;
         } else {
diff --git a/src/write.c b/src/write.c

index a127a99b486881f7cfa77c50d01e9929476a5292..8554d1ab573fc23af05533c2f9f1cde750e0914e 100644 (file)
--- a/src/write.c
+++ b/src/write.c
@@ -68,6 +68,30 @@
  #  include <sys/uio.h> /* for `struct iovec' */
  #endif
  
+/* Return true if the specified resource is compressed and the compressed data
+ * can be reused with the specified output parameters.
+ *
+ * A raw copy is possible only when all of the following hold:
+ *   - the caller did not request recompression
+ *     (WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS in @write_resource_flags);
+ *   - the output is compressed at all;
+ *   - the resource's compression type equals @out_ctype; and
+ *   - the resource's chunk size equals @out_chunk_size.  */
+static bool
+can_raw_copy(const struct wim_lookup_table_entry *lte,
+            int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+       /* An explicit recompression request always rules out reusing the
+        * existing compressed data, even if the formats match.  */
+       if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)
+               return false;
+
+       return (out_ctype == wim_resource_compression_type(lte)
+               && out_chunk_size == wim_resource_chunk_size(lte)
+               && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
+}
+
+
+/* Return true if the data of the specified resource has to go through the
+ * compressor when written with the specified output parameters, i.e. the
+ * output is compressed and either recompression was requested or the existing
+ * data cannot be raw-copied.  */
+static bool
+must_compress_stream(const struct wim_lookup_table_entry *lte,
+                    int write_resource_flags, int out_ctype, u32 out_chunk_size)
+{
+       /* Uncompressed output never needs the compressor.  */
+       if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+               return false;
+
+       /* Recompression was explicitly requested.  */
+       if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)
+               return true;
+
+       /* Otherwise compress exactly when a raw copy is not possible.  */
+       return !can_raw_copy(lte, write_resource_flags,
+                            out_ctype, out_chunk_size);
+}
+
  static unsigned
  compress_chunk(const void * uncompressed_data,
                unsigned uncompressed_len,
@@ -115,6 +139,7 @@ struct chunk_table {
  static int
  begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
                              struct filedes *out_fd,
+                            u32 out_chunk_size,
                              struct chunk_table **chunk_tab_ret,
                              int resource_flags)
  {
@@ -126,7 +151,7 @@ begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
         int ret;
  
         size = wim_resource_size(lte);
-       num_chunks = wim_resource_chunks(lte);
+       num_chunks = DIV_ROUND_UP(size, out_chunk_size);
         bytes_per_chunk_entry = (size > (1ULL << 32)) ? 8 : 4;
         alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
         chunk_tab = CALLOC(1, alloc_size);
@@ -273,6 +298,7 @@ finalize_and_check_sha1(SHA_CTX *sha_ctx, struct wim_lookup_table_entry *lte)
  
  struct write_resource_ctx {
         int out_ctype;
+       u32 out_chunk_size;
         struct wimlib_lzx_context *comp_ctx;
         struct chunk_table *chunk_tab;
         struct filedes *out_fd;
@@ -288,6 +314,10 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
         const void *out_chunk;
         unsigned out_chunk_size;
         int ret;
+       void *compressed_chunk = NULL;
+       unsigned compressed_size;
+       bool compressed_chunk_malloced = false;
+       size_t stack_max = 32768;
  
         if (ctx->doing_sha)
                 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
@@ -295,11 +325,16 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
         out_chunk = chunk;
         out_chunk_size = chunk_size;
         if (ctx->out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
-               void *compressed_chunk;
-               unsigned compressed_size;
  
                 /* Compress the chunk.  */
-               compressed_chunk = alloca(chunk_size);
+               if (chunk_size <= stack_max) {
+                       compressed_chunk = alloca(chunk_size);
+               } else {
+                       compressed_chunk = MALLOC(chunk_size);
+                       if (compressed_chunk == NULL)
+                               return WIMLIB_ERR_NOMEM;
+                       compressed_chunk_malloced = true;
+               }
  
                 compressed_size = compress_chunk(chunk, chunk_size,
                                                  compressed_chunk,
@@ -335,11 +370,15 @@ write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
         ret = full_write(ctx->out_fd, out_chunk, out_chunk_size);
         if (ret)
                 goto error;
-       return 0;
+
+out_free_memory:
+       if (compressed_chunk_malloced)
+               FREE(compressed_chunk);
+       return ret;
  
  error:
         ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
-       return ret;
+       goto out_free_memory;
  }
  
  /*
@@ -381,12 +420,14 @@ error:
  int
  write_wim_resource(struct wim_lookup_table_entry *lte,
                    struct filedes *out_fd, int out_ctype,
+                  u32 out_chunk_size,
                    struct resource_entry *out_res_entry,
                    int resource_flags,
                    struct wimlib_lzx_context **comp_ctx)
  {
         struct write_resource_ctx write_ctx;
         off_t res_start_offset;
+       u32 in_chunk_size;
         u64 read_size;
         int ret;
  
@@ -402,11 +443,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
          * desired other than no compression, we can simply copy the compressed
          * data without recompressing it.  This also means we must skip
          * calculating the SHA1, as we never will see the uncompressed data.  */
-       if (lte->resource_location == RESOURCE_IN_WIM &&
-           out_ctype == wim_resource_compression_type(lte) &&
-           out_ctype != WIMLIB_COMPRESSION_TYPE_NONE &&
-           !(resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS))
-       {
+       if (can_raw_copy(lte, resource_flags, out_ctype, out_chunk_size)) {
                 /* Normally we can request a RAW_FULL read, but if we're reading
                  * from a pipable resource and writing a non-pipable resource or
                  * vice versa, then a RAW_CHUNKS read needs to be requested so
@@ -418,22 +455,23 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
                         resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_FULL;
                 else
                         resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS;
-               write_ctx.doing_sha = false;
                 read_size = lte->resource_entry.size;
+               write_ctx.doing_sha = false;
         } else {
                 write_ctx.doing_sha = true;
                 sha1_init(&write_ctx.sha_ctx);
                 read_size = lte->resource_entry.original_size;
         }
  
-
         /* If the output resource is to be compressed, initialize the chunk
          * table and set the function to use for chunk compression.  Exceptions:
          * no compression function is needed if doing a raw copy; also, no chunk
          * table is needed if doing a *full* (not per-chunk) raw copy.  */
         write_ctx.out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
+       write_ctx.out_chunk_size = out_chunk_size;
         write_ctx.chunk_tab = NULL;
         if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+               wimlib_assert(out_chunk_size > 0);
                 if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW)) {
                         write_ctx.out_ctype = out_ctype;
                         if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
@@ -445,6 +483,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
                 }
                 if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)) {
                         ret = begin_wim_resource_chunk_tab(lte, out_fd,
+                                                          out_chunk_size,
                                                            &write_ctx.chunk_tab,
                                                            resource_flags);
                         if (ret)
@@ -469,8 +508,13 @@ write_wim_resource(struct wim_lookup_table_entry *lte,
         write_ctx.out_fd = out_fd;
         write_ctx.resource_flags = resource_flags;
  try_write_again:
+       if (write_ctx.out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
+               in_chunk_size = wim_resource_chunk_size(lte);
+       else
+               in_chunk_size = out_chunk_size;
         ret = read_resource_prefix(lte, read_size,
-                                  write_resource_cb, &write_ctx, resource_flags);
+                                  write_resource_cb,
+                                  in_chunk_size, &write_ctx, resource_flags);
         if (ret)
                 goto out_free_chunk_tab;
  
@@ -553,6 +597,7 @@ int
  write_wim_resource_from_buffer(const void *buf, size_t buf_size,
                                int reshdr_flags, struct filedes *out_fd,
                                int out_ctype,
+                              u32 out_chunk_size,
                                struct resource_entry *out_res_entry,
                                u8 *hash_ret, int write_resource_flags,
                                struct wimlib_lzx_context **comp_ctx)
@@ -566,6 +611,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size,
         lte.attached_buffer              = (void*)buf;
         lte.resource_entry.original_size = buf_size;
         lte.resource_entry.flags         = reshdr_flags;
+       lte.compression_type             = WIMLIB_COMPRESSION_TYPE_NONE;
  
         if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
                 sha1_buffer(buf, buf_size, lte.hash);
@@ -574,8 +620,8 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size,
                 lte.unhashed = 1;
         }
  
-       ret = write_wim_resource(&lte, out_fd, out_ctype, out_res_entry,
-                                write_resource_flags, comp_ctx);
+       ret = write_wim_resource(&lte, out_fd, out_ctype, out_chunk_size,
+                                out_res_entry, write_resource_flags, comp_ctx);
         if (ret)
                 return ret;
         if (hash_ret)
@@ -684,6 +730,7 @@ struct compressor_thread_params {
  
  struct message {
         struct wim_lookup_table_entry *lte;
+       u32 out_chunk_size;
         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
@@ -802,6 +849,7 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data,
  struct serial_write_stream_ctx {
         struct filedes *out_fd;
         int out_ctype;
+       u32 out_chunk_size;
         struct wimlib_lzx_context **comp_ctx;
         int write_resource_flags;
  };
@@ -811,7 +859,9 @@ serial_write_stream(struct wim_lookup_table_entry *lte, void *_ctx)
  {
         struct serial_write_stream_ctx *ctx = _ctx;
         return write_wim_resource(lte, ctx->out_fd,
-                                 ctx->out_ctype, &lte->output_resource_entry,
+                                 ctx->out_ctype,
+                                 ctx->out_chunk_size,
+                                 &lte->output_resource_entry,
                                   ctx->write_resource_flags,
                                   ctx->comp_ctx);
  }
@@ -911,6 +961,7 @@ do_write_stream_list_serial(struct list_head *stream_list,
                             struct wim_lookup_table *lookup_table,
                             struct filedes *out_fd,
                             int out_ctype,
+                           u32 out_chunk_size,
                             struct wimlib_lzx_context **comp_ctx,
                             int write_resource_flags,
                             struct write_streams_progress_data *progress_data)
@@ -918,6 +969,7 @@ do_write_stream_list_serial(struct list_head *stream_list,
         struct serial_write_stream_ctx ctx = {
                 .out_fd = out_fd,
                 .out_ctype = out_ctype,
+               .out_chunk_size = out_chunk_size,
                 .write_resource_flags = write_resource_flags,
                 .comp_ctx = comp_ctx,
         };
@@ -945,6 +997,7 @@ write_stream_list_serial(struct list_head *stream_list,
                          struct wim_lookup_table *lookup_table,
                          struct filedes *out_fd,
                          int out_ctype,
+                        u32 out_chunk_size,
                          struct wimlib_lzx_context **comp_ctx,
                          int write_resource_flags,
                          struct write_streams_progress_data *progress_data)
@@ -961,6 +1014,7 @@ write_stream_list_serial(struct list_head *stream_list,
                                            lookup_table,
                                            out_fd,
                                            out_ctype,
+                                          out_chunk_size,
                                            comp_ctx,
                                            write_resource_flags,
                                            progress_data);
@@ -1011,6 +1065,7 @@ struct main_writer_thread_ctx {
         struct filedes *out_fd;
         off_t res_start_offset;
         int out_ctype;
+       u32 out_chunk_size;
         struct wimlib_lzx_context **comp_ctx;
         int write_resource_flags;
         struct shared_queue *res_to_compress_queue;
@@ -1034,11 +1089,12 @@ struct main_writer_thread_ctx {
  };
  
  static int
-init_message(struct message *msg)
+init_message(struct message *msg, u32 out_chunk_size)
  {
+       msg->out_chunk_size = out_chunk_size;
         for (size_t i = 0; i < MAX_CHUNKS_PER_MSG; i++) {
-               msg->compressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
-               msg->uncompressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
+               msg->compressed_chunks[i] = MALLOC(out_chunk_size);
+               msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                 if (msg->compressed_chunks[i] == NULL ||
                     msg->uncompressed_chunks[i] == NULL)
                         return WIMLIB_ERR_NOMEM;
@@ -1066,7 +1122,7 @@ free_messages(struct message *msgs, size_t num_messages)
  }
  
  static struct message *
-allocate_messages(size_t num_messages)
+allocate_messages(size_t num_messages, u32 out_chunk_size)
  {
         struct message *msgs;
  
@@ -1074,7 +1130,7 @@ allocate_messages(size_t num_messages)
         if (!msgs)
                 return NULL;
         for (size_t i = 0; i < num_messages; i++) {
-               if (init_message(&msgs[i])) {
+               if (init_message(&msgs[i], out_chunk_size)) {
                         free_messages(msgs, num_messages);
                         return NULL;
                 }
@@ -1096,7 +1152,7 @@ main_writer_thread_init_ctx(struct main_writer_thread_ctx *ctx)
  {
         /* Pre-allocate all the buffers that will be needed to do the chunk
          * compression. */
-       ctx->msgs = allocate_messages(ctx->num_messages);
+       ctx->msgs = allocate_messages(ctx->num_messages, ctx->out_chunk_size);
         if (!ctx->msgs)
                 return WIMLIB_ERR_NOMEM;
  
@@ -1182,6 +1238,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
                          * it if needed.  */
                         ret = begin_wim_resource_chunk_tab(cur_lte,
                                                            ctx->out_fd,
+                                                          ctx->out_chunk_size,
                                                            &ctx->cur_chunk_tab,
                                                            ctx->write_resource_flags);
                         if (ret)
@@ -1232,6 +1289,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
                                 ret = write_wim_resource(cur_lte,
                                                          ctx->out_fd,
                                                          WIMLIB_COMPRESSION_TYPE_NONE,
+                                                        0,
                                                          &cur_lte->output_resource_entry,
                                                          ctx->write_resource_flags,
                                                          ctx->comp_ctx);
@@ -1273,6 +1331,7 @@ receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
                                                                   ctx->lookup_table,
                                                                   ctx->out_fd,
                                                                   ctx->out_ctype,
+                                                                 ctx->out_chunk_size,
                                                                   ctx->comp_ctx,
                                                                   ctx->write_resource_flags,
                                                                   ctx->progress_data);
@@ -1372,6 +1431,7 @@ main_writer_thread_finish(void *_ctx)
                                            ctx->lookup_table,
                                            ctx->out_fd,
                                            ctx->out_ctype,
+                                          ctx->out_chunk_size,
                                            ctx->comp_ctx,
                                            ctx->write_resource_flags,
                                            ctx->progress_data);
@@ -1389,12 +1449,14 @@ submit_stream_for_compression(struct wim_lookup_table_entry *lte,
          * when @lte is already hashed. */
         sha1_init(&ctx->next_sha_ctx);
         ctx->next_chunk = 0;
-       ctx->next_num_chunks = wim_resource_chunks(lte);
+       ctx->next_num_chunks = DIV_ROUND_UP(wim_resource_size(lte),
+                                           ctx->out_chunk_size);
         ctx->next_lte = lte;
         INIT_LIST_HEAD(&lte->msg_list);
         list_add_tail(&lte->being_compressed_list, &ctx->outstanding_streams);
         ret = read_resource_prefix(lte, wim_resource_size(lte),
-                                  main_writer_thread_cb, ctx, 0);
+                                  main_writer_thread_cb,
+                                  ctx->out_chunk_size, ctx, 0);
         if (ret)
                 return ret;
         wimlib_assert(ctx->next_chunk == ctx->next_num_chunks);
@@ -1408,10 +1470,8 @@ main_thread_process_next_stream(struct wim_lookup_table_entry *lte, void *_ctx)
         int ret;
  
         if (wim_resource_size(lte) < 1000 ||
-           ctx->out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
-           (lte->resource_location == RESOURCE_IN_WIM &&
-            !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) &&
-            lte->wim->compression_type == ctx->out_ctype))
+           !must_compress_stream(lte, ctx->write_resource_flags,
+                                 ctx->out_ctype, ctx->out_chunk_size))
         {
                 /* Stream is too small or isn't being compressed.  Process it by
                  * the main thread when we have a chance.  We can't necessarily
@@ -1443,15 +1503,15 @@ get_default_num_threads(void)
   * create the number of threads requested.
   *
   * High level description of the algorithm for writing compressed streams in
- * parallel:  We perform compression on chunks of size WIM_CHUNK_SIZE bytes
- * rather than on full files.  The currently executing thread becomes the main
- * thread and is entirely in charge of reading the data to compress (which may
- * be in any location understood by the resource code--- such as in an external
- * file being captured, or in another WIM file from which an image is being
- * exported) and actually writing the compressed data to the output file.
- * Additional threads are "compressor threads" and all execute the
- * compressor_thread_proc, where they repeatedly retrieve buffers of data from
- * the main thread, compress them, and hand them back to the main thread.
+ * parallel:  We perform compression on chunks rather than on full files.  The
+ * currently executing thread becomes the main thread and is entirely in charge
+ * of reading the data to compress (which may be in any location understood by
+ * the resource code--- such as in an external file being captured, or in
+ * another WIM file from which an image is being exported) and actually writing
+ * the compressed data to the output file.  Additional threads are "compressor
+ * threads" and all execute the compressor_thread_proc, where they repeatedly
+ * retrieve buffers of data from the main thread, compress them, and hand them
+ * back to the main thread.
   *
   * Certain streams, such as streams that do not need to be compressed (e.g.
   * input compression type same as output compression type) or streams of very
@@ -1468,6 +1528,7 @@ write_stream_list_parallel(struct list_head *stream_list,
                            struct wim_lookup_table *lookup_table,
                            struct filedes *out_fd,
                            int out_ctype,
+                          u32 out_chunk_size,
                            struct wimlib_lzx_context **comp_ctx,
                            int write_resource_flags,
                            struct write_streams_progress_data *progress_data,
@@ -1559,6 +1620,7 @@ write_stream_list_parallel(struct list_head *stream_list,
         ctx.lookup_table          = lookup_table;
         ctx.out_fd                = out_fd;
         ctx.out_ctype             = out_ctype;
+       ctx.out_chunk_size        = out_chunk_size;
         ctx.comp_ctx              = comp_ctx;
         ctx.res_to_compress_queue = &res_to_compress_queue;
         ctx.compressed_res_queue  = &compressed_res_queue;
@@ -1612,6 +1674,7 @@ out_serial_quiet:
                                         lookup_table,
                                         out_fd,
                                         out_ctype,
+                                       out_chunk_size,
                                         comp_ctx,
                                         write_resource_flags,
                                         progress_data);
@@ -1627,6 +1690,7 @@ static int
  write_stream_list(struct list_head *stream_list,
                   struct wim_lookup_table *lookup_table,
                   struct filedes *out_fd, int out_ctype,
+                 u32 out_chunk_size,
                   struct wimlib_lzx_context **comp_ctx,
                   int write_flags,
                   unsigned num_threads, wimlib_progress_func_t progress_func)
@@ -1662,12 +1726,9 @@ write_stream_list(struct list_head *stream_list,
         list_for_each_entry(lte, stream_list, write_streams_list) {
                 num_streams++;
                 total_bytes += wim_resource_size(lte);
-               if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
-                      && (wim_resource_compression_type(lte) != out_ctype ||
-                          (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)))
-               {
+               if (must_compress_stream(lte, write_resource_flags,
+                                        out_ctype, out_chunk_size))
                         total_compression_bytes += wim_resource_size(lte);
-               }
                 if (lte->resource_location == RESOURCE_IN_WIM) {
                         if (prev_wim_part != lte->wim) {
                                 prev_wim_part = lte->wim;
@@ -1697,6 +1758,7 @@ write_stream_list(struct list_head *stream_list,
                                                  lookup_table,
                                                  out_fd,
                                                  out_ctype,
+                                                out_chunk_size,
                                                  comp_ctx,
                                                  write_resource_flags,
                                                  &progress_data,
@@ -1707,6 +1769,7 @@ write_stream_list(struct list_head *stream_list,
                                                lookup_table,
                                                out_fd,
                                                out_ctype,
+                                              out_chunk_size,
                                                comp_ctx,
                                                write_resource_flags,
                                                &progress_data);
@@ -2030,6 +2093,7 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags,
                                  wim->lookup_table,
                                  &wim->out_fd,
                                  wim->out_compression_type,
+                                wim->out_chunk_size,
                                  &wim->lzx_context,
                                  write_flags,
                                  num_threads,
@@ -2090,6 +2154,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags,
                         ret = write_wim_resource(imd->metadata_lte,
                                                  &wim->out_fd,
                                                  wim->out_compression_type,
+                                                wim->out_chunk_size,
                                                  &imd->metadata_lte->output_resource_entry,
                                                  write_resource_flags,
                                                  &wim->lzx_context);
@@ -2447,22 +2512,6 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags,
          * finish_write().  */
  }
  
-/* API function documented in wimlib.h  */
-WIMLIBAPI int
-wimlib_set_output_compression_type(WIMStruct *wim, int ctype)
-{
-       switch (ctype) {
-       case WIMLIB_COMPRESSION_TYPE_INVALID:
-               break;
-       case WIMLIB_COMPRESSION_TYPE_NONE:
-       case WIMLIB_COMPRESSION_TYPE_LZX:
-       case WIMLIB_COMPRESSION_TYPE_XPRESS:
-               wim->out_compression_type = ctype;
-               return 0;
-       }
-       return WIMLIB_ERR_INVALID_PARAM;
-}
-
  /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file
   * descriptor.  */
  int
@@ -2592,6 +2641,9 @@ write_wim_part(WIMStruct *wim,
         if (wim->compression_type != wim->out_compression_type)
                 wim->hdr.flags = get_wim_hdr_cflags(wim->out_compression_type);
  
+       /* Record the output chunk size in the header (no-op if unchanged).  */
+       wim->hdr.chunk_size = wim->out_chunk_size;
+
         /* Use GUID if specified; otherwise generate a new one.  */
         if (guid)
                 memcpy(wim->hdr.guid, guid, WIMLIB_GUID_LEN);
@@ -2944,6 +2996,7 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags,
                                 wim->lookup_table,
                                 &wim->out_fd,
                                 wim->compression_type,
+                               wim->chunk_size,
                                 &wim->lzx_context,
                                 write_flags,
                                 num_threads,
@@ -3066,7 +3119,8 @@ wimlib_overwrite(WIMStruct *wim, int write_flags,
             && !(write_flags & (WIMLIB_WRITE_FLAG_REBUILD |
                                 WIMLIB_WRITE_FLAG_PIPABLE))
             && !(wim_is_pipable(wim))
-           && wim->compression_type == wim->out_compression_type)
+           && wim->compression_type == wim->out_compression_type
+           && wim->chunk_size == wim->out_chunk_size)
         {
                 ret = overwrite_wim_inplace(wim, write_flags, num_threads,
                                             progress_func);
diff --git a/src/xml.c b/src/xml.c

index deeedb50efba04f5088e52615f66380124846c8f..cd335e07d65fa4a72a09223699484b382c7aa8ce 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1522,6 +1522,7 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
                                              WIM_RESHDR_FLAG_METADATA,
                                              &wim->out_fd,
                                              WIMLIB_COMPRESSION_TYPE_NONE,
+                                            0,
                                              out_res_entry,
                                              NULL,
                                              write_resource_flags,
diff --git a/src/xpress-compress.c b/src/xpress-compress.c

index 5a314b69356a949c620f4c47e05e01d5f3d64b76..518a0e3ba70b23fdd24a85df93a95c5da33f4f52 100644 (file)
--- a/src/xpress-compress.c
+++ b/src/xpress-compress.c
@@ -36,6 +36,10 @@
  #include "wimlib/util.h"
  #include "wimlib/xpress.h"
  
+#ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+#endif
+
  #include <string.h>
  
  /* Intermediate XPRESS match/literal representation.  */
@@ -132,6 +136,7 @@ xpress_record_match(unsigned len, unsigned offset, void *_ctx)
  static const struct lz_params xpress_lz_params = {
         .min_match      = XPRESS_MIN_MATCH_LEN,
         .max_match      = XPRESS_MAX_MATCH_LEN,
+       .max_offset     = XPRESS_MAX_OFFSET,
         .good_match     = 16,
         .nice_match     = 32,
         .max_chain_len  = 16,
@@ -149,13 +154,17 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
         struct output_bitstream ostream;
  
         struct xpress_record_ctx record_ctx;
-       struct xpress_match matches[uncompressed_len];
-       u8 udata[uncompressed_len + 8];
+
+       struct xpress_match *matches;
+       input_idx_t *prev_tab;
+       u8 *udata;
+
         u16 codewords[XPRESS_NUM_SYMBOLS];
         u8 lens[XPRESS_NUM_SYMBOLS];
         input_idx_t num_matches;
         input_idx_t compressed_len;
         input_idx_t i;
+       const size_t stack_max = 65536;
  
         /* XPRESS requires 256 bytes of overhead for the Huffman code, so it's
          * impossible to compress 256 bytes or less of data to less than the
@@ -169,6 +178,21 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
         if (uncompressed_len < XPRESS_NUM_SYMBOLS / 2 + 1 + 4)
                 return 0;
  
+       if (uncompressed_len <= stack_max) {
+               matches = alloca(uncompressed_len * sizeof(matches[0]));
+               udata = alloca(uncompressed_len + 8);
+               prev_tab = alloca(uncompressed_len * sizeof(prev_tab[0]));
+       } else {
+               matches = MALLOC(uncompressed_len * sizeof(matches[0]));
+               udata = MALLOC(uncompressed_len + 8);
+               prev_tab = MALLOC(uncompressed_len * sizeof(prev_tab[0]));
+               if (matches == NULL || udata == NULL || prev_tab == NULL) {
+                       WARNING("Failed to allocate memory for compression...");
+                       compressed_len = 0;
+                       goto out_free;
+               }
+       }
+
         /* Copy the data to a temporary buffer, but only to avoid
          * inconsequential accesses of uninitialized memory in
          * lz_analyze_block().  */
@@ -183,7 +207,8 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
                          xpress_record_match,
                          xpress_record_literal,
                          &record_ctx,
-                        &xpress_lz_params);
+                        &xpress_lz_params,
+                        prev_tab);
  
         num_matches = (record_ctx.matches - matches);
  
@@ -206,8 +231,10 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
  
         /* Flush any pending data and get the length of the compressed data.  */
         compressed_len = flush_output_bitstream(&ostream);
-       if (compressed_len == ~(input_idx_t)0)
-               return 0;
+       if (compressed_len == ~(input_idx_t)0) {
+               compressed_len = 0;
+               goto out_free;
+       }
         compressed_len += XPRESS_NUM_SYMBOLS / 2;
  
  #if defined(ENABLE_XPRESS_DEBUG) || defined(ENABLE_VERIFY_COMPRESSION) || 1
@@ -218,15 +245,24 @@ wimlib_xpress_compress(const void * restrict uncompressed_data,
                 ERROR("Failed to decompress data we "
                       "compressed using XPRESS algorithm");
                 wimlib_assert(0);
-               return 0;
+               compressed_len = 0;
+               goto out_free;
         }
  
         if (memcmp(uncompressed_data, udata, uncompressed_len)) {
                 ERROR("Data we compressed using XPRESS algorithm "
                       "didn't decompress to original");
                 wimlib_assert(0);
-               return 0;
+               compressed_len = 0;
+               goto out_free;
         }
  #endif
+
+out_free:
+       if (uncompressed_len > stack_max) {
+               FREE(matches);
+               FREE(udata);
+               FREE(prev_tab);
+       }
         return compressed_len;
  }
author	Eric Biggers <ebiggers3@gmail.com>
	Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)
committer	Eric Biggers <ebiggers3@gmail.com>
	Mon, 9 Dec 2013 06:26:22 +0000 (00:26 -0600)
include/wimlib.h		patch \| blob \| history
include/wimlib/compress.h		patch \| blob \| history
include/wimlib/header.h		patch \| blob \| history
include/wimlib/lookup_table.h		patch \| blob \| history
include/wimlib/ntfs_3g.h		patch \| blob \| history
include/wimlib/resource.h		patch \| blob \| history
include/wimlib/util.h		patch \| blob \| history
include/wimlib/wim.h		patch \| blob \| history
include/wimlib/win32.h		patch \| blob \| history
programs/imagex.c		patch \| blob \| history
src/extract.c		patch \| blob \| history
src/header.c		patch \| blob \| history
src/integrity.c		patch \| blob \| history
src/lookup_table.c		patch \| blob \| history
src/lz77.c		patch \| blob \| history
src/lzx-compress.c		patch \| blob \| history
src/metadata_resource.c		patch \| blob \| history
src/ntfs-3g_capture.c		patch \| blob \| history
src/resource.c		patch \| blob \| history
src/util.c		patch \| blob \| history
src/wim.c		patch \| blob \| history
src/win32_capture.c		patch \| blob \| history
src/write.c		patch \| blob \| history
src/xml.c		patch \| blob \| history
src/xpress-compress.c		patch \| blob \| history