/*
 * Compress chunks of data (parallel version).
 *
 * Copyright (C) 2013-2023 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see https://www.gnu.org/licenses/.
 */
#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/threads.h"
#include "wimlib/util.h"
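
/* A FIFO queue of messages, protected by a mutex and signaled with condition
 * variables. */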
struct message_queue {
        struct list_head list;
        struct mutex lock;
        struct condvar msg_avail_cond;
        struct condvar space_avail_cond;
        bool terminating;
};
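
/* Per-thread state: the worker thread itself, pointers to the two shared
 * queues, and the thread's private compressor instance. */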
struct compressor_thread_data {
        struct thread thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};
#define MAX_CHUNKS_PER_MSG 16
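
/* A batch of up to MAX_CHUNKS_PER_MSG chunks, handed to a compressor thread as
 * a single unit of work. */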
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};
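
/* The parallel chunk compressor: embeds the generic chunk_compressor interface
 * ('base') and owns the worker threads, the two message queues, and the pool
 * of messages. */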
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};
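
/* Initialize a message queue; returns 0 on success or WIMLIB_ERR_NOMEM. */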
static int
message_queue_init(struct message_queue *q)
{
        if (!mutex_init(&q->lock))
                goto err;
        if (!condvar_init(&q->msg_avail_cond))
                goto err_destroy_lock;
        if (!condvar_init(&q->space_avail_cond))
                goto err_destroy_msg_avail_cond;
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        condvar_destroy(&q->msg_avail_cond);
err_destroy_lock:
        mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                /* Only tear down if the queue was successfully initialized. */
                mutex_destroy(&q->lock);
                condvar_destroy(&q->msg_avail_cond);
                condvar_destroy(&q->space_avail_cond);
        }
}
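
/* Append a message to the queue and wake up one waiting consumer. */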
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        condvar_signal(&q->msg_avail_cond);
        mutex_unlock(&q->lock);
}
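
/* Remove and return the oldest message from the queue, blocking until a
 * message is available or the queue is terminating (in which case NULL is
 * returned). */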
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                condvar_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        mutex_unlock(&q->lock);
        return msg;
}
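
/* Mark the queue as terminating and wake up all waiting consumers. */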
static void
message_queue_terminate(struct message_queue *q)
{
        mutex_lock(&q->lock);
        q->terminating = true;
        condvar_broadcast(&q->msg_avail_cond);
        mutex_unlock(&q->lock);
}
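
/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the corresponding uncompressed buffer, since compression
 * is only useful if it saves at least one byte; wimlib_compress() reports
 * failure (returns 0) otherwise, and the chunk is then passed through
 * uncompressed. */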
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}
static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}
static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}
static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}
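
/* The procedure run by each compressor thread: pull messages off the input
 * queue, compress their chunks, and push them onto the output queue, until the
 * input queue is terminated. */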
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                message_queue_terminate(&ctx->chunks_to_compress_queue);
                for (i = 0; i < ctx->num_started_threads; i++)
                        thread_join(&ctx->thread_data[i].thread);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);
        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);
        FREE(ctx);
}
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}
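
/* Return a buffer into which the caller can write the next uncompressed chunk,
 * or NULL if no message is currently available (i.e. the caller should collect
 * some compression results first). */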
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}
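
/* Record that the buffer returned by the previous call was filled with @usize
 * bytes; once the current message is full, submit it for compression. */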
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}
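
/* Retrieve the next chunk's result, in submission order.  Returns false if no
 * chunks are pending; otherwise waits as needed for the oldest submitted
 * message to complete and returns either the compressed data or, if
 * compression didn't save space, the original data. */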
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }

        return true;
}
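
/*
 * Illustrative usage sketch, not taken verbatim from wimlib's callers: the
 * variable names and the LZX/32768 parameters are only examples, and the
 * error handling and buffer-refill logic are abbreviated.  The function
 * pointers are the ones installed on ctx->base by
 * new_parallel_chunk_compressor() below:
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      u32 csize, usize, filled;
 *
 *      if (new_parallel_chunk_compressor(WIMLIB_COMPRESSION_TYPE_LZX, 32768,
 *                                        0, 0, &c) == 0) {
 *              void *buf = c->get_chunk_buffer(c);
 *              // ... fill 'buf' with 'filled' bytes (<= c->out_chunk_size) ...
 *              c->signal_chunk_filled(c, filled);
 *
 *              // Results come back in submission order; false means none left.
 *              while (c->get_compression_result(c, &cdata, &csize, &usize)) {
 *                      // ... write 'csize' bytes from 'cdata' ...
 *              }
 *              c->destroy(c);
 *      }
 */

/*
 * Create a chunk compressor that compresses chunks in parallel using
 * @num_threads worker threads (0 means detect the number of CPUs), while
 * trying to stay within @max_memory bytes (0 means detect available memory).
 * Returns 0 on success, a positive WIMLIB_ERR_* code on failure, or a negative
 * value if a parallel compressor isn't worthwhile (only one thread would be
 * used).
 */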
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

        if (num_threads == 1)
                return -1;

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small. */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory. */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }

        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1)
                return -2;

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                if (!thread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                   compressor_thread_proc,
                                   &ctx->thread_data[ctx->num_started_threads]))
                {
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}