4 * Compress chunks of data (parallel version).
8 * Copyright (C) 2013 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
28 #ifdef ENABLE_MULTITHREADED_COMPRESSION
35 #include "wimlib/assert.h"
36 #include "wimlib/chunk_compressor.h"
37 #include "wimlib/error.h"
38 #include "wimlib/list.h"
39 #include "wimlib/util.h"
/*
 * A thread-safe queue of 'struct message'.  Producers append messages with
 * message_queue_put(); consumers block in message_queue_get() until a message
 * arrives or the queue is terminated.
 * NOTE(review): the mutex ('lock') and the 'terminating' flag used by the
 * queue functions below are declared on lines elided from this listing.
 */
41 struct message_queue {
42 struct list_head list;
44 pthread_cond_t msg_avail_cond;
45 pthread_cond_t space_avail_cond;
/*
 * Per-worker-thread state: pointers to the two shared queues, plus the
 * thread's private compressor instance (each thread gets its own compressor
 * so no locking is needed around wimlib_compress()).
 * NOTE(review): the pthread_t handle joined in the destroy path is declared
 * on a line elided from this listing.
 */
49 struct compressor_thread_data {
51 struct message_queue *chunks_to_compress_queue;
52 struct message_queue *compressed_chunks_queue;
53 struct wimlib_compressor *compressor;
/* Maximum number of chunks a single message may carry.  */
56 #define MAX_CHUNKS_PER_MSG 16

/*
 * A unit of work exchanged between the main thread and the worker threads:
 * a batch of up to MAX_CHUNKS_PER_MSG chunks with their input/output buffers
 * and sizes.  'list' links the message into a queue or the available list;
 * 'submission_list' tracks submission order so results are returned in order.
 * NOTE(review): the 'struct message {' opening line and the 'complete' flag
 * (set/tested in the result path below) are elided from this listing.
 */
59 u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
60 u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
61 u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
62 u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
63 size_t num_filled_chunks;
64 size_t num_alloc_chunks;
65 struct list_head list;
67 struct list_head submission_list;
/*
 * The parallel chunk compressor context.  Embeds the generic
 * 'struct chunk_compressor' interface ('base') so callers can use it through
 * the common vtable.  Messages cycle: available_msgs -> (being filled via
 * next_submit_msg) -> chunks_to_compress_queue -> compressed_chunks_queue ->
 * submitted_msgs/next_ready_msg -> back to available_msgs.
 * NOTE(review): the 'msgs' array and 'num_messages' fields referenced in the
 * destroy path are declared on lines elided from this listing.
 */
70 struct parallel_chunk_compressor {
71 struct chunk_compressor base;
73 struct message_queue chunks_to_compress_queue;
74 struct message_queue compressed_chunks_queue;
75 struct compressor_thread_data *thread_data;
76 unsigned num_thread_data;
77 unsigned num_started_threads;
82 struct list_head available_msgs;
83 struct list_head submitted_msgs;
84 struct message *next_submit_msg;
85 struct message *next_ready_msg;
86 size_t next_chunk_idx;
/*
 * Initialize the lock, the two condition variables, and the message list of
 * @q.  On any pthread init failure the already-initialized objects are torn
 * down via the error labels and WIMLIB_ERR_NOMEM is returned.
 * NOTE(review): the function head, braces, success return, and the
 * 'err_destroy_lock:' label are elided from this listing.
 */
92 message_queue_init(struct message_queue *q)
94 if (pthread_mutex_init(&q->lock, NULL)) {
95 ERROR_WITH_ERRNO("Failed to initialize mutex");
98 if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
99 ERROR_WITH_ERRNO("Failed to initialize condition variable");
100 goto err_destroy_lock;
102 if (pthread_cond_init(&q->space_avail_cond, NULL)) {
103 ERROR_WITH_ERRNO("Failed to initialize condition variable");
104 goto err_destroy_msg_avail_cond;
106 INIT_LIST_HEAD(&q->list);
109 err_destroy_msg_avail_cond:
110 pthread_cond_destroy(&q->msg_avail_cond);
112 pthread_mutex_destroy(&q->lock);
114 return WIMLIB_ERR_NOMEM;
/*
 * Destroy the synchronization objects of @q.  The 'q->list.next != NULL'
 * test makes this safe to call on a queue that was never successfully
 * initialized (INIT_LIST_HEAD() is the last step of init).
 */
118 message_queue_destroy(struct message_queue *q)
120 if (q->list.next != NULL) {
121 pthread_mutex_destroy(&q->lock);
122 pthread_cond_destroy(&q->msg_avail_cond);
123 pthread_cond_destroy(&q->space_avail_cond);
/*
 * Append @msg to the tail of @q and wake one waiter.  Signaling while the
 * lock is held is valid; waiters recheck their predicate after waking.
 */
128 message_queue_put(struct message_queue *q, struct message *msg)
130 pthread_mutex_lock(&q->lock);
131 list_add_tail(&msg->list, &q->list);
132 pthread_cond_signal(&q->msg_avail_cond);
133 pthread_mutex_unlock(&q->lock);
/*
 * Block until a message is available on @q or the queue is terminated.
 * Returns the oldest message, or (per the termination branch) no message
 * once 'terminating' has been set — callers treat that as the shutdown
 * signal.  NOTE(review): the NULL-return line in the terminating case is
 * elided from this listing.
 */
136 static struct message *
137 message_queue_get(struct message_queue *q)
141 pthread_mutex_lock(&q->lock);
/* Standard condition-variable wait loop: re-test after every wakeup. */
142 while (list_empty(&q->list) && !q->terminating)
143 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
144 if (!q->terminating) {
145 msg = list_entry(q->list.next, struct message, list);
146 list_del(&msg->list);
149 pthread_mutex_unlock(&q->lock);
/*
 * Mark @q as terminating and wake *all* waiters so every worker thread
 * blocked in message_queue_get() can observe the flag and exit.
 */
154 message_queue_terminate(struct message_queue *q)
156 pthread_mutex_lock(&q->lock);
157 q->terminating = true;
158 pthread_cond_broadcast(&q->msg_avail_cond);
159 pthread_mutex_unlock(&q->lock);
/*
 * Allocate the per-chunk input/output buffers for @msg.  The compressed
 * buffer is one byte smaller than the chunk size: compression is only
 * worthwhile if it saves at least one byte (see compress_chunks()).
 * Returns WIMLIB_ERR_NOMEM on allocation failure; partially-allocated
 * buffers are reclaimed later by destroy_message(), which walks
 * num_alloc_chunks entries (set before the loop) and FREEs NULLs harmlessly.
 */
163 init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
165 msg->num_alloc_chunks = num_chunks;
166 for (size_t i = 0; i < num_chunks; i++) {
167 msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
168 msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
169 if (msg->compressed_chunks[i] == NULL ||
170 msg->uncompressed_chunks[i] == NULL)
171 return WIMLIB_ERR_NOMEM;
/* Free all chunk buffers of @msg (inverse of init_message()).  */
177 destroy_message(struct message *msg)
179 for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
180 FREE(msg->compressed_chunks[i]);
181 FREE(msg->uncompressed_chunks[i]);
/* Destroy each message in the array, then free the array itself.
 * NOTE(review): the trailing FREE(msgs) and NULL-check lines are elided
 * from this listing.  */
186 free_messages(struct message *msgs, size_t num_messages)
189 for (size_t i = 0; i < num_messages; i++)
190 destroy_message(&msgs[i]);
/*
 * Allocate an array of @count zeroed messages, each with buffers for
 * @chunks_per_msg chunks of up to @out_chunk_size bytes.  On failure the
 * entire array is freed (free_messages() over the full count is safe because
 * CALLOC zeroed every message) and allocation fails as a whole.
 * NOTE(review): the NULL-check after CALLOC and the success/failure return
 * lines are elided from this listing.
 */
195 static struct message *
196 allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
198 struct message *msgs;
200 msgs = CALLOC(count, sizeof(struct message));
203 for (size_t i = 0; i < count; i++) {
204 if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
205 free_messages(msgs, count);
/*
 * Compress every filled chunk in @msg with @compressor.  The output buffer
 * limit is (usize - 1): if the data cannot be compressed to strictly fewer
 * bytes than its uncompressed size, the stored compressed size is left as
 * the "incompressible" sentinel, which the result path maps to returning
 * the uncompressed data instead.
 * NOTE(review): the closing argument(s) of the wimlib_compress() call are
 * elided from this listing.
 */
213 compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
216 for (size_t i = 0; i < msg->num_filled_chunks; i++) {
217 wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
218 msg->compressed_chunk_sizes[i] =
219 wimlib_compress(msg->uncompressed_chunks[i],
220 msg->uncompressed_chunk_sizes[i],
221 msg->compressed_chunks[i],
222 msg->uncompressed_chunk_sizes[i] - 1,
/*
 * Worker thread main loop: pull a message of chunks, compress them with this
 * thread's private compressor, and post the result.  The loop exits when
 * message_queue_get() returns NULL, i.e. when the input queue is terminated.
 */
228 compressor_thread_proc(void *arg)
230 struct compressor_thread_data *params = arg;
233 while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
234 compress_chunks(msg, params->compressor);
235 message_queue_put(params->compressed_chunks_queue, msg);
/*
 * Tear down the parallel compressor: terminate the input queue so workers
 * exit, join every started thread, then destroy queues, per-thread
 * compressors, the thread array, and all messages.  Safe on a partially
 * constructed context (the error path of new_parallel_chunk_compressor()
 * calls this), hence the NULL/zero checks throughout.
 * NOTE(review): the initial NULL-check of 'ctx', the FREE(ctx), and closing
 * braces are elided from this listing.
 */
241 parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
243 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
249 if (ctx->num_started_threads != 0) {
250 DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
251 message_queue_terminate(&ctx->chunks_to_compress_queue);
253 for (i = 0; i < ctx->num_started_threads; i++)
254 pthread_join(ctx->thread_data[i].thread, NULL);
257 message_queue_destroy(&ctx->chunks_to_compress_queue);
258 message_queue_destroy(&ctx->compressed_chunks_queue);
260 if (ctx->thread_data != NULL)
261 for (i = 0; i < ctx->num_thread_data; i++)
262 wimlib_free_compressor(ctx->thread_data[i].compressor);
264 FREE(ctx->thread_data);
266 free_messages(ctx->msgs, ctx->num_messages);
/*
 * Hand the partially- or fully-filled 'next_submit_msg' to the worker
 * threads.  The message is recorded on 'submitted_msgs' (submission order)
 * before being queued so results can later be returned in order.
 */
272 submit_compression_msg(struct parallel_chunk_compressor *ctx)
274 struct message *msg = ctx->next_submit_msg;
276 msg->complete = false;
277 list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
278 message_queue_put(&ctx->chunks_to_compress_queue, msg);
279 ctx->next_submit_msg = NULL;
/*
 * Return a buffer into which the caller may write the next uncompressed
 * chunk.  Reuses the message currently being filled if one exists;
 * otherwise takes a message off the available list and starts filling it.
 * NOTE(review): the branch taken when 'available_msgs' is empty (presumably
 * returning NULL to make the caller drain results first — confirm against
 * the full source) is elided from this listing.
 */
283 parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
285 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
288 if (ctx->next_submit_msg) {
289 msg = ctx->next_submit_msg;
291 if (list_empty(&ctx->available_msgs))
294 msg = list_entry(ctx->available_msgs.next, struct message, list);
295 list_del(&msg->list);
296 ctx->next_submit_msg = msg;
297 msg->num_filled_chunks = 0;
300 return msg->uncompressed_chunks[msg->num_filled_chunks];
/*
 * Record that the buffer handed out by get_chunk_buffer() now contains
 * @usize bytes (1 <= usize <= out_chunk_size).  Once the current message is
 * full it is submitted to the worker threads.
 */
304 parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
306 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
309 wimlib_assert(usize > 0);
310 wimlib_assert(usize <= ctx->base.out_chunk_size);
311 wimlib_assert(ctx->next_submit_msg);
313 msg = ctx->next_submit_msg;
314 msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
315 if (++msg->num_filled_chunks == msg->num_alloc_chunks)
316 submit_compression_msg(ctx);
/*
 * Retrieve the next chunk's compression result, in submission order.  Any
 * partially-filled message is flushed first.  If no message is currently
 * being consumed, results are drained from the compressed queue (marking
 * messages 'complete') until the *oldest submitted* message is complete,
 * preserving order even when workers finish out of order.  A compressed
 * size of 0 is the "incompressible" sentinel from compress_chunks(): the
 * uncompressed data is returned instead.  When the last chunk of a message
 * is consumed, the message is recycled onto 'available_msgs'.
 * NOTE(review): function head fragments, the empty-submitted_msgs return,
 * and closing braces are elided from this listing.
 */
320 parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
321 const void **cdata_ret, u32 *csize_ret,
324 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
327 if (ctx->next_submit_msg)
328 submit_compression_msg(ctx);
330 if (ctx->next_ready_msg) {
331 msg = ctx->next_ready_msg;
333 if (list_empty(&ctx->submitted_msgs))
/* Drain completed messages until the oldest submission is done. */
336 while (!(msg = list_entry(ctx->submitted_msgs.next,
338 submission_list))->complete)
339 message_queue_get(&ctx->compressed_chunks_queue)->complete = true;
341 ctx->next_ready_msg = msg;
342 ctx->next_chunk_idx = 0;
345 if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
346 *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
347 *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
349 *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
350 *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
352 *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
354 if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
355 list_del(&msg->submission_list);
356 list_add_tail(&msg->list, &ctx->available_msgs);
357 ctx->next_ready_msg = NULL;
/*
 * Create a parallel chunk compressor for compression type @out_ctype and
 * chunk size @out_chunk_size, using @num_threads threads (0 = one per
 * available CPU) and at most @max_memory bytes (0 = query available memory,
 * per the branch at line 387).  Falls back (returns without creating a
 * parallel compressor) when only one thread would be used.  On success,
 * *@compressor_ret receives the new compressor's generic interface.
 * On failure every partially constructed resource is released via
 * parallel_chunk_compressor_destroy().
 * NOTE(review): numerous lines (braces, early returns, some initializers
 * such as the base values of chunks_per_msg/msgs_per_thread, and parts of
 * the memory-estimate expression) are elided from this listing; comments
 * below describe only what the visible code shows.
 */
363 new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
364 unsigned num_threads, u64 max_memory,
365 struct chunk_compressor **compressor_ret)
367 u64 approx_mem_required;
368 size_t chunks_per_msg;
369 size_t msgs_per_thread;
370 struct parallel_chunk_compressor *ctx;
373 unsigned desired_num_threads;
375 wimlib_assert(out_chunk_size > 0);
377 if (num_threads == 0)
378 num_threads = get_available_cpus();
380 if (num_threads == 1) {
381 DEBUG("Only 1 thread; Not bothering with "
382 "parallel chunk compressor.");
387 max_memory = get_available_memory();
/* Remember the requested count so we can warn if memory forces fewer. */
389 desired_num_threads = num_threads;
391 if (out_chunk_size < ((u32)1 << 23)) {
392 /* Relatively small chunks. Use 2 messages per thread, each
393 * with at least 2 chunks. Use more chunks per message if there
394 * are lots of threads and/or the chunks are very small. */
396 chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
397 chunks_per_msg = max(chunks_per_msg, 2);
398 chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
401 /* Big chunks: Just have one buffer per thread --- more would
402 * just waste memory. */
/*
 * Shrink the configuration (chunks per message, then messages per
 * thread, then thread count) until the memory estimate fits.
 */
407 approx_mem_required =
408 (u64)chunks_per_msg *
409 (u64)msgs_per_thread *
414 + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
417 if (approx_mem_required <= max_memory)
420 if (chunks_per_msg > 1)
422 else if (msgs_per_thread > 1)
424 else if (num_threads > 1)
430 if (num_threads < desired_num_threads) {
431 WARNING("Wanted to use %u threads, but limiting to %u "
432 "to fit in available memory!",
433 desired_num_threads, num_threads);
436 if (num_threads == 1) {
437 DEBUG("Only 1 thread; Not bothering with "
438 "parallel chunk compressor.");
442 ret = WIMLIB_ERR_NOMEM;
443 ctx = CALLOC(1, sizeof(*ctx));
/* Fill in the generic chunk_compressor vtable. */
447 ctx->base.out_ctype = out_ctype;
448 ctx->base.out_chunk_size = out_chunk_size;
449 ctx->base.destroy = parallel_chunk_compressor_destroy;
450 ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
451 ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
452 ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;
454 ctx->num_thread_data = num_threads;
456 ret = message_queue_init(&ctx->chunks_to_compress_queue);
460 ret = message_queue_init(&ctx->compressed_chunks_queue);
464 ret = WIMLIB_ERR_NOMEM;
465 ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
466 if (ctx->thread_data == NULL)
/* One private compressor per thread; DESTRUCTIVE allows the
 * compressor to clobber its input buffer for speed. */
469 for (i = 0; i < num_threads; i++) {
470 struct compressor_thread_data *dat;
472 dat = &ctx->thread_data[i];
474 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
475 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
476 ret = wimlib_create_compressor(out_ctype, out_chunk_size,
477 WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
/* Start the workers; if creation fails after >= 2 threads have
 * started, proceed with the threads we have instead of failing. */
483 for (ctx->num_started_threads = 0;
484 ctx->num_started_threads < num_threads;
485 ctx->num_started_threads++)
487 DEBUG("pthread_create thread %u of %u",
488 ctx->num_started_threads + 1, num_threads);
489 ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
491 compressor_thread_proc,
492 &ctx->thread_data[ctx->num_started_threads]);
495 WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
496 ctx->num_started_threads + 1,
498 ret = WIMLIB_ERR_NOMEM;
499 if (ctx->num_started_threads >= 2)
505 ctx->base.num_threads = ctx->num_started_threads;
507 ret = WIMLIB_ERR_NOMEM;
/* Size the message pool to the threads actually started. */
508 ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
509 ctx->msgs = allocate_messages(ctx->num_messages,
510 chunks_per_msg, out_chunk_size);
511 if (ctx->msgs == NULL)
514 INIT_LIST_HEAD(&ctx->available_msgs);
515 for (size_t i = 0; i < ctx->num_messages; i++)
516 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
518 INIT_LIST_HEAD(&ctx->submitted_msgs);
520 *compressor_ret = &ctx->base;
/* Error path: destroy() handles partially constructed contexts. */
524 parallel_chunk_compressor_destroy(&ctx->base);
528 #endif /* ENABLE_MULTITHREADED_COMPRESSION */