/*
 * Compress chunks of data (parallel version).
 *
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCTL_H
#  include <sys/sysctl.h>
#endif

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#include "wimlib/win32.h" /* win32_get_number_of_processors() */

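/*
 * This file implements a "chunk compressor" that compresses chunks of data
 * in parallel: the main thread fills messages (batches of up to
 * MAX_CHUNKS_PER_MSG chunks each) and posts them to a queue serviced by
 * worker threads, then collects the compressed results in submission order
 * from a second queue.
 */
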
struct message_queue {
	struct list_head list;
	pthread_mutex_t lock;
	pthread_cond_t msg_avail_cond;
	pthread_cond_t space_avail_cond;
	bool terminating;
};

struct compressor_thread_data {
	pthread_t thread;
	struct message_queue *chunks_to_compress_queue;
	struct message_queue *compressed_chunks_queue;
	struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

struct message {
	u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
	u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
	u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
	u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
	size_t num_filled_chunks;
	size_t num_alloc_chunks;
	struct list_head list;
	bool complete;
	struct list_head submission_list;
};

struct parallel_chunk_compressor {
	struct chunk_compressor base;

	struct message_queue chunks_to_compress_queue;
	struct message_queue compressed_chunks_queue;
	struct compressor_thread_data *thread_data;
	unsigned num_thread_data;
	unsigned num_started_threads;

	struct message *msgs;
	size_t num_messages;

	struct list_head available_msgs;
	struct list_head submitted_msgs;
	struct message *next_submit_msg;
	struct message *next_ready_msg;
	size_t next_chunk_idx;
};

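/*
 * Usage sketch (hypothetical caller, not part of this file).  A caller
 * alternates between filling chunk buffers and draining results through
 * the function pointers in struct chunk_compressor:
 *
 *	void *buf = c->get_chunk_buffer(c);
 *	if (buf != NULL) {
 *		u32 usize = read_next_chunk(buf, c->out_chunk_size);
 *		c->signal_chunk_filled(c, usize);
 *	} else {
 *		const void *cdata;
 *		u32 csize, usize;
 *		if (c->get_compression_result(c, &cdata, &csize, &usize))
 *			write_chunk(cdata, csize, usize);
 *	}
 *
 * read_next_chunk() and write_chunk() are hypothetical helpers.
 */
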
static unsigned
get_default_num_threads(void)
{
	long n;

#ifdef __WIN32__
	n = win32_get_number_of_processors();
#else
	n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
	if (n < 1 || n >= UINT_MAX) {
		WARNING("Failed to determine number of processors; assuming 1.");
		return 1;
	}
	return n;
}

static u64
get_avail_memory(void)
{
#ifdef __WIN32__
	u64 phys_bytes = win32_get_avail_memory();
	if (phys_bytes == 0)
		goto default_size;
	return phys_bytes;
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
	long page_size = sysconf(_SC_PAGESIZE);
	long num_pages = sysconf(_SC_PHYS_PAGES);
	if (page_size <= 0 || num_pages <= 0)
		goto default_size;
	return ((u64)page_size * (u64)num_pages);
#else
	int mib[2] = {CTL_HW, HW_MEMSIZE};
	u64 memsize;
	size_t len = sizeof(memsize);

	if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
		goto default_size;
	return memsize;
#endif

default_size:
	WARNING("Failed to determine available memory; assuming 1 GiB");
	return (u64)1 << 30;
}

static int
message_queue_init(struct message_queue *q)
{
	if (pthread_mutex_init(&q->lock, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize mutex");
		goto err;
	}
	if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize condition variable");
		goto err_destroy_lock;
	}
	if (pthread_cond_init(&q->space_avail_cond, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize condition variable");
		goto err_destroy_msg_avail_cond;
	}
	INIT_LIST_HEAD(&q->list);
	q->terminating = false;
	return 0;

err_destroy_msg_avail_cond:
	pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
	pthread_mutex_destroy(&q->lock);
err:
	return WIMLIB_ERR_NOMEM;
}

static void
message_queue_destroy(struct message_queue *q)
{
	if (q->list.next != NULL) {
		/* Queue was initialized.  */
		pthread_mutex_destroy(&q->lock);
		pthread_cond_destroy(&q->msg_avail_cond);
		pthread_cond_destroy(&q->space_avail_cond);
	}
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
	pthread_mutex_lock(&q->lock);
	list_add_tail(&msg->list, &q->list);
	pthread_cond_signal(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}

static struct message *
message_queue_get(struct message_queue *q)
{
	struct message *msg;

	pthread_mutex_lock(&q->lock);
	while (list_empty(&q->list) && !q->terminating)
		pthread_cond_wait(&q->msg_avail_cond, &q->lock);
	if (!q->terminating) {
		msg = list_entry(q->list.next, struct message, list);
		list_del(&msg->list);
	} else
		msg = NULL;
	pthread_mutex_unlock(&q->lock);
	return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->terminating = true;
	pthread_cond_broadcast(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}

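/*
 * After message_queue_terminate() is called, message_queue_get() returns
 * NULL to every waiting thread; this is how the worker loop in
 * compressor_thread_proc() below is told to exit.
 */
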
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
	msg->num_alloc_chunks = num_chunks;
	for (size_t i = 0; i < num_chunks; i++) {
		/* A compressed chunk is only worth keeping if it is strictly
		 * smaller than the original, so out_chunk_size - 1 bytes of
		 * output space suffice.  */
		msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
		msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
		if (msg->compressed_chunks[i] == NULL ||
		    msg->uncompressed_chunks[i] == NULL)
			return WIMLIB_ERR_NOMEM;
	}
	return 0;
}

static void
destroy_message(struct message *msg)
{
	for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
		FREE(msg->compressed_chunks[i]);
		FREE(msg->uncompressed_chunks[i]);
	}
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
	if (msgs) {
		for (size_t i = 0; i < num_messages; i++)
			destroy_message(&msgs[i]);
		FREE(msgs);
	}
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
	struct message *msgs;

	msgs = CALLOC(count, sizeof(struct message));
	if (msgs == NULL)
		return NULL;
	for (size_t i = 0; i < count; i++) {
		if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
			free_messages(msgs, count);
			return NULL;
		}
	}
	return msgs;
}

static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
	for (size_t i = 0; i < msg->num_filled_chunks; i++) {
		wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
		msg->compressed_chunk_sizes[i] =
			wimlib_compress(msg->uncompressed_chunks[i],
					msg->uncompressed_chunk_sizes[i],
					msg->compressed_chunks[i],
					msg->uncompressed_chunk_sizes[i] - 1,
					compressor);
	}
}

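/*
 * Note: compress_chunks() caps each chunk's output space at usize - 1
 * bytes, so wimlib_compress() returns 0 for any chunk that cannot be
 * strictly shrunk; parallel_chunk_compressor_get_compression_result()
 * treats a stored compressed size of 0 as "return the chunk uncompressed".
 */
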
static void *
compressor_thread_proc(void *arg)
{
	struct compressor_thread_data *params = arg;
	struct message *msg;

	while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
		compress_chunks(msg, params->compressor);
		message_queue_put(params->compressed_chunks_queue, msg);
	}
	return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
	struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
	unsigned i;

	if (ctx == NULL)
		return;

	if (ctx->num_started_threads != 0) {
		DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
		message_queue_terminate(&ctx->chunks_to_compress_queue);

		for (i = 0; i < ctx->num_started_threads; i++)
			pthread_join(ctx->thread_data[i].thread, NULL);
	}

	message_queue_destroy(&ctx->chunks_to_compress_queue);
	message_queue_destroy(&ctx->compressed_chunks_queue);

	if (ctx->thread_data != NULL)
		for (i = 0; i < ctx->num_thread_data; i++)
			wimlib_free_compressor(ctx->thread_data[i].compressor);

	FREE(ctx->thread_data);

	free_messages(ctx->msgs, ctx->num_messages);

	FREE(ctx);
}

static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
	struct message *msg = ctx->next_submit_msg;

	msg->complete = false;
	list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
	message_queue_put(&ctx->chunks_to_compress_queue, msg);
	ctx->next_submit_msg = NULL;
}

static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
	struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
	struct message *msg;

	if (ctx->next_submit_msg) {
		msg = ctx->next_submit_msg;
	} else {
		if (list_empty(&ctx->available_msgs))
			return NULL;

		msg = list_entry(ctx->available_msgs.next, struct message, list);
		list_del(&msg->list);
		ctx->next_submit_msg = msg;
		msg->num_filled_chunks = 0;
	}

	return msg->uncompressed_chunks[msg->num_filled_chunks];
}

static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
	struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
	struct message *msg;

	wimlib_assert(usize > 0);
	wimlib_assert(usize <= ctx->base.out_chunk_size);
	wimlib_assert(ctx->next_submit_msg);

	msg = ctx->next_submit_msg;
	msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
	if (++msg->num_filled_chunks == msg->num_alloc_chunks)
		submit_compression_msg(ctx);
}

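/*
 * Note on ordering: results are handed back strictly in submission order.
 * get_compression_result() below blocks on the message at the head of
 * submitted_msgs until it is complete, even if messages submitted later
 * have already been finished by other worker threads.
 */
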
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
						 const void **cdata_ret, u32 *csize_ret,
						 u32 *usize_ret)
{
	struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
	struct message *msg;

	if (ctx->next_submit_msg)
		submit_compression_msg(ctx);

	if (ctx->next_ready_msg) {
		msg = ctx->next_ready_msg;
	} else {
		if (list_empty(&ctx->submitted_msgs))
			return false;

		/* Wait for the earliest-submitted message to complete,
		 * marking messages drained from the queue as complete along
		 * the way.  */
		while (!(msg = list_entry(ctx->submitted_msgs.next,
					  struct message,
					  submission_list))->complete)
			message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

		ctx->next_ready_msg = msg;
		ctx->next_chunk_idx = 0;
	}

	if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
		*cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
	} else {
		/* Chunk was incompressible; return it uncompressed.  */
		*cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
	}
	*usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

	if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
		/* Message fully consumed; recycle it.  */
		list_del(&msg->submission_list);
		list_add_tail(&msg->list, &ctx->available_msgs);
		ctx->next_ready_msg = NULL;
	}

	return true;
}

int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
			      unsigned num_threads, u64 max_memory,
			      struct chunk_compressor **compressor_ret)
{
	u64 approx_mem_required;
	size_t chunks_per_msg;
	size_t msgs_per_thread;
	struct parallel_chunk_compressor *ctx;
	unsigned i;
	int ret;
	unsigned desired_num_threads;

	wimlib_assert(out_chunk_size > 0);

	if (num_threads == 0)
		num_threads = get_default_num_threads();

	if (num_threads == 1) {
		DEBUG("Only 1 thread; not bothering with parallel chunk compressor.");
		/* Not an error; the caller should fall back to serial chunk
		 * compression.  */
		return -1;
	}

	if (max_memory == 0)
		max_memory = get_avail_memory();

	desired_num_threads = num_threads;

	if (out_chunk_size < ((u32)1 << 23)) {
		/* Relatively small chunks.  Use 2 messages per thread, each
		 * with at least 2 chunks.  Use more chunks per message if there
		 * are lots of threads and/or the chunks are very small.  */
		chunks_per_msg = 2;
		chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
		chunks_per_msg = max(chunks_per_msg, 2);
		chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
		msgs_per_thread = 2;
	} else {
		/* Big chunks: Just have one buffer per thread --- more would
		 * just waste memory.  */
		chunks_per_msg = 1;
		msgs_per_thread = 1;
	}

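	/* Illustrative example (not from the original source): with
	 * out_chunk_size = 4096 and num_threads = 8, the above gives
	 * chunks_per_msg = 2 + 8 * (65536 / 4096) / 16 = 10 and
	 * msgs_per_thread = 2, i.e. 160 chunk buffers in flight.  */
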
	/* Shrink the parallelism until the approximate memory usage fits
	 * within the limit.  */
	for (;;) {
		approx_mem_required =
			(u64)chunks_per_msg *
			(u64)msgs_per_thread *
			(u64)num_threads *
			(u64)out_chunk_size
			+ out_chunk_size
			+ 1000000
			+ num_threads * wimlib_get_compressor_needed_memory(out_ctype,
									    out_chunk_size,
									    0);
		if (approx_mem_required <= max_memory)
			break;

		if (chunks_per_msg > 1)
			chunks_per_msg--;
		else if (msgs_per_thread > 1)
			msgs_per_thread--;
		else if (num_threads > 1)
			num_threads--;
		else
			break;
	}

	if (num_threads < desired_num_threads) {
		WARNING("Wanted to use %u threads, but limiting to %u "
			"to fit in available memory!",
			desired_num_threads, num_threads);
	}

	if (num_threads == 1) {
		DEBUG("Only 1 thread; not bothering with parallel chunk compressor.");
		return -2;
	}

	ret = WIMLIB_ERR_NOMEM;
	ctx = CALLOC(1, sizeof(*ctx));
	if (ctx == NULL)
		goto err;

	ctx->base.out_ctype = out_ctype;
	ctx->base.out_chunk_size = out_chunk_size;
	ctx->base.destroy = parallel_chunk_compressor_destroy;
	ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
	ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
	ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

	ctx->num_thread_data = num_threads;

	ret = message_queue_init(&ctx->chunks_to_compress_queue);
	if (ret)
		goto err;

	ret = message_queue_init(&ctx->compressed_chunks_queue);
	if (ret)
		goto err;

	ret = WIMLIB_ERR_NOMEM;
	ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
	if (ctx->thread_data == NULL)
		goto err;

	for (i = 0; i < num_threads; i++) {
		struct compressor_thread_data *dat;

		dat = &ctx->thread_data[i];

		dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
		dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
		ret = wimlib_create_compressor(out_ctype, out_chunk_size,
					       WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
					       &dat->compressor);
		if (ret)
			goto err;
	}

	for (ctx->num_started_threads = 0;
	     ctx->num_started_threads < num_threads;
	     ctx->num_started_threads++)
	{
		DEBUG("pthread_create thread %u of %u",
		      ctx->num_started_threads + 1, num_threads);
		ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
				     NULL,
				     compressor_thread_proc,
				     &ctx->thread_data[ctx->num_started_threads]);
		if (ret) {
			/* pthread_create() returns the error code rather than
			 * setting errno.  */
			errno = ret;
			WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
					   ctx->num_started_threads + 1,
					   num_threads);
			ret = WIMLIB_ERR_NOMEM;
			if (ctx->num_started_threads >= 2)
				break;
			goto err;
		}
	}

	ctx->base.num_threads = ctx->num_started_threads;

	ret = WIMLIB_ERR_NOMEM;
	ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
	ctx->msgs = allocate_messages(ctx->num_messages,
				      chunks_per_msg, out_chunk_size);
	if (ctx->msgs == NULL)
		goto err;

	INIT_LIST_HEAD(&ctx->available_msgs);
	for (size_t i = 0; i < ctx->num_messages; i++)
		list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

	INIT_LIST_HEAD(&ctx->submitted_msgs);

	*compressor_ret = &ctx->base;
	return 0;

err:
	parallel_chunk_compressor_destroy(&ctx->base);
	return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */