4 * Compress chunks of data (parallel version).
8 * Copyright (C) 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
29 #ifdef ENABLE_MULTITHREADED_COMPRESSION
31 #include "wimlib/assert.h"
32 #include "wimlib/chunk_compressor.h"
33 #include "wimlib/error.h"
34 #include "wimlib/list.h"
35 #include "wimlib/util.h"
37 # include "wimlib/win32.h" /* win32_get_number_of_processors() */
46 #ifdef HAVE_SYS_SYSCTL_H
47 # include <sys/sysctl.h>
/*
 * A thread-safe FIFO of 'struct message'.  Producers append under q->lock and
 * signal msg_avail_cond; consumers block on msg_avail_cond until a message (or
 * termination) is available.  NOTE(review): the mutex field, the 'terminating'
 * flag used by message_queue_get()/message_queue_terminate(), and the closing
 * brace are elided from this extract — confirm against the full file.
 */
50 struct message_queue {
/* Head of the linked list of queued messages (FIFO via list_add_tail). */
51 struct list_head list;
/* Signaled when a message is added or the queue is terminated. */
53 pthread_cond_t msg_avail_cond;
/* NOTE(review): no waiter on this condition is visible in this extract. */
54 pthread_cond_t space_avail_cond;
/*
 * Per-thread state for one compressor worker: the two queues it moves
 * messages between, plus its private compressor instance.
 * NOTE(review): the pthread_t 'thread' field referenced by
 * parallel_chunk_compressor_destroy() is elided from this extract.
 */
58 struct compressor_thread_data {
/* Queue the worker pulls work from (shared with the submitting side). */
60 struct message_queue *chunks_to_compress_queue;
/* Queue the worker pushes finished messages onto. */
61 struct message_queue *compressed_chunks_queue;
/* Private compressor — one per thread, so no locking is needed around it. */
62 struct wimlib_compressor *compressor;
/* Maximum number of chunks batched into a single message. */
65 #define MAX_CHUNKS_PER_MSG 2
/*
 * One unit of work passed through the queues: up to MAX_CHUNKS_PER_MSG chunks
 * with their input/output buffers and sizes.  NOTE(review): the 'struct
 * message {' opening line and the 'complete' flag used by the get_chunk path
 * are elided from this extract.
 */
/* Input buffers, each out_chunk_size bytes (see init_message()). */
68 u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
/* Output buffers, each out_chunk_size - 1 bytes: a result must be strictly
 * smaller than the input to be worth keeping. */
69 u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
70 u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
/* 0 means "compression did not help; use the uncompressed data". */
71 u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
/* How many of the slots currently hold data to (de)queue. */
72 size_t num_filled_chunks;
/* How many slots had buffers successfully allocated (used by destroy_message). */
73 size_t num_alloc_chunks;
/* Link for either a message_queue or the context's available_msgs list. */
74 struct list_head list;
/* Separate link for the context's submitted_msgs list (in-flight order). */
76 struct list_head submission_list;
/*
 * Parallel implementation of the 'struct chunk_compressor' interface: a pool
 * of worker threads fed through two message queues.  NOTE(review): the 'msgs'
 * and 'num_messages' fields referenced by the destroy path are elided from
 * this extract.
 */
79 struct parallel_chunk_compressor {
/* Must be first so the public handle can be cast back to this struct. */
80 struct chunk_compressor base;
/* Work flows: submit -> chunks_to_compress_queue -> worker ->
 * compressed_chunks_queue -> get_chunk. */
82 struct message_queue chunks_to_compress_queue;
83 struct message_queue compressed_chunks_queue;
/* Array of num_thread_data per-worker states. */
84 struct compressor_thread_data *thread_data;
85 unsigned num_thread_data;
/* May be < num_thread_data if pthread_create() failed partway through. */
86 unsigned num_started_threads;
/* Free messages ready for reuse. */
91 struct list_head available_msgs;
/* Messages currently in flight, in submission order (result ordering). */
92 struct list_head submitted_msgs;
/* Partially filled message not yet queued, if any. */
93 struct message *next_submit_msg;
/* Completed message currently being drained by get_chunk, if any. */
94 struct message *next_ready_msg;
/* Index of the next chunk to hand out from next_ready_msg. */
95 size_t next_chunk_idx;
/*
 * Detect the number of online processors to use as the default thread count.
 * Falls back to 1 (with a warning) if detection fails or returns a value
 * outside the usable range.  NOTE(review): the surrounding #ifdef __WIN32__
 * selector and the return statements are elided from this extract.
 */
99 get_default_num_threads(void)
/* Windows path. */
103 n = win32_get_number_of_processors();
/* POSIX path. */
105 n = sysconf(_SC_NPROCESSORS_ONLN);
/* sysconf() returns -1 on failure; also guard against a value too large to
 * fit the unsigned return type. */
107 if (n < 1 || n >= UINT_MAX) {
108 WARNING("Failed to determine number of processors; assuming 1.");
/*
 * Best-effort query of physical memory size, used to bound how many buffers
 * the parallel compressor allocates.  Tries, in order: the Win32 API, POSIX
 * sysconf(), and the BSD/macOS sysctl(HW_MEMSIZE).  On total failure it
 * warns and (per the message) assumes 1 GiB.  NOTE(review): the #ifdef
 * selectors and fallback return statements are elided from this extract.
 */
115 get_avail_memory(void)
/* Windows path. */
118 u64 phys_bytes = win32_get_avail_memory();
122 #elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
123 long page_size = sysconf(_SC_PAGESIZE);
124 long num_pages = sysconf(_SC_PHYS_PAGES);
/* sysconf() returns -1 (or 0 is nonsensical) on failure. */
125 if (page_size <= 0 || num_pages <= 0)
/* Widen before multiplying to avoid overflow on 32-bit 'long'. */
127 return ((u64)page_size * (u64)num_pages);
/* BSD/macOS path: HW_MEMSIZE yields a 64-bit byte count. */
129 int mib[2] = {CTL_HW, HW_MEMSIZE};
131 size_t len = sizeof(memsize);
/* len != 8 would mean the kernel returned something other than a u64. */
132 if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
138 WARNING("Failed to determine available memory; assuming 1 GiB");
/*
 * Initialize the mutex, both condition variables, and the message list of @q.
 * On failure, already-initialized primitives are unwound in reverse order via
 * the goto ladder and WIMLIB_ERR_NOMEM is returned.  NOTE(review): the
 * success return and the 'err_destroy_lock:' label line are elided from this
 * extract.
 */
143 message_queue_init(struct message_queue *q)
145 if (pthread_mutex_init(&q->lock, NULL)) {
146 ERROR_WITH_ERRNO("Failed to initialize mutex");
149 if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
150 ERROR_WITH_ERRNO("Failed to initialize condition variable");
151 goto err_destroy_lock;
153 if (pthread_cond_init(&q->space_avail_cond, NULL)) {
154 ERROR_WITH_ERRNO("Failed to initialize condition variable");
155 goto err_destroy_msg_avail_cond;
/* All primitives ready; an empty list also marks the queue as initialized
 * (message_queue_destroy() checks list.next != NULL). */
157 INIT_LIST_HEAD(&q->list);
/* Unwind path: destroy in reverse order of initialization. */
160 err_destroy_msg_avail_cond:
161 pthread_cond_destroy(&q->msg_avail_cond);
163 pthread_mutex_destroy(&q->lock);
165 return WIMLIB_ERR_NOMEM;
/*
 * Destroy the synchronization primitives of @q.  Safe to call on a queue that
 * was never initialized: INIT_LIST_HEAD() is what sets list.next non-NULL, so
 * a NULL next means init never completed and there is nothing to destroy.
 */
169 message_queue_destroy(struct message_queue *q)
171 if (q->list.next != NULL) {
172 pthread_mutex_destroy(&q->lock);
173 pthread_cond_destroy(&q->msg_avail_cond);
174 pthread_cond_destroy(&q->space_avail_cond);
/*
 * Append @msg to the tail of @q and wake one waiting consumer.
 * Standard mutex + condition-variable producer: signal while holding the
 * lock so the wakeup cannot be missed.
 */
179 message_queue_put(struct message_queue *q, struct message *msg)
181 pthread_mutex_lock(&q->lock);
182 list_add_tail(&msg->list, &q->list);
183 pthread_cond_signal(&q->msg_avail_cond);
184 pthread_mutex_unlock(&q->lock);
/*
 * Block until a message is available on @q or the queue is terminated.
 * Returns the dequeued message, or NULL once message_queue_terminate() has
 * been called (workers use NULL as their shutdown signal).  The wait loop
 * correctly re-checks its predicate, so spurious wakeups are harmless.
 * NOTE(review): the declaration of 'msg' (presumably initialized to NULL for
 * the terminating case) and the return statement are elided from this extract.
 */
187 static struct message *
188 message_queue_get(struct message_queue *q)
192 pthread_mutex_lock(&q->lock);
193 while (list_empty(&q->list) && !q->terminating)
194 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
195 if (!q->terminating) {
196 msg = list_entry(q->list.next, struct message, list);
197 list_del(&msg->list);
200 pthread_mutex_unlock(&q->lock);
/*
 * Mark @q as terminating and wake *all* waiters, so every blocked
 * message_queue_get() call returns NULL and its worker thread can exit.
 */
205 message_queue_terminate(struct message_queue *q)
207 pthread_mutex_lock(&q->lock);
208 q->terminating = true;
/* Broadcast, not signal: every worker blocked on this queue must wake. */
209 pthread_cond_broadcast(&q->msg_avail_cond);
210 pthread_mutex_unlock(&q->lock);
/*
 * Allocate the per-chunk input/output buffers for @msg.
 * Returns WIMLIB_ERR_NOMEM on allocation failure; num_alloc_chunks is set
 * up front so destroy_message() can free whatever was allocated (FREE(NULL)
 * on the untouched slots is a no-op because the containing array was
 * zero-allocated with CALLOC in allocate_messages()).
 */
214 init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
216 msg->num_alloc_chunks = num_chunks;
217 for (size_t i = 0; i < num_chunks; i++) {
/* out_chunk_size - 1: a compressed result must be strictly smaller than
 * the input to be useful, so the output buffer is one byte smaller. */
218 msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
219 msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
220 if (msg->compressed_chunks[i] == NULL ||
221 msg->uncompressed_chunks[i] == NULL)
222 return WIMLIB_ERR_NOMEM;
/*
 * Free the buffers of @msg allocated by init_message().  Only iterates over
 * the num_alloc_chunks slots that init_message() reached.
 */
228 destroy_message(struct message *msg)
230 for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
231 FREE(msg->compressed_chunks[i]);
232 FREE(msg->uncompressed_chunks[i]);
/*
 * Destroy each message's buffers, then (per the elided tail of this function,
 * presumably) free the @msgs array itself.  Counterpart to
 * allocate_messages().
 */
237 free_messages(struct message *msgs, size_t num_messages)
240 for (size_t i = 0; i < num_messages; i++)
241 destroy_message(&msgs[i]);
/*
 * Allocate an array of @count messages, each with buffers for
 * @chunks_per_msg chunks of up to @out_chunk_size bytes.
 * Returns the array, or (per the elided failure paths) NULL on allocation
 * failure.  CALLOC zeroes the array, which init_message()/destroy_message()
 * rely on for safe partial cleanup.  Passing the full @count to
 * free_messages() here is safe for the same reason: untouched messages have
 * num_alloc_chunks == 0.
 */
246 static struct message *
247 allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
249 struct message *msgs;
251 msgs = CALLOC(count, sizeof(struct message));
254 for (size_t i = 0; i < count; i++) {
255 if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
256 free_messages(msgs, count);
/*
 * Compress every filled chunk of @msg with @compressor.  The output bound of
 * usize - 1 means wimlib_compress() stores a nonzero compressed size only
 * when compression actually shrank the chunk; otherwise
 * compressed_chunk_sizes[i] records 0 and the consumer falls back to the
 * uncompressed data (see parallel_chunk_compressor_get_chunk()).
 * NOTE(review): the final argument of the wimlib_compress() call (the
 * compressor handle) is elided from this extract.
 */
264 compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
267 for (size_t i = 0; i < msg->num_filled_chunks; i++) {
/* submit_chunk() asserts size > 0, so empty chunks can't get here. */
268 wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
269 msg->compressed_chunk_sizes[i] =
270 wimlib_compress(msg->uncompressed_chunks[i],
271 msg->uncompressed_chunk_sizes[i],
272 msg->compressed_chunks[i],
273 msg->uncompressed_chunk_sizes[i] - 1,
/*
 * Worker thread main loop: pull a message, compress its chunks with this
 * thread's private compressor, push the result.  Exits when
 * message_queue_get() returns NULL, i.e. after the input queue has been
 * terminated.
 */
279 compressor_thread_proc(void *arg)
281 struct compressor_thread_data *params = arg;
284 while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
285 compress_chunks(msg, params->compressor);
286 message_queue_put(params->compressed_chunks_queue, msg);
/*
 * Tear down a parallel chunk compressor: stop and join the worker threads,
 * destroy both queues, free per-thread compressors, the thread-data array,
 * and the message buffers.  Also serves as the unwind path for a partially
 * constructed context from new_parallel_chunk_compressor(), hence the
 * defensive NULL/zero checks at every step.  NOTE(review): the early-return
 * for _ctx == NULL and the final FREE(ctx) are elided from this extract.
 */
292 parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
294 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
300 if (ctx->num_started_threads != 0) {
301 DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
/* Workers exit when the input queue terminates; then join each one. */
302 message_queue_terminate(&ctx->chunks_to_compress_queue);
304 for (i = 0; i < ctx->num_started_threads; i++)
305 pthread_join(ctx->thread_data[i].thread, NULL);
/* Both are safe on never-initialized queues (see message_queue_destroy). */
308 message_queue_destroy(&ctx->chunks_to_compress_queue);
309 message_queue_destroy(&ctx->compressed_chunks_queue);
311 if (ctx->thread_data != NULL)
312 for (i = 0; i < ctx->num_thread_data; i++)
/* wimlib_free_compressor() presumably tolerates NULL for slots whose
 * creation failed — TODO confirm against its definition. */
313 wimlib_free_compressor(ctx->thread_data[i].compressor);
315 FREE(ctx->thread_data);
317 free_messages(ctx->msgs, ctx->num_messages);
/*
 * Hand the partially-or-fully filled next_submit_msg to the worker threads:
 * mark it incomplete, record it on submitted_msgs (preserving submission
 * order for in-order retrieval), and enqueue it for compression.
 */
323 submit_compression_msg(struct parallel_chunk_compressor *ctx)
325 struct message *msg = ctx->next_submit_msg;
327 msg->complete = false;
328 list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
329 message_queue_put(&ctx->chunks_to_compress_queue, msg);
/* The message now belongs to the workers; start a fresh one next time. */
330 ctx->next_submit_msg = NULL;
/*
 * 'submit_chunk' implementation: copy @chunk into the current message,
 * submitting the message to the workers once it is full.  NOTE(review): the
 * return statements are elided from this extract; the branch at
 * list_empty(&ctx->available_msgs) presumably reports "no space" to the
 * caller — confirm against the chunk_compressor interface contract.
 */
334 parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
335 const void *chunk, u32 size)
337 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
340 wimlib_assert(size > 0);
341 wimlib_assert(size <= ctx->base.out_chunk_size);
343 if (ctx->next_submit_msg) {
/* Continue filling the message already in progress. */
344 msg = ctx->next_submit_msg;
346 if (list_empty(&ctx->available_msgs))
/* Take a free message and start filling it from slot 0. */
349 msg = list_entry(ctx->available_msgs.next, struct message, list);
350 list_del(&msg->list);
351 ctx->next_submit_msg = msg;
352 msg->num_filled_chunks = 0;
/* Copy is bounded by the assert above: size <= out_chunk_size, the buffer
 * size allocated in init_message(). */
355 memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
356 msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
357 if (++msg->num_filled_chunks == msg->num_alloc_chunks)
358 submit_compression_msg(ctx);
/*
 * 'get_chunk' implementation: return the next chunk's (possibly compressed)
 * data, in the same order the chunks were submitted.  Flushes any partially
 * filled message first, then waits for the oldest submitted message to
 * complete before draining its chunks one call at a time.  NOTE(review): the
 * return statements and the 'bool *' parameter/declaration lines are elided
 * from this extract.
 */
363 parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
364 const void **cdata_ret, u32 *csize_ret,
367 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
/* A partially filled message must be pushed out, or its chunks would
 * never be compressed and ordering would stall. */
371 if (ctx->next_submit_msg)
372 submit_compression_msg(ctx);
374 if (ctx->next_ready_msg) {
/* Still draining chunks from a previously completed message. */
375 msg = ctx->next_ready_msg;
377 if (list_empty(&ctx->submitted_msgs))
/* Completion order from the workers may differ from submission order:
 * keep absorbing completed messages (marking each 'complete') until the
 * *oldest* submitted message — the one we must return next — is done. */
380 while (!(msg = list_entry(ctx->submitted_msgs.next,
382 submission_list))->complete)
383 message_queue_get(&ctx->compressed_chunks_queue)->complete = true;
385 ctx->next_ready_msg = msg;
386 ctx->next_chunk_idx = 0;
/* A nonzero compressed size means compression shrank the chunk; zero means
 * fall back to the original data (see compress_chunks()). */
389 if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
390 *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
391 *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
393 *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
394 *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
396 *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
/* Message fully drained: recycle it onto the free list. */
398 if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
399 list_del(&msg->submission_list);
400 list_add_tail(&msg->list, &ctx->available_msgs);
401 ctx->next_ready_msg = NULL;
/*
 * Construct a parallel chunk compressor for compression type @out_ctype and
 * chunk size @out_chunk_size, using @num_threads worker threads (0 = detect)
 * and at most @max_memory bytes (0 presumably = detect; the assignment site's
 * condition is elided from this extract).  On success *compressor_ret gets
 * the new object.  Scales thread count and buffer counts down until the
 * estimated memory footprint fits; degrades to "no parallel compressor"
 * (elided return, presumably a status letting the caller fall back to serial
 * compression) when only one thread remains.  All failure paths funnel
 * through parallel_chunk_compressor_destroy(), which tolerates a partially
 * constructed context.
 */
407 new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
408 unsigned num_threads, u64 max_memory,
409 struct chunk_compressor **compressor_ret)
411 u64 approx_mem_required;
412 size_t chunks_per_msg;
413 size_t msgs_per_thread;
414 struct parallel_chunk_compressor *ctx;
417 unsigned desired_num_threads;
419 wimlib_assert(out_chunk_size > 0);
421 if (num_threads == 0)
422 num_threads = get_default_num_threads();
424 if (num_threads == 1) {
425 DEBUG("Only 1 thread; Not bothering with "
426 "parallel chunk compressor.");
431 max_memory = get_avail_memory();
/* Remember the requested count so we can warn if memory forces fewer. */
433 desired_num_threads = num_threads;
435 if (out_chunk_size < ((u32)1 << 23)) {
/* Chunks < 8 MiB: batch several per message. */
436 chunks_per_msg = MAX_CHUNKS_PER_MSG;
439 /* Big chunks: Just have one buffer per thread --- more would
440 * just waste memory. */
/* Iteratively shrink (elided loop structure) until the estimate fits:
 * buffers + per-thread compressor state. */
445 approx_mem_required =
446 (u64)chunks_per_msg *
447 (u64)msgs_per_thread *
452 + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
455 if (approx_mem_required <= max_memory)
/* Reduce in order of least impact: batch size, then buffering depth,
 * then thread count. */
458 if (chunks_per_msg > 1)
460 else if (msgs_per_thread > 1)
462 else if (num_threads > 1)
468 if (num_threads < desired_num_threads) {
469 WARNING("Wanted to use %u threads, but limiting to %u "
470 "to fit in available memory!",
471 desired_num_threads, num_threads);
474 if (num_threads == 1) {
475 DEBUG("Only 1 thread; Not bothering with "
476 "parallel chunk compressor.");
480 ret = WIMLIB_ERR_NOMEM;
481 ctx = CALLOC(1, sizeof(*ctx));
/* Fill in the chunk_compressor vtable/base fields. */
485 ctx->base.out_ctype = out_ctype;
486 ctx->base.out_chunk_size = out_chunk_size;
487 ctx->base.destroy = parallel_chunk_compressor_destroy;
488 ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
489 ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;
491 ctx->num_thread_data = num_threads;
493 ret = message_queue_init(&ctx->chunks_to_compress_queue);
497 ret = message_queue_init(&ctx->compressed_chunks_queue);
501 ret = WIMLIB_ERR_NOMEM;
/* CALLOC: destroy() relies on zeroed compressor pointers for cleanup of a
 * partially initialized array. */
502 ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
503 if (ctx->thread_data == NULL)
506 for (i = 0; i < num_threads; i++) {
507 struct compressor_thread_data *dat;
509 dat = &ctx->thread_data[i];
511 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
512 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
513 ret = wimlib_create_compressor(out_ctype, out_chunk_size,
514 NULL, &dat->compressor);
/* Start the workers, tracking how many actually started so that both
 * destroy() and the partial-failure path below join only live threads. */
519 for (ctx->num_started_threads = 0;
520 ctx->num_started_threads < num_threads;
521 ctx->num_started_threads++)
523 DEBUG("pthread_create thread %u of %u",
524 ctx->num_started_threads + 1, num_threads);
525 ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
527 compressor_thread_proc,
528 &ctx->thread_data[ctx->num_started_threads]);
531 WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
532 ctx->num_started_threads + 1,
534 ret = WIMLIB_ERR_NOMEM;
/* With at least 2 threads running, parallelism still helps: proceed
 * with fewer threads instead of failing outright. */
535 if (ctx->num_started_threads >= 2)
541 ctx->base.num_threads = ctx->num_started_threads;
543 ret = WIMLIB_ERR_NOMEM;
/* Message pool is sized by *started* threads, not requested ones. */
544 ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
545 ctx->msgs = allocate_messages(ctx->num_messages,
546 chunks_per_msg, out_chunk_size);
547 if (ctx->msgs == NULL)
/* Initially every message is available and none are in flight. */
550 INIT_LIST_HEAD(&ctx->available_msgs);
551 for (size_t i = 0; i < ctx->num_messages; i++)
552 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
554 INIT_LIST_HEAD(&ctx->submitted_msgs);
556 *compressor_ret = &ctx->base;
/* Error path (elided label): the destroy function handles any partially
 * constructed state. */
560 parallel_chunk_compressor_destroy(&ctx->base);
564 #endif /* ENABLE_MULTITHREADED_COMPRESSION */