/*
 * Compress chunks of data (parallel version).
 *
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is part of wimlib, a library for working with WIM files.
 *
 * wimlib is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * wimlib; if not, see http://www.gnu.org/licenses/.
 */
#include "wimlib/assert.h"
#include "wimlib/compress_chunks.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#ifdef __WIN32__
#  include "wimlib/win32.h" /* win32_get_number_of_processors() */
#else
#  include <unistd.h> /* sysconf() */
#endif

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdbool.h>
#include <string.h>
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        int out_ctype;
        u32 out_chunk_size;
        struct wimlib_lzx_context *comp_ctx;
};
#define MAX_CHUNKS_PER_MSG 2

/* A message is a unit of work passed between the main thread and the
 * compressor threads.  Each message holds up to MAX_CHUNKS_PER_MSG chunks.  */
struct message {
        /* Buffers for the original and compressed version of each chunk  */
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];

        /* Number of chunks currently filled in vs. allocated  */
        size_t num_filled_chunks;
        size_t num_alloc_chunks;

        /* Link for the message queues and for 'available_msgs'  */
        struct list_head list;

        /* Set by the main thread once it has observed that the compressor
         * threads finished this message  */
        bool complete;

        /* Link for 'submitted_msgs'  */
        struct list_head submission_list;
};
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_threads;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        /* Messages not currently in use  */
        struct list_head available_msgs;

        /* Messages submitted to the compressor threads, in submission order  */
        struct list_head submitted_msgs;

        /* Message currently being filled with chunks from the caller  */
        struct message *next_submit_msg;

        /* Message whose compressed chunks are currently being returned  */
        struct message *next_ready_msg;

        /* Next chunk index within 'next_ready_msg' to return  */
        size_t next_chunk_idx;
};
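/*
 * Overall flow: the main thread fills in 'struct message's with uncompressed
 * chunks and posts them to 'chunks_to_compress_queue'.  Each compressor thread
 * repeatedly takes a message from that queue, compresses its chunks, and posts
 * the message to 'compressed_chunks_queue'.  The main thread then returns the
 * results to the caller strictly in submission order, recycling each message
 * onto 'available_msgs' once all of its chunks have been consumed.
 */

/* Return the default number of threads to use: one per online processor, or 1
 * if the processor count cannot be determined.  */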
static unsigned
get_default_num_threads(void)
{
        long n;

#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}
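/* Return an estimate of the number of bytes of physical memory available, or a
 * 1 GiB fallback if it cannot be determined.  */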
static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#else
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return (u64)1 << 30;
}
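/* Initialize a message queue: an empty list protected by a mutex, plus its
 * condition variables.  Returns 0 on success or WIMLIB_ERR_NOMEM on failure.  */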
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                /* Queue was fully initialized (INIT_LIST_HEAD() is the last
                 * step of message_queue_init()), so tear it down.  */
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}
/* Add a message to the tail of the queue and wake one waiting thread.  */
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}
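/* Remove and return the oldest message in the queue, blocking until a message
 * is available or the queue is terminated.  Returns NULL once the queue has
 * been terminated.  */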
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else {
                msg = NULL;
        }
        pthread_mutex_unlock(&q->lock);
        return msg;
}
/* Mark the queue as terminating and wake all waiting threads.  */
static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}
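/* Allocate the chunk buffers for one message.  Each compressed buffer is one
 * byte smaller than the uncompressed chunk size, since a "compressed" chunk is
 * only kept if it is strictly smaller than the original data.  */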
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}
static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}
static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}
static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}
static void
compress_chunks(struct message *msg, int out_ctype,
                struct wimlib_lzx_context *comp_ctx)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                /* compress_chunk() returns the compressed size, or 0 if the
                 * chunk did not compress to less than its original size; in
                 * that case the original data is used instead (see
                 * parallel_chunk_compressor_get_chunk()).  */
                msg->compressed_chunk_sizes[i] =
                        compress_chunk(msg->uncompressed_chunks[i],
                                       msg->uncompressed_chunk_sizes[i],
                                       msg->compressed_chunks[i],
                                       out_ctype,
                                       comp_ctx);
        }
}
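/* Thread procedure for the compressor threads: repeatedly take a message off
 * the "to compress" queue, compress its chunks, and post the message to the
 * "compressed" queue, until the queue is terminated.  */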
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->out_ctype, params->comp_ctx);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}
/* Tear down a parallel chunk compressor: stop the worker threads, then free
 * the queues, the per-thread compression contexts, and the messages.  */
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->base.out_ctype == WIMLIB_COMPRESSION_TYPE_LZX &&
            ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_threads; i++)
                        wimlib_lzx_free_context(ctx->thread_data[i].comp_ctx);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}
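/* Hand the message currently being filled off to the compressor threads, and
 * track it on 'submitted_msgs' so its results can later be returned in
 * submission order.  */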
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}
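/* Buffer one uncompressed chunk in the message currently being filled,
 * starting a new message if necessary and submitting the message to the
 * compressor threads once it is full.  */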
static bool
parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
                                       const void *chunk, size_t size)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(size > 0);
        wimlib_assert(size <= ctx->base.out_chunk_size);

        if (ctx->next_submit_msg) {
                /* Continue filling the current message.  */
                msg = ctx->next_submit_msg;
        } else {
                /* Start filling a new message, if one is free.  */
                if (list_empty(&ctx->available_msgs))
                        return false;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
        return true;
}
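/* Retrieve the next chunk of output, in the same order the chunks were
 * submitted.  Any partially filled message is flushed first.  If a chunk did
 * not compress to less than its original size, the original data and size are
 * returned instead; *usize_ret always receives the uncompressed size.  */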
static bool
parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
                                    const void **cdata_ret, unsigned *csize_ret,
                                    unsigned *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                /* Continue returning chunks from the current message.  */
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                /* Wait until the oldest submitted message has been compressed.
                 * Messages may come back from the compressor threads in any
                 * order, so mark each one complete as it arrives.  */
                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                /* Chunk did not compress; return the original data.  */
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                /* All chunks of this message consumed; recycle it.  */
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}
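/*
 * Create a chunk compressor that distributes the work over 'num_threads'
 * worker threads, buffering enough messages to keep the threads busy while
 * staying within an estimated 'max_memory' bytes.  Passing 0 for 'num_threads'
 * or 'max_memory' selects an autodetected default.
 *
 * A minimal usage sketch follows.  It is illustrative only: read_next_chunk()
 * and write_chunk() are hypothetical stand-ins for the caller's I/O, and it
 * assumes the submit_chunk()/get_chunk() return conventions used above
 * (submission refused when no message is free; no chunk returned when nothing
 * is pending).
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      unsigned csize, usize;
 *      void *buf;
 *      size_t len;
 *
 *      if (new_parallel_chunk_compressor(WIMLIB_COMPRESSION_TYPE_LZX,
 *                                        32768, 0, 0, &c))
 *              return serial_fallback();       (hypothetical)
 *
 *      while ((len = read_next_chunk(&buf)) != 0) {
 *              while (!c->submit_chunk(c, buf, len)) {
 *                      (no free message: drain one result first)
 *                      c->get_chunk(c, &cdata, &csize, &usize);
 *                      write_chunk(cdata, csize, usize);
 *              }
 *      }
 *
 *      while (c->get_chunk(c, &cdata, &csize, &usize))
 *              write_chunk(cdata, csize, usize);
 *
 *      c->destroy(c);
 */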
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);
        wimlib_assert(out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; Not bothering with "
                      "parallel chunk compressor.");
                return 1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();
        desired_num_threads = num_threads;

        /* Keep shrinking the buffering parameters, then the thread count,
         * until the estimated memory usage fits within max_memory.  */
        chunks_per_msg = MAX_CHUNKS_PER_MSG;
        msgs_per_thread = 2;
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + (out_chunk_size * num_threads * 4);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; Not bothering with "
                      "parallel chunk compressor.");
                return 2;
        }
        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.num_threads = num_threads;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
        ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;

        ctx->num_threads = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;
        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                dat->out_ctype = out_ctype;
                dat->out_chunk_size = out_chunk_size;
                if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
                        ret = wimlib_lzx_alloc_context(out_chunk_size, NULL,
                                                       &dat->comp_ctx);
                        if (ret)
                                goto err;
                }
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        ret = WIMLIB_ERR_NOMEM;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        goto err;
                }
        }
        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = num_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}