/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is part of wimlib, a library for working with WIM files.
 *
 * wimlib is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along
 * with wimlib; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#ifdef __WIN32__
#  include "wimlib/win32.h" /* win32_get_number_of_processors() */
#endif

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

struct message_queue {
	struct list_head list;
	pthread_mutex_t lock;
	pthread_cond_t msg_avail_cond;
	pthread_cond_t space_avail_cond;
	bool terminating;
};

struct compressor_thread_data {
	pthread_t thread;
	struct message_queue *chunks_to_compress_queue;
	struct message_queue *compressed_chunks_queue;
	struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 2

struct message {
	u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
	u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
	unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
	unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
	size_t num_filled_chunks;
	size_t num_alloc_chunks;
	struct list_head list;
	bool complete;
	struct list_head submission_list;
};

struct parallel_chunk_compressor {
	struct chunk_compressor base;

	struct message_queue chunks_to_compress_queue;
	struct message_queue compressed_chunks_queue;
	struct compressor_thread_data *thread_data;
	unsigned num_threads;
	unsigned num_started_threads;

	struct message *msgs;
	size_t num_messages;

	struct list_head available_msgs;
	struct list_head submitted_msgs;
	struct message *next_submit_msg;
	struct message *next_ready_msg;
	size_t next_chunk_idx;
};

static unsigned
get_default_num_threads(void)
{
	long n;

#ifdef __WIN32__
	n = win32_get_number_of_processors();
#else
	n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
	if (n < 1 || n >= UINT_MAX) {
		WARNING("Failed to determine number of processors; assuming 1.");
		return 1;
	}
	return n;
}

static u64
get_avail_memory(void)
{
#ifdef __WIN32__
	u64 phys_bytes = win32_get_avail_memory();
	if (phys_bytes == 0)
		goto default_size;
	return phys_bytes;
#else
	long page_size = sysconf(_SC_PAGESIZE);
	long num_pages = sysconf(_SC_PHYS_PAGES);
	if (page_size <= 0 || num_pages <= 0)
		goto default_size;
	return ((u64)page_size * (u64)num_pages);
#endif

default_size:
	WARNING("Failed to determine available memory; assuming 1 GiB");
	return (u64)1 << 30;
}

static int
message_queue_init(struct message_queue *q)
{
	if (pthread_mutex_init(&q->lock, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize mutex");
		goto err;
	}
	if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize condition variable");
		goto err_destroy_lock;
	}
	if (pthread_cond_init(&q->space_avail_cond, NULL)) {
		ERROR_WITH_ERRNO("Failed to initialize condition variable");
		goto err_destroy_msg_avail_cond;
	}
	INIT_LIST_HEAD(&q->list);
	return 0;

err_destroy_msg_avail_cond:
	pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
	pthread_mutex_destroy(&q->lock);
err:
	return WIMLIB_ERR_NOMEM;
}
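/*
 * The message_queue functions below form a minimal blocking FIFO: producers
 * call message_queue_put(), consumers block in message_queue_get() until a
 * message arrives, and message_queue_terminate() wakes all waiters and makes
 * message_queue_get() return NULL from then on.  A sketch of the consumer
 * side (handle() is an illustrative stand-in, not part of wimlib):
 *
 *	struct message *msg;
 *
 *	while ((msg = message_queue_get(q)) != NULL)
 *		handle(msg);
 */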
static void
message_queue_destroy(struct message_queue *q)
{
	if (q->list.next != NULL) {
		pthread_mutex_destroy(&q->lock);
		pthread_cond_destroy(&q->msg_avail_cond);
		pthread_cond_destroy(&q->space_avail_cond);
	}
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
	pthread_mutex_lock(&q->lock);
	list_add_tail(&msg->list, &q->list);
	pthread_cond_signal(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}

static struct message *
message_queue_get(struct message_queue *q)
{
	struct message *msg;

	pthread_mutex_lock(&q->lock);
	while (list_empty(&q->list) && !q->terminating)
		pthread_cond_wait(&q->msg_avail_cond, &q->lock);
	if (!q->terminating) {
		msg = list_entry(q->list.next, struct message, list);
		list_del(&msg->list);
	} else {
		msg = NULL;
	}
	pthread_mutex_unlock(&q->lock);
	return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->terminating = true;
	pthread_cond_broadcast(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}

static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
	msg->num_alloc_chunks = num_chunks;
	for (size_t i = 0; i < num_chunks; i++) {
		msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
		msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
		if (msg->compressed_chunks[i] == NULL ||
		    msg->uncompressed_chunks[i] == NULL)
			return WIMLIB_ERR_NOMEM;
	}
	return 0;
}

static void
destroy_message(struct message *msg)
{
	for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
		FREE(msg->compressed_chunks[i]);
		FREE(msg->uncompressed_chunks[i]);
	}
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
	if (msgs) {
		for (size_t i = 0; i < num_messages; i++)
			destroy_message(&msgs[i]);
		FREE(msgs);
	}
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
	struct message *msgs;

	msgs = CALLOC(count, sizeof(struct message));
	if (msgs == NULL)
		return NULL;
	for (size_t i = 0; i < count; i++) {
		if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
			free_messages(msgs, count);
			return NULL;
		}
	}
	return msgs;
}

static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
	for (size_t i = 0; i < msg->num_filled_chunks; i++) {
		wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);

		/* A compressed size of 0 means the chunk did not compress to
		 * less than its original size; the caller will then return
		 * the uncompressed data instead.  */
		msg->compressed_chunk_sizes[i] =
			wimlib_compress(msg->uncompressed_chunks[i],
					msg->uncompressed_chunk_sizes[i],
					msg->compressed_chunks[i],
					msg->uncompressed_chunk_sizes[i] - 1,
					compressor);
	}
}

static void *
compressor_thread_proc(void *arg)
{
	struct compressor_thread_data *params = arg;
	struct message *msg;

	while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
		compress_chunks(msg, params->compressor);
		message_queue_put(params->compressed_chunks_queue, msg);
	}
	return NULL;
}
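/*
 * Data flow, as implemented above: the main thread fills struct messages
 * with uncompressed chunks and puts them on chunks_to_compress_queue; each
 * compressor thread loops in compressor_thread_proc(), compressing every
 * chunk in a message, then puts the message on compressed_chunks_queue,
 * where the main thread picks the results back up.
 */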
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
	struct parallel_chunk_compressor *ctx =
		(struct parallel_chunk_compressor *)_ctx;
	unsigned i;

	if (ctx == NULL)
		return;

	if (ctx->num_started_threads != 0) {
		DEBUG("Terminating %u compressor threads",
		      ctx->num_started_threads);
		message_queue_terminate(&ctx->chunks_to_compress_queue);
		for (i = 0; i < ctx->num_started_threads; i++)
			pthread_join(ctx->thread_data[i].thread, NULL);
	}

	message_queue_destroy(&ctx->chunks_to_compress_queue);
	message_queue_destroy(&ctx->compressed_chunks_queue);

	if (ctx->thread_data != NULL)
		for (i = 0; i < ctx->num_threads; i++)
			wimlib_free_compressor(ctx->thread_data[i].compressor);
	FREE(ctx->thread_data);

	free_messages(ctx->msgs, ctx->num_messages);

	FREE(ctx);
}

static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
	struct message *msg = ctx->next_submit_msg;

	msg->complete = false;
	list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
	message_queue_put(&ctx->chunks_to_compress_queue, msg);
	ctx->next_submit_msg = NULL;
}

static bool
parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
				       const void *chunk, size_t size)
{
	struct parallel_chunk_compressor *ctx =
		(struct parallel_chunk_compressor *)_ctx;
	struct message *msg;

	wimlib_assert(size > 0);
	wimlib_assert(size <= ctx->base.out_chunk_size);

	if (ctx->next_submit_msg) {
		msg = ctx->next_submit_msg;
	} else {
		if (list_empty(&ctx->available_msgs))
			return false;

		msg = list_entry(ctx->available_msgs.next, struct message, list);
		list_del(&msg->list);
		ctx->next_submit_msg = msg;
		msg->num_filled_chunks = 0;
	}

	memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
	msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
	if (++msg->num_filled_chunks == msg->num_alloc_chunks)
		submit_compression_msg(ctx);
	return true;
}

static bool
parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
				    const void **cdata_ret, unsigned *csize_ret,
				    unsigned *usize_ret)
{
	struct parallel_chunk_compressor *ctx =
		(struct parallel_chunk_compressor *)_ctx;
	struct message *msg;

	if (ctx->next_submit_msg)
		submit_compression_msg(ctx);

	if (ctx->next_ready_msg) {
		msg = ctx->next_ready_msg;
	} else {
		if (list_empty(&ctx->submitted_msgs))
			return false;

		while (!(msg = list_entry(ctx->submitted_msgs.next,
					  struct message,
					  submission_list))->complete)
			message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

		ctx->next_ready_msg = msg;
		ctx->next_chunk_idx = 0;
	}

	if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
		*cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
	} else {
		*cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
	}
	*usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

	if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
		list_del(&msg->submission_list);
		list_add_tail(&msg->list, &ctx->available_msgs);
		ctx->next_ready_msg = NULL;
	}
	return true;
}
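/*
 * Note on ordering: parallel_chunk_compressor_get_chunk() hands back chunks
 * strictly in submission order.  submitted_msgs is FIFO, and the wait loop
 * above only marks messages ->complete as they come off
 * compressed_chunks_queue, draining that queue until the head of
 * submitted_msgs is complete --- so out-of-order completions are buffered
 * rather than returned early.
 */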
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
			      unsigned num_threads, u64 max_memory,
			      struct chunk_compressor **compressor_ret)
{
	u64 approx_mem_required;
	size_t chunks_per_msg;
	size_t msgs_per_thread;
	struct parallel_chunk_compressor *ctx;
	unsigned i;
	int ret;
	unsigned desired_num_threads;

	wimlib_assert(out_chunk_size > 0);
	wimlib_assert(out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);

	if (num_threads == 0)
		num_threads = get_default_num_threads();

	if (num_threads == 1) {
		DEBUG("Only 1 thread; not bothering with "
		      "parallel chunk compressor.");
		return -1;
	}

	if (max_memory == 0)
		max_memory = get_avail_memory();

	desired_num_threads = num_threads;

	if (out_chunk_size < ((u32)1 << 23)) {
		chunks_per_msg = MAX_CHUNKS_PER_MSG;
		msgs_per_thread = 2;
	} else {
		/* Big chunks: just one buffer per thread --- more would only
		 * waste memory.  */
		chunks_per_msg = 1;
		msgs_per_thread = 1;
	}

	for (;;) {
		approx_mem_required =
			(u64)chunks_per_msg *
			(u64)msgs_per_thread *
			(u64)num_threads *
			(u64)out_chunk_size
			+ out_chunk_size
			+ 1000000
			+ num_threads * wimlib_get_compressor_needed_memory(out_ctype,
									    out_chunk_size,
									    NULL);
		if (approx_mem_required <= max_memory)
			break;

		if (chunks_per_msg > 1)
			chunks_per_msg--;
		else if (msgs_per_thread > 1)
			msgs_per_thread--;
		else if (num_threads > 1)
			num_threads--;
		else
			break;
	}

	if (num_threads < desired_num_threads) {
		WARNING("Wanted to use %u threads, but limiting to %u "
			"to fit in available memory!",
			desired_num_threads, num_threads);
	}

	if (num_threads == 1) {
		DEBUG("Only 1 thread; not bothering with "
		      "parallel chunk compressor.");
		return -2;
	}

	ret = WIMLIB_ERR_NOMEM;
	ctx = CALLOC(1, sizeof(*ctx));
	if (ctx == NULL)
		goto err;

	ctx->base.out_ctype = out_ctype;
	ctx->base.out_chunk_size = out_chunk_size;
	ctx->base.num_threads = num_threads;
	ctx->base.destroy = parallel_chunk_compressor_destroy;
	ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
	ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;

	ctx->num_threads = num_threads;

	ret = message_queue_init(&ctx->chunks_to_compress_queue);
	if (ret)
		goto err;

	ret = message_queue_init(&ctx->compressed_chunks_queue);
	if (ret)
		goto err;

	ret = WIMLIB_ERR_NOMEM;
	ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
	if (ctx->thread_data == NULL)
		goto err;

	for (i = 0; i < num_threads; i++) {
		struct compressor_thread_data *dat;

		dat = &ctx->thread_data[i];

		dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
		dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
		ret = wimlib_create_compressor(out_ctype, out_chunk_size,
					       NULL, &dat->compressor);
		if (ret)
			goto err;
	}

	for (ctx->num_started_threads = 0;
	     ctx->num_started_threads < num_threads;
	     ctx->num_started_threads++)
	{
		DEBUG("pthread_create thread %u of %u",
		      ctx->num_started_threads + 1, num_threads);
		ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
				     NULL,
				     compressor_thread_proc,
				     &ctx->thread_data[ctx->num_started_threads]);
		if (ret) {
			errno = ret;
			WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
					   ctx->num_started_threads + 1,
					   num_threads);
			ret = WIMLIB_ERR_NOMEM;
			goto err;
		}
	}

	ret = WIMLIB_ERR_NOMEM;
	ctx->num_messages = num_threads * msgs_per_thread;
	ctx->msgs = allocate_messages(ctx->num_messages,
				      chunks_per_msg, out_chunk_size);
	if (ctx->msgs == NULL)
		goto err;

	INIT_LIST_HEAD(&ctx->available_msgs);
	for (size_t i = 0; i < ctx->num_messages; i++)
		list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

	INIT_LIST_HEAD(&ctx->submitted_msgs);

	*compressor_ret = &ctx->base;
	return 0;

err:
	parallel_chunk_compressor_destroy(&ctx->base);
	return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */
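/*
 * Example of driving the chunk_compressor interface created above (an
 * illustrative sketch; read_next_chunk() and write_result() are hypothetical
 * callbacks, 32768 is just a typical WIM chunk size, and most error handling
 * is omitted).  A negative return from new_parallel_chunk_compressor() is
 * not an error; it means the caller should fall back to serial compression.
 *
 *	struct chunk_compressor *c;
 *	const void *cdata;
 *	unsigned csize, usize;
 *	size_t n;
 *	u8 buf[32768];
 *
 *	if (new_parallel_chunk_compressor(WIMLIB_COMPRESSION_TYPE_LZX,
 *					  sizeof(buf), 0, 0, &c) != 0)
 *		return;
 *	while ((n = read_next_chunk(buf, sizeof(buf))) != 0) {
 *		// submit_chunk() returns false when all message buffers are
 *		// in flight; drain one result, then retry.
 *		while (!c->submit_chunk(c, buf, n))
 *			if (c->get_chunk(c, &cdata, &csize, &usize))
 *				write_result(cdata, csize, usize);
 *	}
 *	// Flush remaining results, including any partially filled message.
 *	while (c->get_chunk(c, &cdata, &csize, &usize))
 *		write_result(cdata, csize, usize);
 *	c->destroy(c);
 */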