/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"

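/* An unbounded FIFO queue of messages, protected by a mutex.  Consumers sleep
 * on 'msg_avail_cond' until a message is queued or the queue is terminated.
 * ('space_avail_cond' is initialized but not otherwise used in this file.)  */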
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

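/* Per-thread state: the worker thread itself, the queues it receives from and
 * sends to, and the compressor instance it uses.  */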
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

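/* A unit of work passed between threads: up to MAX_CHUNKS_PER_MSG chunks of
 * uncompressed data, along with buffers to hold the corresponding compressed
 * data and the resulting compressed sizes.  */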
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

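/* The parallel chunk compressor: extends the generic 'struct chunk_compressor'
 * interface with the worker threads, the two message queues, and the lists
 * that track which messages are available, submitted, or being read back.  */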
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

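/* Initialize a message queue.  Returns 0 on success, or WIMLIB_ERR_NOMEM if a
 * mutex or condition variable could not be initialized.  */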
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

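/* Destroy a message queue, freeing its synchronization objects.  The check of
 * 'q->list.next' makes this a no-op on a queue that was never initialized
 * (the containing structure is zeroed by CALLOC).  */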
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

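/* Enqueue a message and wake one waiting consumer.  */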
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

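/* Dequeue the oldest message, blocking until one is available.  Returns NULL
 * if the queue has been terminated.  */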
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

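/* Mark the queue as terminating and wake all waiting consumers, causing
 * message_queue_get() to return NULL in each worker thread.  */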
static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

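/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the output chunk size, since a compressed result is only
 * worth keeping if it is strictly smaller than the original data.  */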
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

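/* Compress each filled chunk in the message.  wimlib_compress() stores 0 in
 * the compressed size when the data cannot be compressed to fewer bytes than
 * its uncompressed size; such chunks are later sent uncompressed.  */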
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

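/* The worker thread routine: repeatedly take a message from the input queue,
 * compress its chunks, and pass it to the output queue, until the input queue
 * is terminated.  */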
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

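/* Tear down the parallel chunk compressor: terminate and join any started
 * worker threads, then free the queues, compressors, and message buffers.
 * Safe to call on a partially constructed context.  */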
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

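/* Send the message currently being filled to the worker threads.  */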
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

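/* Return a buffer into which the caller may write the next uncompressed
 * chunk, or NULL if every message is in flight (in which case the caller must
 * retrieve some compression results first).  */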
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

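/* Record the size of the chunk just written into the buffer returned by
 * get_chunk_buffer().  Once the message is full, it is submitted for
 * compression.  */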
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

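/* Retrieve the next chunk's compression result, in submission order, blocking
 * until the oldest submitted message has been compressed.  Returns false if no
 * chunks are pending.  For an incompressible chunk (compressed size 0), the
 * uncompressed data is returned instead.  */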
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

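/*
 * Create a parallel chunk compressor.  Returns 0 and sets *compressor_ret on
 * success; returns a negative value if a parallel compressor would not be
 * useful (only one usable thread), in which case the caller is expected to
 * fall back to serial compression; or returns a positive WIMLIB_ERR_* code on
 * failure.
 *
 * Sketch of the intended calling pattern (have_more_data(), read_chunk(), and
 * write_chunk() are hypothetical stand-ins for the caller's I/O routines):
 *
 *	struct chunk_compressor *c;
 *	if (new_parallel_chunk_compressor(out_ctype, out_chunk_size,
 *					  0, 0, &c) == 0) {
 *		const void *cdata;
 *		u32 csize, usize;
 *		while (have_more_data()) {
 *			void *buf = c->get_chunk_buffer(c);
 *			if (buf == NULL) {
 *				// All messages in flight; drain one result.
 *				c->get_compression_result(c, &cdata,
 *							  &csize, &usize);
 *				write_chunk(cdata, csize, usize);
 *				continue;
 *			}
 *			u32 n = read_chunk(buf, out_chunk_size);
 *			c->signal_chunk_filled(c, n);
 *		}
 *		while (c->get_compression_result(c, &cdata, &csize, &usize))
 *			write_chunk(cdata, csize, usize);
 *		c->destroy(c);
 *	}
 */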
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */