/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is part of wimlib, a library for working with WIM files.
 *
 * wimlib is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * wimlib; if not, see http://www.gnu.org/licenses/.
 */

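/*
 * The compressor threads pull messages of uncompressed chunks off
 * chunks_to_compress_queue and push the compressed results onto
 * compressed_chunks_queue, while the caller drives everything through the
 * generic chunk_compressor interface (->submit_chunk(), ->get_chunk(),
 * ->destroy()).  A rough usage sketch, where read_chunk() and write_chunk()
 * are hypothetical stand-ins for the caller's actual I/O (each submitted
 * chunk must be 1 to out_chunk_size bytes long):
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      unsigned csize, usize;
 *      void *buf;
 *      size_t len;
 *      int ret;
 *
 *      ret = new_parallel_chunk_compressor(out_ctype, out_chunk_size,
 *                                          0, 0, &c);
 *      if (ret == 0) {
 *              while (read_chunk(&buf, &len)) {
 *                      while (!c->submit_chunk(c, buf, len)) {
 *                              // All messages in flight; drain a result.
 *                              if (c->get_chunk(c, &cdata, &csize, &usize))
 *                                      write_chunk(cdata, csize, usize);
 *                      }
 *              }
 *              while (c->get_chunk(c, &cdata, &csize, &usize))
 *                      write_chunk(cdata, csize, usize);
 *              c->destroy(c);
 *      }
 *
 * Note that new_parallel_chunk_compressor() returns a negative value (not a
 * wimlib error code) when parallel compression is not worth setting up,
 * presumably so the caller can fall back to a serial compressor.
 */
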
#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#ifdef __WIN32__
#  include "wimlib/win32.h" /* win32_get_number_of_processors() */
#endif

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

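/*
 * A simple FIFO of messages: an intrusive doubly linked list protected by a
 * mutex.  msg_avail_cond wakes consumers blocked in message_queue_get() when
 * a message is queued or the queue is terminated.  (space_avail_cond is set
 * up and torn down below but is otherwise unused in this file.)
 */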
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 2

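/*
 * A unit of work exchanged with the compressor threads.  Each message carries
 * up to MAX_CHUNKS_PER_MSG chunks and cycles through three states: available
 * (on available_msgs), submitted (on submitted_msgs and queued for a
 * compressor thread), and complete (compressed results ready to be handed
 * back to the caller in submission order).
 */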
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_threads;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

static unsigned
get_default_num_threads(void)
{
        long n;
#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}

static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#else
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return 1U << 30;
}

static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

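/*
 * Note: the containing parallel_chunk_compressor is allocated with CALLOC(),
 * so q->list.next is NULL for a queue that was never successfully
 * initialized; the check below makes destroying such a queue a no-op.
 */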
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

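/* Retrieve the next message from the queue, blocking until one is available
 * or until the queue is terminated, in which case NULL is returned.  */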
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

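/*
 * Allocate the chunk buffers for a message.  The compressed buffers are one
 * byte smaller than the output chunk size, presumably because a compressed
 * chunk is only worth keeping if it is strictly smaller than the original.
 */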
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

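/*
 * Compress each filled chunk of the message.  Since only usize - 1 bytes of
 * output space are provided, wimlib_compress() returns 0 whenever a chunk
 * does not compress to less than its original size; a recorded compressed
 * size of 0 is later taken by parallel_chunk_compressor_get_chunk() to mean
 * "emit this chunk uncompressed".
 */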
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

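/* The main procedure of each compressor thread: repeatedly pull a message off
 * the input queue, compress its chunks, and push it onto the output queue,
 * until the input queue is terminated.  */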
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_threads; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

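/*
 * Buffer a chunk for compression.  Returns false if every message is
 * currently in flight, in which case the caller should retrieve some results
 * with ->get_chunk() before retrying.  A message is handed to the compressor
 * threads as soon as all of its chunk slots are filled.
 */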
static bool
parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
                                       const void *chunk, size_t size)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(size > 0);
        wimlib_assert(size <= ctx->base.out_chunk_size);

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return false;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
        return true;
}

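/*
 * Retrieve the next chunk, in submission order.  Any partially filled message
 * is flushed to the compressor threads first.  A chunk whose recorded
 * compressed size is 0 (i.e. one that did not shrink) is returned as the
 * original uncompressed data.  Returns false if no chunks are pending.
 */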
static bool
parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
                                    const void **cdata_ret, unsigned *csize_ret,
                                    unsigned *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

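/*
 * Create a parallel chunk compressor, scaling the buffer counts and thread
 * count down as needed so that the estimated memory usage fits in max_memory.
 * A worked example of the estimate below (numbers illustrative only): with 4
 * threads and 32 KiB chunks, out_chunk_size < 2^23 gives chunks_per_msg = 2
 * and msgs_per_thread = 2, so the chunk buffers take
 *
 *      2 chunks/msg x 2 msgs/thread x 4 threads x 32 KiB = 512 KiB,
 *
 * plus one extra chunk buffer, roughly 1 MB of slack, and 4 copies of the
 * compressor's own working memory as reported by
 * wimlib_get_compressor_needed_memory().
 */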
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);
        wimlib_assert(out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);

        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; Not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                chunks_per_msg = MAX_CHUNKS_PER_MSG;
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            NULL);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; Not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.num_threads = num_threads;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
        ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;

        ctx->num_threads = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               NULL, &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        ret = WIMLIB_ERR_NOMEM;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        goto err;
                }
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = num_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */