4 * Compress chunks of data (parallel version).
8 * Copyright (C) 2013 Eric Biggers
10 * This file is part of wimlib, a library for working with WIM files.
12 * wimlib is free software; you can redistribute it and/or modify it under the
13 * terms of the GNU General Public License as published by the Free Software
14 * Foundation; either version 3 of the License, or (at your option) any later
17 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
21 * You should have received a copy of the GNU General Public License along with
22 * wimlib; if not, see http://www.gnu.org/licenses/.
29 #ifdef ENABLE_MULTITHREADED_COMPRESSION
31 #include "wimlib/assert.h"
32 #include "wimlib/chunk_compressor.h"
33 #include "wimlib/error.h"
34 #include "wimlib/list.h"
35 #include "wimlib/util.h"
37 # include "wimlib/win32.h" /* win32_get_number_of_processors() */
46 #ifdef HAVE_SYS_SYSCTL_H
47 # include <sys/sysctl.h>
/*
 * A thread-safe FIFO of 'struct message'.  Producers append under q->lock and
 * signal msg_avail_cond; consumers block on msg_avail_cond until a message (or
 * termination) is available.  NOTE(review): the mutex field, the 'terminating'
 * flag used by message_queue_get()/message_queue_terminate(), and the closing
 * brace are elided from this extract — confirm against the full file.
 */
50 struct message_queue {
/* Head of the linked list of queued messages (FIFO via list_add_tail). */
51 struct list_head list;
/* Signaled when a message is added or the queue is terminated. */
53 pthread_cond_t msg_avail_cond;
/* NOTE(review): no waiter on this condition is visible in this extract. */
54 pthread_cond_t space_avail_cond;
/*
 * Per-thread state for one compressor worker: the two queues it moves
 * messages between, plus its private compressor instance.
 * NOTE(review): the pthread_t 'thread' field referenced by
 * parallel_chunk_compressor_destroy() is elided from this extract.
 */
58 struct compressor_thread_data {
/* Queue the worker pulls work from (shared with the submitting side). */
60 struct message_queue *chunks_to_compress_queue;
/* Queue the worker pushes finished messages onto. */
61 struct message_queue *compressed_chunks_queue;
/* Private compressor — one per thread, so no locking is needed around it. */
62 struct wimlib_compressor *compressor;
/* Maximum number of chunks batched into a single message. */
65 #define MAX_CHUNKS_PER_MSG 2
/*
 * One unit of work passed through the queues: up to MAX_CHUNKS_PER_MSG chunks
 * with their input/output buffers and sizes.  NOTE(review): the 'struct
 * message {' opening line and the 'complete' flag used by the get_chunk path
 * are elided from this extract.
 */
/* Input buffers, each out_chunk_size bytes (see init_message()). */
68 u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
/* Output buffers, each out_chunk_size - 1 bytes: a result must be strictly
 * smaller than the input to be worth keeping. */
69 u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
70 u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
/* 0 means "compression did not help; use the uncompressed data". */
71 u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
/* How many of the slots currently hold data to (de)queue. */
72 size_t num_filled_chunks;
/* How many slots had buffers successfully allocated (used by destroy_message). */
73 size_t num_alloc_chunks;
/* Link for either a message_queue or the context's available_msgs list. */
74 struct list_head list;
/* Separate link for the context's submitted_msgs list (in-flight order). */
76 struct list_head submission_list;
/*
 * Parallel implementation of the 'struct chunk_compressor' interface: a pool
 * of worker threads fed through two message queues.  NOTE(review): the 'msgs'
 * and 'num_messages' fields referenced by the destroy path are elided from
 * this extract.
 */
79 struct parallel_chunk_compressor {
/* Must be first so the public handle can be cast back to this struct. */
80 struct chunk_compressor base;
/* Work flows: submit -> chunks_to_compress_queue -> worker ->
 * compressed_chunks_queue -> get_chunk. */
82 struct message_queue chunks_to_compress_queue;
83 struct message_queue compressed_chunks_queue;
/* Array of num_thread_data per-worker states. */
84 struct compressor_thread_data *thread_data;
85 unsigned num_thread_data;
/* May be < num_thread_data if pthread_create() failed partway through. */
86 unsigned num_started_threads;
/* Free messages ready for reuse. */
91 struct list_head available_msgs;
/* Messages currently in flight, in submission order (result ordering). */
92 struct list_head submitted_msgs;
/* Partially filled message not yet queued, if any. */
93 struct message *next_submit_msg;
/* Completed message currently being drained by get_chunk, if any. */
94 struct message *next_ready_msg;
/* Index of the next chunk to hand out from next_ready_msg. */
95 size_t next_chunk_idx;
/*
 * Detect the number of online processors to use as the default thread count.
 * Falls back to 1 (with a warning) if detection fails or returns a value
 * outside the usable range.  NOTE(review): the surrounding #ifdef __WIN32__
 * selector and the return statements are elided from this extract.
 */
99 get_default_num_threads(void)
/* Windows path. */
103 n = win32_get_number_of_processors();
/* POSIX path. */
105 n = sysconf(_SC_NPROCESSORS_ONLN);
/* sysconf() returns -1 on failure; also guard against a value too large to
 * fit the unsigned return type. */
107 if (n < 1 || n >= UINT_MAX) {
108 WARNING("Failed to determine number of processors; assuming 1.");
/*
 * Best-effort query of physical memory size, used to bound how many buffers
 * the parallel compressor allocates.  Tries, in order: the Win32 API, POSIX
 * sysconf(), and the BSD/macOS sysctl(HW_MEMSIZE).  On total failure it
 * warns and (per the message) assumes 1 GiB.  NOTE(review): the #ifdef
 * selectors and fallback return statements are elided from this extract.
 */
115 get_avail_memory(void)
/* Windows path. */
118 u64 phys_bytes = win32_get_avail_memory();
122 #elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
123 long page_size = sysconf(_SC_PAGESIZE);
124 long num_pages = sysconf(_SC_PHYS_PAGES);
/* sysconf() returns -1 (or 0 is nonsensical) on failure. */
125 if (page_size <= 0 || num_pages <= 0)
/* Widen before multiplying to avoid overflow on 32-bit 'long'. */
127 return ((u64)page_size * (u64)num_pages);
/* BSD/macOS path: HW_MEMSIZE yields a 64-bit byte count. */
129 int mib[2] = {CTL_HW, HW_MEMSIZE};
131 size_t len = sizeof(memsize);
/* len != 8 would mean the kernel returned something other than a u64. */
132 if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
138 WARNING("Failed to determine available memory; assuming 1 GiB");
/*
 * Initialize the mutex, both condition variables, and the message list of @q.
 * On failure, already-initialized primitives are unwound in reverse order via
 * the goto ladder and WIMLIB_ERR_NOMEM is returned.  NOTE(review): the
 * success return and the 'err_destroy_lock:' label line are elided from this
 * extract.
 */
143 message_queue_init(struct message_queue *q)
145 if (pthread_mutex_init(&q->lock, NULL)) {
146 ERROR_WITH_ERRNO("Failed to initialize mutex");
149 if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
150 ERROR_WITH_ERRNO("Failed to initialize condition variable");
151 goto err_destroy_lock;
153 if (pthread_cond_init(&q->space_avail_cond, NULL)) {
154 ERROR_WITH_ERRNO("Failed to initialize condition variable");
155 goto err_destroy_msg_avail_cond;
/* All primitives ready; an empty list also marks the queue as initialized
 * (message_queue_destroy() checks list.next != NULL). */
157 INIT_LIST_HEAD(&q->list);
/* Unwind path: destroy in reverse order of initialization. */
160 err_destroy_msg_avail_cond:
161 pthread_cond_destroy(&q->msg_avail_cond);
163 pthread_mutex_destroy(&q->lock);
165 return WIMLIB_ERR_NOMEM;
/*
 * Destroy the synchronization primitives of @q.  Safe to call on a queue that
 * was never initialized: INIT_LIST_HEAD() is what sets list.next non-NULL, so
 * a NULL next means init never completed and there is nothing to destroy.
 */
169 message_queue_destroy(struct message_queue *q)
171 if (q->list.next != NULL) {
172 pthread_mutex_destroy(&q->lock);
173 pthread_cond_destroy(&q->msg_avail_cond);
174 pthread_cond_destroy(&q->space_avail_cond);
/*
 * Append @msg to the tail of @q and wake one waiting consumer.
 * Standard mutex + condition-variable producer: signal while holding the
 * lock so the wakeup cannot be missed.
 */
179 message_queue_put(struct message_queue *q, struct message *msg)
181 pthread_mutex_lock(&q->lock);
182 list_add_tail(&msg->list, &q->list);
183 pthread_cond_signal(&q->msg_avail_cond);
184 pthread_mutex_unlock(&q->lock);
/*
 * Block until a message is available on @q or the queue is terminated.
 * Returns the dequeued message, or NULL once message_queue_terminate() has
 * been called (workers use NULL as their shutdown signal).  The wait loop
 * correctly re-checks its predicate, so spurious wakeups are harmless.
 * NOTE(review): the declaration of 'msg' (presumably initialized to NULL for
 * the terminating case) and the return statement are elided from this extract.
 */
187 static struct message *
188 message_queue_get(struct message_queue *q)
192 pthread_mutex_lock(&q->lock);
193 while (list_empty(&q->list) && !q->terminating)
194 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
195 if (!q->terminating) {
196 msg = list_entry(q->list.next, struct message, list);
197 list_del(&msg->list);
200 pthread_mutex_unlock(&q->lock);
/*
 * Mark @q as terminating and wake *all* waiters, so every blocked
 * message_queue_get() call returns NULL and its worker thread can exit.
 */
205 message_queue_terminate(struct message_queue *q)
207 pthread_mutex_lock(&q->lock);
208 q->terminating = true;
/* Broadcast, not signal: every worker blocked on this queue must wake. */
209 pthread_cond_broadcast(&q->msg_avail_cond);
210 pthread_mutex_unlock(&q->lock);
/*
 * Allocate the per-chunk input/output buffers for @msg.
 * Returns WIMLIB_ERR_NOMEM on allocation failure; num_alloc_chunks is set
 * up front so destroy_message() can free whatever was allocated (FREE(NULL)
 * on the untouched slots is a no-op because the containing array was
 * zero-allocated with CALLOC in allocate_messages()).
 */
214 init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
216 msg->num_alloc_chunks = num_chunks;
217 for (size_t i = 0; i < num_chunks; i++) {
/* out_chunk_size - 1: a compressed result must be strictly smaller than
 * the input to be useful, so the output buffer is one byte smaller. */
218 msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
219 msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
220 if (msg->compressed_chunks[i] == NULL ||
221 msg->uncompressed_chunks[i] == NULL)
222 return WIMLIB_ERR_NOMEM;
/*
 * Free the buffers of @msg allocated by init_message().  Only iterates over
 * the num_alloc_chunks slots that init_message() reached.
 */
228 destroy_message(struct message *msg)
230 for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
231 FREE(msg->compressed_chunks[i]);
232 FREE(msg->uncompressed_chunks[i]);
/*
 * Destroy each message's buffers, then (per the elided tail of this function,
 * presumably) free the @msgs array itself.  Counterpart to
 * allocate_messages().
 */
237 free_messages(struct message *msgs, size_t num_messages)
240 for (size_t i = 0; i < num_messages; i++)
241 destroy_message(&msgs[i]);
/*
 * Allocate an array of @count messages, each with buffers for
 * @chunks_per_msg chunks of up to @out_chunk_size bytes.
 * Returns the array, or (per the elided failure paths) NULL on allocation
 * failure.  CALLOC zeroes the array, which init_message()/destroy_message()
 * rely on for safe partial cleanup.  Passing the full @count to
 * free_messages() here is safe for the same reason: untouched messages have
 * num_alloc_chunks == 0.
 */
246 static struct message *
247 allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
249 struct message *msgs;
251 msgs = CALLOC(count, sizeof(struct message));
254 for (size_t i = 0; i < count; i++) {
255 if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
256 free_messages(msgs, count);
/*
 * Compress every filled chunk of @msg with @compressor.  The output bound of
 * usize - 1 means wimlib_compress() stores a nonzero compressed size only
 * when compression actually shrank the chunk; otherwise
 * compressed_chunk_sizes[i] records 0 and the consumer falls back to the
 * uncompressed data (see parallel_chunk_compressor_get_chunk()).
 * NOTE(review): the final argument of the wimlib_compress() call (the
 * compressor handle) is elided from this extract.
 */
264 compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
267 for (size_t i = 0; i < msg->num_filled_chunks; i++) {
/* submit_chunk() asserts size > 0, so empty chunks can't get here. */
268 wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
269 msg->compressed_chunk_sizes[i] =
270 wimlib_compress(msg->uncompressed_chunks[i],
271 msg->uncompressed_chunk_sizes[i],
272 msg->compressed_chunks[i],
273 msg->uncompressed_chunk_sizes[i] - 1,
/*
 * Worker thread main loop: pull a message, compress its chunks with this
 * thread's private compressor, push the result.  Exits when
 * message_queue_get() returns NULL, i.e. after the input queue has been
 * terminated.
 */
279 compressor_thread_proc(void *arg)
281 struct compressor_thread_data *params = arg;
284 while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
285 compress_chunks(msg, params->compressor);
286 message_queue_put(params->compressed_chunks_queue, msg);
/*
 * Tear down a parallel chunk compressor: stop and join the worker threads,
 * destroy both queues, free per-thread compressors, the thread-data array,
 * and the message buffers.  Also serves as the unwind path for a partially
 * constructed context from new_parallel_chunk_compressor(), hence the
 * defensive NULL/zero checks at every step.  NOTE(review): the early-return
 * for _ctx == NULL and the final FREE(ctx) are elided from this extract.
 */
292 parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
294 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
300 if (ctx->num_started_threads != 0) {
301 DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
/* Workers exit when the input queue terminates; then join each one. */
302 message_queue_terminate(&ctx->chunks_to_compress_queue);
304 for (i = 0; i < ctx->num_started_threads; i++)
305 pthread_join(ctx->thread_data[i].thread, NULL);
/* Both are safe on never-initialized queues (see message_queue_destroy). */
308 message_queue_destroy(&ctx->chunks_to_compress_queue);
309 message_queue_destroy(&ctx->compressed_chunks_queue);
311 if (ctx->thread_data != NULL)
312 for (i = 0; i < ctx->num_thread_data; i++)
/* wimlib_free_compressor() presumably tolerates NULL for slots whose
 * creation failed — TODO confirm against its definition. */
313 wimlib_free_compressor(ctx->thread_data[i].compressor);
315 FREE(ctx->thread_data);
317 free_messages(ctx->msgs, ctx->num_messages);
/*
 * Hand the partially-or-fully filled next_submit_msg to the worker threads:
 * mark it incomplete, record it on submitted_msgs (preserving submission
 * order for in-order retrieval), and enqueue it for compression.
 */
323 submit_compression_msg(struct parallel_chunk_compressor *ctx)
325 struct message *msg = ctx->next_submit_msg;
327 msg->complete = false;
328 list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
329 message_queue_put(&ctx->chunks_to_compress_queue, msg);
/* The message now belongs to the workers; start a fresh one next time. */
330 ctx->next_submit_msg = NULL;
/*
 * 'submit_chunk' implementation: copy @chunk into the current message,
 * submitting the message to the workers once it is full.  NOTE(review): the
 * return statements are elided from this extract; the branch at
 * list_empty(&ctx->available_msgs) presumably reports "no space" to the
 * caller — confirm against the chunk_compressor interface contract.
 */
334 parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
335 const void *chunk, u32 size)
337 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
340 wimlib_assert(size > 0);
341 wimlib_assert(size <= ctx->base.out_chunk_size);
343 if (ctx->next_submit_msg) {
/* Continue filling the message already in progress. */
344 msg = ctx->next_submit_msg;
346 if (list_empty(&ctx->available_msgs))
/* Take a free message and start filling it from slot 0. */
349 msg = list_entry(ctx->available_msgs.next, struct message, list);
350 list_del(&msg->list);
351 ctx->next_submit_msg = msg;
352 msg->num_filled_chunks = 0;
/* Copy is bounded by the assert above: size <= out_chunk_size, the buffer
 * size allocated in init_message(). */
355 memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
356 msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
357 if (++msg->num_filled_chunks == msg->num_alloc_chunks)
358 submit_compression_msg(ctx);
/*
 * 'get_chunk' implementation: return the next chunk's (possibly compressed)
 * data, in the same order the chunks were submitted.  Flushes any partially
 * filled message first, then waits for the oldest submitted message to
 * complete before draining its chunks one call at a time.  NOTE(review): the
 * return statements and the 'bool *' parameter/declaration lines are elided
 * from this extract.
 */
363 parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
364 const void **cdata_ret, u32 *csize_ret,
367 struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
/* A partially filled message must be pushed out, or its chunks would
 * never be compressed and ordering would stall. */
371 if (ctx->next_submit_msg)
372 submit_compression_msg(ctx);
374 if (ctx->next_ready_msg) {
/* Still draining chunks from a previously completed message. */
375 msg = ctx->next_ready_msg;
377 if (list_empty(&ctx->submitted_msgs))
/* Completion order from the workers may differ from submission order:
 * keep absorbing completed messages (marking each 'complete') until the
 * *oldest* submitted message — the one we must return next — is done. */
380 while (!(msg = list_entry(ctx->submitted_msgs.next,
382 submission_list))->complete)
383 message_queue_get(&ctx->compressed_chunks_queue)->complete = true;
385 ctx->next_ready_msg = msg;
386 ctx->next_chunk_idx = 0;
/* A nonzero compressed size means compression shrank the chunk; zero means
 * fall back to the original data (see compress_chunks()). */
389 if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
390 *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
391 *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
393 *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
394 *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
396 *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
/* Message fully drained: recycle it onto the free list. */
398 if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
399 list_del(&msg->submission_list);
400 list_add_tail(&msg->list, &ctx->available_msgs);
401 ctx->next_ready_msg = NULL;
/*
 * Construct a parallel chunk compressor for compression type @out_ctype and
 * chunk size @out_chunk_size, using @num_threads worker threads (0 = detect)
 * and at most @max_memory bytes (0 presumably = detect; the assignment site's
 * condition is elided from this extract).  On success *compressor_ret gets
 * the new object.  Scales thread count and buffer counts down until the
 * estimated memory footprint fits; degrades to "no parallel compressor"
 * (elided return, presumably a status letting the caller fall back to serial
 * compression) when only one thread remains.  All failure paths funnel
 * through parallel_chunk_compressor_destroy(), which tolerates a partially
 * constructed context.
 */
407 new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
408 unsigned num_threads, u64 max_memory,
409 struct chunk_compressor **compressor_ret)
411 u64 approx_mem_required;
412 size_t chunks_per_msg;
413 size_t msgs_per_thread;
414 struct parallel_chunk_compressor *ctx;
417 unsigned desired_num_threads;
419 wimlib_assert(out_chunk_size > 0);
421 if (num_threads == 0)
422 num_threads = get_default_num_threads();
424 if (num_threads == 1) {
425 DEBUG("Only 1 thread; Not bothering with "
426 "parallel chunk compressor.");
431 max_memory = get_avail_memory();
/* Remember the requested count so we can warn if memory forces fewer. */
433 desired_num_threads = num_threads;
435 if (out_chunk_size < ((u32)1 << 23)) {
/* Chunks < 8 MiB: batch several per message. */
436 chunks_per_msg = MAX_CHUNKS_PER_MSG;
439 /* Big chunks: Just have one buffer per thread --- more would
440 * just waste memory. */
/* Iteratively shrink (elided loop structure) until the estimate fits:
 * buffers + per-thread compressor state. */
445 approx_mem_required =
446 (u64)chunks_per_msg *
447 (u64)msgs_per_thread *
452 + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
455 if (approx_mem_required <= max_memory)
/* Reduce in order of least impact: batch size, then buffering depth,
 * then thread count. */
458 if (chunks_per_msg > 1)
460 else if (msgs_per_thread > 1)
462 else if (num_threads > 1)
468 if (num_threads < desired_num_threads) {
469 WARNING("Wanted to use %u threads, but limiting to %u "
470 "to fit in available memory!",
471 desired_num_threads, num_threads);
474 if (num_threads == 1) {
475 DEBUG("Only 1 thread; Not bothering with "
476 "parallel chunk compressor.");
480 ret = WIMLIB_ERR_NOMEM;
481 ctx = CALLOC(1, sizeof(*ctx));
/* Fill in the chunk_compressor vtable/base fields. */
485 ctx->base.out_ctype = out_ctype;
486 ctx->base.out_chunk_size = out_chunk_size;
487 ctx->base.destroy = parallel_chunk_compressor_destroy;
488 ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
489 ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;
491 ctx->num_thread_data = num_threads;
493 ret = message_queue_init(&ctx->chunks_to_compress_queue);
497 ret = message_queue_init(&ctx->compressed_chunks_queue);
501 ret = WIMLIB_ERR_NOMEM;
/* CALLOC: destroy() relies on zeroed compressor pointers for cleanup of a
 * partially initialized array. */
502 ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
503 if (ctx->thread_data == NULL)
506 for (i = 0; i < num_threads; i++) {
507 struct compressor_thread_data *dat;
509 dat = &ctx->thread_data[i];
511 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
512 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
513 ret = wimlib_create_compressor(out_ctype, out_chunk_size,
514 NULL, &dat->compressor);
/* Start the workers, tracking how many actually started so that both
 * destroy() and the partial-failure path below join only live threads. */
519 for (ctx->num_started_threads = 0;
520 ctx->num_started_threads < num_threads;
521 ctx->num_started_threads++)
523 DEBUG("pthread_create thread %u of %u",
524 ctx->num_started_threads + 1, num_threads);
525 ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
527 compressor_thread_proc,
528 &ctx->thread_data[ctx->num_started_threads]);
531 WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
532 ctx->num_started_threads + 1,
534 ret = WIMLIB_ERR_NOMEM;
/* With at least 2 threads running, parallelism still helps: proceed
 * with fewer threads instead of failing outright. */
535 if (ctx->num_started_threads >= 2)
541 ctx->base.num_threads = ctx->num_started_threads;
543 ret = WIMLIB_ERR_NOMEM;
/* Message pool is sized by *started* threads, not requested ones. */
544 ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
545 ctx->msgs = allocate_messages(ctx->num_messages,
546 chunks_per_msg, out_chunk_size);
547 if (ctx->msgs == NULL)
/* Initially every message is available and none are in flight. */
550 INIT_LIST_HEAD(&ctx->available_msgs);
551 for (size_t i = 0; i < ctx->num_messages; i++)
552 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
554 INIT_LIST_HEAD(&ctx->submitted_msgs);
556 *compressor_ret = &ctx->base;
/* Error path (elided label): the destroy function handles any partially
 * constructed state. */
560 parallel_chunk_compressor_destroy(&ctx->base);
564 #endif /* ENABLE_MULTITHREADED_COMPRESSION */