/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCTL_H
#  include <sys/sysctl.h>
#endif

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#ifdef __WIN32__
#  include "wimlib/win32.h" /* win32_get_number_of_processors() */
#endif

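/*
 * This file provides a "chunk compressor" that distributes compression work
 * over multiple threads.  The main thread fills in messages, each of which
 * holds up to MAX_CHUNKS_PER_MSG chunks of uncompressed data, and places them
 * on the chunks_to_compress_queue.  The compressor threads take messages off
 * that queue, compress their chunks, and place them on the
 * compressed_chunks_queue, from which the main thread retrieves the results
 * in submission order.
 *
 * A sketch of how a caller is expected to drive the generic 'struct
 * chunk_compressor' interface (the actual callers live elsewhere in wimlib):
 *
 *	while ((buf = c->get_chunk_buffer(c)) != NULL) {
 *		... fill buf with up to c->out_chunk_size bytes ...
 *		c->signal_chunk_filled(c, usize);
 *	}
 *	while (c->get_compression_result(c, &cdata, &csize, &usize))
 *		... consume the (possibly compressed) chunk ...
 */
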
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

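/* A batch of chunks that is passed between the main thread and the compressor
 * threads as a single unit.  Batching several small chunks per message
 * amortizes the queueing and locking overhead.  */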
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

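/* Return the number of online processors, or 1 if it cannot be determined.  */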
static unsigned
get_default_num_threads(void)
{
        long n;
#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}

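/* Return the amount of available memory in bytes, falling back to 1 GiB if it
 * cannot be determined.  */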
static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#else
        int mib[2] = {CTL_HW, HW_MEMSIZE};
        u64 memsize;
        size_t len = sizeof(memsize);
        if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
                goto default_size;
        return memsize;
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return 1ULL << 30;
}

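/* Initialize a message queue: a mutex-protected list of messages, with
 * condition variables for signaling.  Returns 0 on success or
 * WIMLIB_ERR_NOMEM on failure.  */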
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

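/* Destroy a message queue, if it was initialized.  (q->list.next is nonnull
 * only if INIT_LIST_HEAD() was reached in message_queue_init(), since the
 * containing structure was zeroed by CALLOC().)  */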
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

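/* Add a message to the tail of the queue and wake up one waiting thread.  */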
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

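/* Remove and return the message at the head of the queue, blocking if needed
 * until one is available.  Returns NULL if the queue has been terminated.  */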
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

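/* Allocate the chunk buffers for a message.  Each compressed chunk buffer is
 * 1 byte smaller than the output chunk size, so that a chunk which cannot be
 * compressed to fewer bytes than its uncompressed size is detected and kept
 * uncompressed.  */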
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

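/* Compress each filled chunk in the message.  The available output space for
 * each chunk is 1 byte less than its uncompressed size, so a resulting
 * compressed size of 0 indicates a chunk that did not compress and will be
 * kept uncompressed.  */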
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

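/* Entry point for each compressor thread: repeatedly take a message off the
 * input queue, compress its chunks, and place it on the output queue, until
 * the input queue is terminated.  */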
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

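/* Hand the message currently being filled off to the compressor threads.  */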
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

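/* Return a buffer into which the caller may write the next uncompressed
 * chunk, or NULL if no message is currently available (in which case the
 * caller must first retrieve some compression results).  */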
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

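/* Record that the buffer returned by the last get_chunk_buffer() call was
 * filled with @usize bytes.  Once all chunk buffers of the current message
 * are filled, the message is submitted for compression.  */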
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

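/* Retrieve the next chunk, in submission order, waiting if necessary for its
 * compression to complete.  Returns false if no chunks are pending.  The
 * chunk is returned in compressed form only if that actually saved space;
 * otherwise the uncompressed data is returned.  */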
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

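/* Create a parallel chunk compressor for the specified compression type and
 * output chunk size, using @num_threads threads (a default based on the
 * number of processors if 0) and at most @max_memory bytes of memory (the
 * amount of available memory if 0).  Returns 0 on success, a positive
 * WIMLIB_ERR_* code on failure, or a negative value if a parallel compressor
 * is not worthwhile (e.g. only 1 thread would be used), in which case the
 * caller should fall back to serial compression.  */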
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
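        /* Shrink the proposed allocation until its estimated memory usage
         * fits in max_memory: first reduce the chunks per message, then the
         * messages per thread, then the number of threads.  */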
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size, 0,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */