/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCTL_H
#  include <sys/sysctl.h>
#endif

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#include "wimlib/win32.h" /* win32_get_number_of_processors() */

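/*
 * A simple intrusive-list FIFO protected by a mutex, with condition
 * variables for signaling.  Note that 'space_avail_cond' is initialized
 * and destroyed but never waited on or signaled in this file;
 * backpressure instead comes from the fixed pool of messages.  Once
 * 'terminating' is set, readers stop blocking and receive NULL.
 */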
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

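/*
 * A unit of work passed between the main thread and the compressor
 * threads.  Each message carries up to MAX_CHUNKS_PER_MSG chunks: the
 * main thread fills 'uncompressed_chunks', and a compressor thread
 * writes the results into 'compressed_chunks'.  A compressed size of 0
 * marks a chunk that did not compress and must be stored uncompressed.
 */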
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

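/*
 * The parallel chunk compressor.  Free messages sit on
 * 'available_msgs'; messages handed to the worker threads are tracked
 * on 'submitted_msgs' in submission order, which is what allows
 * results to be returned in the order the chunks were provided.
 */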
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

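/* Return the number of processors to use by default, or 1 if it cannot
 * be determined.  */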
static unsigned
get_default_num_threads(void)
{
        long n;
#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}

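/*
 * Estimate the amount of physical memory available, trying the Win32
 * API first, then POSIX sysconf(), then the BSD/macOS sysctl()
 * interface, and finally falling back to an assumption of 1 GiB.
 */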
static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#else
        int mib[2] = {CTL_HW, HW_MEMSIZE};
        u64 memsize;
        size_t len = sizeof(memsize);
        if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
                goto default_size;
        return memsize;
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return 1ULL << 30;
}

static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

static void
message_queue_destroy(struct message_queue *q)
{
        /* A nonnull list head means the queue was successfully
         * initialized (the containing context was zeroed by CALLOC).  */
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

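/*
 * Retrieve the next message from the queue, blocking until one is
 * available or until the queue is terminated, in which case NULL is
 * returned.
 */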
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

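/*
 * Allocate the chunk buffers for a message.  The buffer for each
 * compressed chunk is only (out_chunk_size - 1) bytes: a result that
 * is not strictly smaller than the original data is useless, since
 * such a chunk would be stored uncompressed instead.
 */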
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        /* Freeing all 'count' messages is safe here:
                         * CALLOC zeroed them, so the not-yet-initialized
                         * ones have num_alloc_chunks == 0.  */
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

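/*
 * Compress each filled chunk in the message.  wimlib_compress() is
 * given one byte less than the uncompressed size as the output buffer
 * size, so it returns 0 (marking the chunk incompressible) unless the
 * chunk actually shrinks.
 */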
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

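/*
 * The main routine for each compressor thread: repeatedly take a
 * message from the input queue, compress its chunks, and post it to
 * the output queue.  Exits when the input queue is terminated.
 */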
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

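/*
 * Hand the current in-progress message off to the compressor threads.
 * It is also appended to 'submitted_msgs' so that its results can
 * later be returned in submission order.
 */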
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

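/*
 * Return a buffer into which the caller may write the next chunk of
 * uncompressed data, or NULL if no message is free, in which case the
 * caller must retrieve some compression results first.
 */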
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

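/*
 * Record that the most recently returned chunk buffer was filled with
 * 'usize' bytes of data.  Once the current message is full, it is
 * submitted for compression.
 */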
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

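/*
 * Retrieve the next chunk result, in the order the chunks were
 * submitted.  Any partially filled message is flushed first.  Messages
 * that complete out of order are marked as they drain from the
 * completion queue; the head of 'submitted_msgs' is only consumed once
 * it is complete, which preserves ordering.  For an incompressible
 * chunk (compressed size 0), the uncompressed data is returned
 * instead.  Returns false if no chunks are pending.
 */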
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

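/*
 * Create a chunk compressor that compresses chunks of size
 * 'out_chunk_size' in parallel, using 'num_threads' threads (0 means
 * autodetect) and staying within 'max_memory' bytes (0 means
 * autodetect the available physical memory).
 *
 * Returns 0 on success, a positive wimlib error code on failure, or a
 * negative value (-1 or -2) when only a single thread would be used,
 * in which case the caller is expected to fall back to serial
 * compression instead.
 *
 * Illustrative usage sketch (not part of this file; read_chunk() and
 * write_out_chunk() are hypothetical, and error handling is omitted):
 *
 *	struct chunk_compressor *c;
 *	const void *cdata;
 *	u32 csize, usize;
 *	void *buf;
 *
 *	new_parallel_chunk_compressor(out_ctype, out_chunk_size, 0, 0, &c);
 *	while (more input remains) {
 *		while ((buf = c->get_chunk_buffer(c)) == NULL) {
 *			(all messages in flight: consume a result to free one)
 *			c->get_compression_result(c, &cdata, &csize, &usize);
 *			write_out_chunk(cdata, csize, usize);
 *		}
 *		u32 filled = read_chunk(buf, out_chunk_size);
 *		c->signal_chunk_filled(c, filled);
 *	}
 *	while (c->get_compression_result(c, &cdata, &csize, &usize))
 *		write_out_chunk(cdata, csize, usize);
 *	c->destroy(c);
 */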
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */