/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */
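
/*
 * Overview: the main thread fills "messages" (batches of up to
 * MAX_CHUNKS_PER_MSG chunks) and places them on chunks_to_compress_queue.  A
 * pool of worker threads takes messages from that queue, compresses each
 * chunk, and places the finished messages on compressed_chunks_queue, from
 * which the main thread retrieves the results in submission order.
 */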

/*
 * Copyright (C) 2013-2023 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see https://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/threads.h"
#include "wimlib/util.h"

struct message_queue {
        struct list_head list;
        struct mutex lock;
        struct condvar msg_avail_cond;
        struct condvar space_avail_cond;
        bool terminating;
};

struct compressor_thread_data {
        struct thread thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

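/* Maximum number of chunks that a single message can carry.  Batching several
 * chunks per message reduces how often the threads must take the queue locks. */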
#define MAX_CHUNKS_PER_MSG 16

struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

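/*
 * Example usage --- an illustrative sketch, not code from wimlib itself.  It
 * shows how a caller might drive the chunk_compressor interface built below.
 * has_more_input(), read_next_chunk(), and write_compressed_chunk() are
 * hypothetical helpers standing in for the caller's I/O, and error handling
 * is omitted.
 *
 *	struct chunk_compressor *c;
 *	const void *cdata;
 *	u32 csize, usize;
 *
 *	if (new_parallel_chunk_compressor(WIMLIB_COMPRESSION_TYPE_LZX,
 *					  32768, 0, 0, &c) != 0)
 *		return;		// fall back to a serial compressor
 *
 *	while (has_more_input()) {
 *		void *buf = c->get_chunk_buffer(c);
 *		if (buf == NULL) {
 *			// All buffers in flight; free one by consuming a result.
 *			c->get_compression_result(c, &cdata, &csize, &usize);
 *			write_compressed_chunk(cdata, csize, usize);
 *			continue;
 *		}
 *		c->signal_chunk_filled(c, read_next_chunk(buf, c->out_chunk_size));
 *	}
 *
 *	// Drain the rest; this also submits any partially filled message.
 *	while (c->get_compression_result(c, &cdata, &csize, &usize))
 *		write_compressed_chunk(cdata, csize, usize);
 *
 *	c->destroy(c);
 */
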
static int
message_queue_init(struct message_queue *q)
{
        if (!mutex_init(&q->lock))
                goto err;
        if (!condvar_init(&q->msg_avail_cond))
                goto err_destroy_lock;
        if (!condvar_init(&q->space_avail_cond))
                goto err_destroy_msg_avail_cond;
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        condvar_destroy(&q->msg_avail_cond);
err_destroy_lock:
        mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

static void
message_queue_destroy(struct message_queue *q)
{
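        /* The queue was initialized only if INIT_LIST_HEAD() ran in
         * message_queue_init(); the containing structure was zeroed by
         * CALLOC(), so a NULL list head means "never initialized". */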
112         if (q->list.next != NULL) {
113                 mutex_destroy(&q->lock);
114                 condvar_destroy(&q->msg_avail_cond);
115                 condvar_destroy(&q->space_avail_cond);
116         }
117 }
118
119 static void
120 message_queue_put(struct message_queue *q, struct message *msg)
121 {
122         mutex_lock(&q->lock);
123         list_add_tail(&msg->list, &q->list);
124         condvar_signal(&q->msg_avail_cond);
125         mutex_unlock(&q->lock);
126 }
127
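/* Remove and return the oldest message in the queue, blocking until one is
 * available.  Returns NULL if the queue is being terminated, which tells the
 * worker threads to exit. */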
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                condvar_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        mutex_unlock(&q->lock);
        return msg;
}

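/* Mark the queue as terminating and wake all threads blocked in
 * message_queue_get(). */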
static void
message_queue_terminate(struct message_queue *q)
{
        mutex_lock(&q->lock);
        q->terminating = true;
        condvar_broadcast(&q->msg_avail_cond);
        mutex_unlock(&q->lock);
}

static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
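                /* The compressed buffer is 1 byte smaller than the
                 * uncompressed one: a compression result is only kept if it is
                 * strictly smaller than the original chunk. */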
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
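        /* wimlib_compress() returns the compressed size, or 0 if the data
         * could not be compressed to the given buffer size.  Passing
         * usize - 1 as the output buffer size means a chunk counts as
         * compressed only if the result is strictly smaller than the input;
         * otherwise get_compression_result() falls back to the uncompressed
         * data. */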
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

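/* Each worker thread repeatedly takes a message from the "to compress" queue,
 * compresses its chunks, and puts it on the "compressed" queue, until the
 * queue is terminated. */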
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

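/* Tear down the compressor.  This is safe to call on a partially constructed
 * context (as done on the error paths of new_parallel_chunk_compressor()),
 * since the context was zeroed by CALLOC(). */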
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        thread_join(&ctx->thread_data[i].thread);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

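/* Send the message currently being filled to the worker threads, and remember
 * it on submitted_msgs so its results can later be retrieved in order. */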
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

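/* Return a buffer into which the caller may write the next uncompressed
 * chunk, or NULL if all buffers are in flight (in which case the caller must
 * retrieve some compression results first). */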
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

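/* Record the size of the chunk just written into the buffer returned by
 * get_chunk_buffer(), and submit the current message once it is full. */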
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

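/* Retrieve the next chunk's compression result, in the order the chunks were
 * submitted.  Returns false if no chunks are pending.  Any partially filled
 * message is submitted first, so no chunk is ever left behind. */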
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

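                /* Mark messages complete as they come off the compressed
                 * queue until the oldest submitted message is complete; this
                 * enforces in-order retrieval even though the worker threads
                 * may finish out of order. */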
                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

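        /* A negative return value (here and below) tells the caller that
         * parallel compression is not worthwhile in this configuration and
         * that it should fall back to a serial chunk compressor; it is not an
         * error. */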
        if (num_threads == 1)
                return -1;

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
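        /* Estimate the memory needed for the chosen parameters; if it exceeds
         * max_memory, scale back --- first fewer chunks per message, then
         * fewer messages per thread, then fewer threads. */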
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1)
                return -2;

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

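        /* Start the worker threads.  If at least two threads were started,
         * tolerate failure to start the rest and proceed with what we have;
         * otherwise parallel compression is pointless, so fail. */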
        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                if (!thread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                   compressor_thread_proc,
                                   &ctx->thread_data[ctx->num_started_threads]))
                {
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}