/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"

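/*
 * A FIFO of messages, shared between the main thread and the compressor
 * threads.  Producers append to the tail under the lock and signal
 * msg_avail_cond; consumers block on msg_avail_cond until a message arrives
 * or the queue is terminated.  (space_avail_cond is initialized and
 * destroyed but not currently waited on in this file.)
 */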
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

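/* Per-thread state: the worker thread itself, the two queues connecting it
 * to the main thread, and the compressor instance it uses.  */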
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

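/* Maximum number of chunks that a single message can carry  */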
#define MAX_CHUNKS_PER_MSG 16

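/*
 * A unit of work exchanged between the main thread and a compressor thread.
 * The main thread fills up to num_alloc_chunks uncompressed buffers; a
 * worker then fills the corresponding compressed buffers and sizes.  A
 * compressed size of 0 means the chunk did not compress and will be passed
 * on uncompressed.
 */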
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

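/*
 * The parallel chunk compressor.  It embeds the generic chunk_compressor
 * interface as its first member, so the base pointer handed out to callers
 * can be cast back to this structure.  Each message cycles from
 * available_msgs through chunks_to_compress_queue and
 * compressed_chunks_queue, then back to available_msgs.
 */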
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

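/* Initialize a message queue; returns 0 or WIMLIB_ERR_NOMEM.  */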
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

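/* Destroy a message queue, provided it was successfully initialized
 * (detected via the list head set by INIT_LIST_HEAD()).  */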
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

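/* Enqueue a message and wake one waiting consumer.  */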
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

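/* Dequeue the oldest message, blocking until one is available.  Returns
 * NULL if the queue has been terminated.  */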
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

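/* Mark the queue as terminating and wake all waiting consumers.  */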
static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

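/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the output chunk size; a result that is not strictly
 * smaller than its input is stored uncompressed instead.  */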
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

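/* Compress each filled chunk in a message.  wimlib_compress() is given only
 * usize - 1 bytes of output space, so it returns 0 for chunks that do not
 * compress; get_compression_result() then passes those chunks on
 * uncompressed.  */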
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

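/* Compressor thread routine: repeatedly grab a message, compress its
 * chunks, and hand the message back, until the input queue is terminated.  */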
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

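/* Tear down the parallel chunk compressor: terminate and join the worker
 * threads, then free the queues, per-thread compressors, and messages.  */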
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

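/* Hand the message currently being filled to the compressor threads,
 * recording it on submitted_msgs so results can be returned in submission
 * order.  */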
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

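/* Implements chunk_compressor.get_chunk_buffer: return a buffer for the
 * next uncompressed chunk, or NULL if all message buffers are in flight
 * (the caller must retrieve some results first).  */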
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

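/* Implements chunk_compressor.signal_chunk_filled: record the size of the
 * chunk just written into the buffer most recently returned by
 * get_chunk_buffer(), and submit the message once it is full.  */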
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

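/* Implements chunk_compressor.get_compression_result: return the result for
 * the next chunk, in submission order, blocking if necessary until the
 * oldest submitted message is complete.  Returns false if no chunks are
 * pending.  */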
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

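/*
 * Create a parallel chunk compressor.  Returns 0 on success, a positive
 * WIMLIB_ERR_* code on failure, or a negative value when a parallel
 * compressor is not worthwhile (only one usable thread, or too little
 * memory).
 *
 * An illustrative sketch of how a caller might drive the interface; the
 * real driver lives elsewhere in wimlib, and have_more_data(),
 * read_chunk(), write_chunk(), and serial_fallback() are hypothetical
 * helpers:
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      u32 csize, usize;
 *      void *buf;
 *
 *      if (new_parallel_chunk_compressor(ctype, chunk_size, 0, 0, &c))
 *              return serial_fallback();
 *      while (have_more_data()) {
 *              buf = c->get_chunk_buffer(c);
 *              if (buf == NULL) {
 *                      if (c->get_compression_result(c, &cdata, &csize,
 *                                                    &usize))
 *                              write_chunk(cdata, csize, usize);
 *                      continue;
 *              }
 *              c->signal_chunk_filled(c, read_chunk(buf, chunk_size));
 *      }
 *      while (c->get_compression_result(c, &cdata, &csize, &usize))
 *              write_chunk(cdata, csize, usize);
 *      c->destroy(c);
 */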
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

        if (num_threads == 1)
                return -1;

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1)
                return -2;

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}