/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"

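/* An unbounded FIFO queue of messages, protected by a mutex.  Consumers sleep
 * on 'msg_avail_cond' until a message is queued or the queue is terminated.
 * ('space_avail_cond' is initialized but not otherwise used in this file.)  */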
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

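/* Per-thread state: the worker thread itself, the queues it receives from and
 * sends to, and the compressor instance it uses.  */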
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

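/* A unit of work passed between threads: up to MAX_CHUNKS_PER_MSG chunks of
 * uncompressed data, along with buffers to hold the corresponding compressed
 * data and the resulting compressed sizes.  */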
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

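/* The parallel chunk compressor: extends the generic 'struct chunk_compressor'
 * interface with the worker threads, the two message queues, and the lists
 * that track which messages are available, submitted, or being read back.  */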
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

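/* Initialize a message queue.  Returns 0 on success, or WIMLIB_ERR_NOMEM if a
 * mutex or condition variable could not be initialized.  */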
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

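/* Destroy a message queue, freeing its synchronization objects.  The check of
 * 'q->list.next' makes this a no-op on a queue that was never initialized
 * (the containing structure is zeroed by CALLOC).  */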
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

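/* Enqueue a message and wake one waiting consumer.  */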
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

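/* Dequeue the oldest message, blocking until one is available.  Returns NULL
 * if the queue has been terminated.  */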
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

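/* Mark the queue as terminating and wake all waiting consumers, causing
 * message_queue_get() to return NULL in each worker thread.  */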
static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

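/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the output chunk size, since a compressed result is only
 * worth keeping if it is strictly smaller than the original data.  */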
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

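/* Compress each filled chunk in the message.  wimlib_compress() stores 0 in
 * the compressed size when the data cannot be compressed to fewer bytes than
 * its uncompressed size; such chunks are later sent uncompressed.  */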
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

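/* The worker thread routine: repeatedly take a message from the input queue,
 * compress its chunks, and pass it to the output queue, until the input queue
 * is terminated.  */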
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

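/* Tear down the parallel chunk compressor: terminate and join any started
 * worker threads, then free the queues, compressors, and message buffers.
 * Safe to call on a partially constructed context.  */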
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

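/* Send the message currently being filled to the worker threads.  */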
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

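/* Return a buffer into which the caller may write the next uncompressed
 * chunk, or NULL if every message is in flight (in which case the caller must
 * retrieve some compression results first).  */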
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

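/* Record the size of the chunk just written into the buffer returned by
 * get_chunk_buffer().  Once the message is full, it is submitted for
 * compression.  */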
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

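/* Retrieve the next chunk's compression result, in submission order, blocking
 * until the oldest submitted message has been compressed.  Returns false if no
 * chunks are pending.  For an incompressible chunk (compressed size 0), the
 * uncompressed data is returned instead.  */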
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

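/*
 * Create a parallel chunk compressor.  Returns 0 and sets *compressor_ret on
 * success; returns a negative value if a parallel compressor would not be
 * useful (only one usable thread), in which case the caller is expected to
 * fall back to serial compression; or returns a positive WIMLIB_ERR_* code on
 * failure.
 *
 * Sketch of the intended calling pattern (have_more_data(), read_chunk(), and
 * write_chunk() are hypothetical stand-ins for the caller's I/O routines):
 *
 *	struct chunk_compressor *c;
 *	if (new_parallel_chunk_compressor(out_ctype, out_chunk_size,
 *					  0, 0, &c) == 0) {
 *		const void *cdata;
 *		u32 csize, usize;
 *		while (have_more_data()) {
 *			void *buf = c->get_chunk_buffer(c);
 *			if (buf == NULL) {
 *				// All messages in flight; drain one result.
 *				c->get_compression_result(c, &cdata,
 *							  &csize, &usize);
 *				write_chunk(cdata, csize, usize);
 *				continue;
 *			}
 *			u32 n = read_chunk(buf, out_chunk_size);
 *			c->signal_chunk_filled(c, n);
 *		}
 *		while (c->get_compression_result(c, &cdata, &csize, &usize))
 *			write_chunk(cdata, csize, usize);
 *		c->destroy(c);
 *	}
 */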
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */