6cd306c2fcb96d5aa6d8b7e64c08c27022c3ff12
[wimlib] / src / compress_parallel.c
1 /*
2  * compress_parallel.c
3  *
4  * Compress chunks of data (parallel version).
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free Software
14  * Foundation; either version 3 of the License, or (at your option) any later
15  * version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License along with
22  * wimlib; if not, see http://www.gnu.org/licenses/.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #  include "config.h"
27 #endif
28
29 #ifdef ENABLE_MULTITHREADED_COMPRESSION
30
31 #include "wimlib/assert.h"
32 #include "wimlib/chunk_compressor.h"
33 #include "wimlib/error.h"
34 #include "wimlib/list.h"
35 #include "wimlib/util.h"
36 #ifdef __WIN32__
37 #  include "wimlib/win32.h" /* win32_get_number_of_processors() */
38 #endif
39
40 #include <errno.h>
41 #include <limits.h>
42 #include <pthread.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <unistd.h>
46
47 struct message_queue {
48         struct list_head list;
49         pthread_mutex_t lock;
50         pthread_cond_t msg_avail_cond;
51         pthread_cond_t space_avail_cond;
52         bool terminating;
53 };
54
55 struct compressor_thread_data {
56         pthread_t thread;
57         struct message_queue *chunks_to_compress_queue;
58         struct message_queue *compressed_chunks_queue;
59         struct wimlib_compressor *compressor;
60 };
61
62 #define MAX_CHUNKS_PER_MSG 2
63
64 struct message {
65         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
66         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
67         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
68         unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
69         size_t num_filled_chunks;
70         size_t num_alloc_chunks;
71         struct list_head list;
72         bool complete;
73         struct list_head submission_list;
74 };
75
76 struct parallel_chunk_compressor {
77         struct chunk_compressor base;
78
79         struct message_queue chunks_to_compress_queue;
80         struct message_queue compressed_chunks_queue;
81         struct compressor_thread_data *thread_data;
82         unsigned num_threads;
83         unsigned num_started_threads;
84
85         struct message *msgs;
86         size_t num_messages;
87
88         struct list_head available_msgs;
89         struct list_head submitted_msgs;
90         struct message *next_submit_msg;
91         struct message *next_ready_msg;
92         size_t next_chunk_idx;
93 };
94
95 static unsigned
96 get_default_num_threads(void)
97 {
98         long n;
99 #ifdef __WIN32__
100         n = win32_get_number_of_processors();
101 #else
102         n = sysconf(_SC_NPROCESSORS_ONLN);
103 #endif
104         if (n < 1 || n >= UINT_MAX) {
105                 WARNING("Failed to determine number of processors; assuming 1.");
106                 return 1;
107         }
108         return n;
109 }
110
111 static u64
112 get_avail_memory(void)
113 {
114 #ifdef __WIN32__
115         u64 phys_bytes = win32_get_avail_memory();
116         if (phys_bytes == 0)
117                 goto default_size;
118         return phys_bytes;
119 #else
120         long page_size = sysconf(_SC_PAGESIZE);
121         long num_pages = sysconf(_SC_PHYS_PAGES);
122         if (page_size <= 0 || num_pages <= 0)
123                 goto default_size;
124         return ((u64)page_size * (u64)num_pages);
125 #endif
126
127 default_size:
128         WARNING("Failed to determine available memory; assuming 1 GiB");
129         return 1U << 30;
130 }
131
132 static int
133 message_queue_init(struct message_queue *q)
134 {
135         if (pthread_mutex_init(&q->lock, NULL)) {
136                 ERROR_WITH_ERRNO("Failed to initialize mutex");
137                 goto err;
138         }
139         if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
140                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
141                 goto err_destroy_lock;
142         }
143         if (pthread_cond_init(&q->space_avail_cond, NULL)) {
144                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
145                 goto err_destroy_msg_avail_cond;
146         }
147         INIT_LIST_HEAD(&q->list);
148         return 0;
149
150 err_destroy_msg_avail_cond:
151         pthread_cond_destroy(&q->msg_avail_cond);
152 err_destroy_lock:
153         pthread_mutex_destroy(&q->lock);
154 err:
155         return WIMLIB_ERR_NOMEM;
156 }
157
158 static void
159 message_queue_destroy(struct message_queue *q)
160 {
161         if (q->list.next != NULL) {
162                 pthread_mutex_destroy(&q->lock);
163                 pthread_cond_destroy(&q->msg_avail_cond);
164                 pthread_cond_destroy(&q->space_avail_cond);
165         }
166 }
167
168 static void
169 message_queue_put(struct message_queue *q, struct message *msg)
170 {
171         pthread_mutex_lock(&q->lock);
172         list_add_tail(&msg->list, &q->list);
173         pthread_cond_signal(&q->msg_avail_cond);
174         pthread_mutex_unlock(&q->lock);
175 }
176
177 static struct message *
178 message_queue_get(struct message_queue *q)
179 {
180         struct message *msg;
181
182         pthread_mutex_lock(&q->lock);
183         while (list_empty(&q->list) && !q->terminating)
184                 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
185         if (!q->terminating) {
186                 msg = list_entry(q->list.next, struct message, list);
187                 list_del(&msg->list);
188         } else
189                 msg = NULL;
190         pthread_mutex_unlock(&q->lock);
191         return msg;
192 }
193
194 static void
195 message_queue_terminate(struct message_queue *q)
196 {
197         pthread_mutex_lock(&q->lock);
198         q->terminating = true;
199         pthread_cond_broadcast(&q->msg_avail_cond);
200         pthread_mutex_unlock(&q->lock);
201 }
202
203 static int
204 init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
205 {
206         msg->num_alloc_chunks = num_chunks;
207         for (size_t i = 0; i < num_chunks; i++) {
208                 msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
209                 msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
210                 if (msg->compressed_chunks[i] == NULL ||
211                     msg->uncompressed_chunks[i] == NULL)
212                         return WIMLIB_ERR_NOMEM;
213         }
214         return 0;
215 }
216
217 static void
218 destroy_message(struct message *msg)
219 {
220         for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
221                 FREE(msg->compressed_chunks[i]);
222                 FREE(msg->uncompressed_chunks[i]);
223         }
224 }
225
226 static void
227 free_messages(struct message *msgs, size_t num_messages)
228 {
229         if (msgs) {
230                 for (size_t i = 0; i < num_messages; i++)
231                         destroy_message(&msgs[i]);
232                 FREE(msgs);
233         }
234 }
235
236 static struct message *
237 allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
238 {
239         struct message *msgs;
240
241         msgs = CALLOC(count, sizeof(struct message));
242         if (msgs == NULL)
243                 return NULL;
244         for (size_t i = 0; i < count; i++) {
245                 if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
246                         free_messages(msgs, count);
247                         return NULL;
248                 }
249         }
250         return msgs;
251 }
252
253 static void
254 compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
255 {
256
257         for (size_t i = 0; i < msg->num_filled_chunks; i++) {
258                 wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
259                 msg->compressed_chunk_sizes[i] =
260                         wimlib_compress(msg->uncompressed_chunks[i],
261                                         msg->uncompressed_chunk_sizes[i],
262                                         msg->compressed_chunks[i],
263                                         msg->uncompressed_chunk_sizes[i] - 1,
264                                         compressor);
265         }
266 }
267
268 static void *
269 compressor_thread_proc(void *arg)
270 {
271         struct compressor_thread_data *params = arg;
272         struct message *msg;
273
274         while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
275                 compress_chunks(msg, params->compressor);
276                 message_queue_put(params->compressed_chunks_queue, msg);
277         }
278         return NULL;
279 }
280
281 static void
282 parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
283 {
284         struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
285         unsigned i;
286
287         if (ctx == NULL)
288                 return;
289
290         if (ctx->num_started_threads != 0) {
291                 DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
292                 message_queue_terminate(&ctx->chunks_to_compress_queue);
293
294                 for (i = 0; i < ctx->num_started_threads; i++)
295                         pthread_join(ctx->thread_data[i].thread, NULL);
296         }
297
298         message_queue_destroy(&ctx->chunks_to_compress_queue);
299         message_queue_destroy(&ctx->compressed_chunks_queue);
300
301         if (ctx->thread_data != NULL)
302                 for (i = 0; i < ctx->num_threads; i++)
303                         wimlib_free_compressor(ctx->thread_data[i].compressor);
304
305         FREE(ctx->thread_data);
306
307         free_messages(ctx->msgs, ctx->num_messages);
308
309         FREE(ctx);
310 }
311
312 static void
313 submit_compression_msg(struct parallel_chunk_compressor *ctx)
314 {
315         struct message *msg = ctx->next_submit_msg;
316
317         msg->complete = false;
318         list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
319         message_queue_put(&ctx->chunks_to_compress_queue, msg);
320         ctx->next_submit_msg = NULL;
321 }
322
323 static bool
324 parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
325                                        const void *chunk, size_t size)
326 {
327         struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
328         struct message *msg;
329
330         wimlib_assert(size > 0);
331         wimlib_assert(size <= ctx->base.out_chunk_size);
332
333         if (ctx->next_submit_msg) {
334                 msg = ctx->next_submit_msg;
335         } else {
336                 if (list_empty(&ctx->available_msgs))
337                         return false;
338
339                 msg = list_entry(ctx->available_msgs.next, struct message, list);
340                 list_del(&msg->list);
341                 ctx->next_submit_msg = msg;
342                 msg->num_filled_chunks = 0;
343         }
344
345         memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
346         msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
347         if (++msg->num_filled_chunks == msg->num_alloc_chunks)
348                 submit_compression_msg(ctx);
349         return true;
350 }
351
352 static bool
353 parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
354                                     const void **cdata_ret, unsigned *csize_ret,
355                                     unsigned *usize_ret)
356 {
357         struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
358         struct message *msg;
359
360
361         if (ctx->next_submit_msg)
362                 submit_compression_msg(ctx);
363
364         if (ctx->next_ready_msg) {
365                 msg = ctx->next_ready_msg;
366         } else {
367                 if (list_empty(&ctx->submitted_msgs))
368                         return false;
369
370                 while (!(msg = list_entry(ctx->submitted_msgs.next,
371                                           struct message,
372                                           submission_list))->complete)
373                         message_queue_get(&ctx->compressed_chunks_queue)->complete = true;
374
375                 ctx->next_ready_msg = msg;
376                 ctx->next_chunk_idx = 0;
377         }
378
379         if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
380                 *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
381                 *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
382         } else {
383                 *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
384                 *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
385         }
386         *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
387
388         if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
389                 list_del(&msg->submission_list);
390                 list_add_tail(&msg->list, &ctx->available_msgs);
391                 ctx->next_ready_msg = NULL;
392         }
393         return true;
394 }
395
396 int
397 new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
398                               unsigned num_threads, u64 max_memory,
399                               struct chunk_compressor **compressor_ret)
400 {
401         u64 approx_mem_required;
402         size_t chunks_per_msg;
403         size_t msgs_per_thread;
404         struct parallel_chunk_compressor *ctx;
405         unsigned i;
406         int ret;
407         unsigned desired_num_threads;
408
409         wimlib_assert(out_chunk_size > 0);
410         wimlib_assert(out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
411
412         if (num_threads == 0)
413                 num_threads = get_default_num_threads();
414
415         if (num_threads == 1) {
416                 DEBUG("Only 1 thread; Not bothering with "
417                       "parallel chunk compressor.");
418                 return -1;
419         }
420
421         if (max_memory == 0)
422                 max_memory = get_avail_memory();
423
424         desired_num_threads = num_threads;
425
426         if (out_chunk_size < ((u32)1 << 23)) {
427                 chunks_per_msg = MAX_CHUNKS_PER_MSG;
428                 msgs_per_thread = 2;
429         } else {
430                 /* Big chunks: Just have one buffer per thread --- more would
431                  * just waste memory.  */
432                 chunks_per_msg = 1;
433                 msgs_per_thread = 1;
434         }
435         for (;;) {
436                 approx_mem_required =
437                         (u64)chunks_per_msg *
438                         (u64)msgs_per_thread *
439                         (u64)num_threads *
440                         (u64)out_chunk_size
441                         + 1000000
442                         + (out_chunk_size * num_threads * 4);
443                 if (approx_mem_required <= max_memory)
444                         break;
445
446                 if (chunks_per_msg > 1)
447                         chunks_per_msg--;
448                 else if (msgs_per_thread > 1)
449                         msgs_per_thread--;
450                 else if (num_threads > 1)
451                         num_threads--;
452                 else
453                         break;
454         }
455
456         if (num_threads < desired_num_threads) {
457                 WARNING("Wanted to use %u threads, but limiting to %u "
458                         "to fit in available memory!",
459                         desired_num_threads, num_threads);
460         }
461
462         if (num_threads == 1) {
463                 DEBUG("Only 1 thread; Not bothering with "
464                       "parallel chunk compressor.");
465                 return -2;
466         }
467
468         ret = WIMLIB_ERR_NOMEM;
469         ctx = CALLOC(1, sizeof(*ctx));
470         if (ctx == NULL)
471                 goto err;
472
473         ctx->base.out_ctype = out_ctype;
474         ctx->base.out_chunk_size = out_chunk_size;
475         ctx->base.num_threads = num_threads;
476         ctx->base.destroy = parallel_chunk_compressor_destroy;
477         ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
478         ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;
479
480         ctx->num_threads = num_threads;
481
482         ret = message_queue_init(&ctx->chunks_to_compress_queue);
483         if (ret)
484                 goto err;
485
486         ret = message_queue_init(&ctx->compressed_chunks_queue);
487         if (ret)
488                 goto err;
489
490         ret = WIMLIB_ERR_NOMEM;
491         ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
492         if (ctx->thread_data == NULL)
493                 goto err;
494
495         for (i = 0; i < num_threads; i++) {
496                 struct compressor_thread_data *dat;
497
498                 dat = &ctx->thread_data[i];
499
500                 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
501                 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
502                 ret = wimlib_create_compressor(out_ctype, out_chunk_size,
503                                                NULL, &dat->compressor);
504                 if (ret)
505                         goto err;
506         }
507
508         for (ctx->num_started_threads = 0;
509              ctx->num_started_threads < num_threads;
510              ctx->num_started_threads++)
511         {
512                 DEBUG("pthread_create thread %u of %u",
513                       ctx->num_started_threads + 1, num_threads);
514                 ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
515                                      NULL,
516                                      compressor_thread_proc,
517                                      &ctx->thread_data[ctx->num_started_threads]);
518                 if (ret) {
519                         errno = ret;
520                         ret = WIMLIB_ERR_NOMEM;
521                         WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u");
522                         goto err;
523                 }
524         }
525
526         ret = WIMLIB_ERR_NOMEM;
527         ctx->num_messages = num_threads * msgs_per_thread;
528         ctx->msgs = allocate_messages(ctx->num_messages,
529                                       chunks_per_msg, out_chunk_size);
530         if (ctx->msgs == NULL)
531                 goto err;
532
533         INIT_LIST_HEAD(&ctx->available_msgs);
534         for (size_t i = 0; i < ctx->num_messages; i++)
535                 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
536
537         INIT_LIST_HEAD(&ctx->submitted_msgs);
538
539         *compressor_ret = &ctx->base;
540         return 0;
541
542 err:
543         parallel_chunk_compressor_destroy(&ctx->base);
544         return ret;
545 }
546
547 #endif /* ENABLE_MULTITHREADED_COMPRESSION */