/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is part of wimlib, a library for working with WIM files.
 *
 * wimlib is free software; you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation; either version 3 of the License, or (at your option) any later
 * version.
 *
 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * wimlib; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#ifdef __WIN32__
#  include "wimlib/win32.h" /* win32_get_number_of_processors() */
#endif

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCTL_H
#  include <sys/sysctl.h>
#endif

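/* A FIFO queue of compression messages, protected by a mutex.  Readers block
 * on `msg_avail_cond' until a message arrives or the queue is terminated.  */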
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

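/* Per-thread state: each compressor thread takes messages from
 * `chunks_to_compress_queue', compresses their chunks with its own
 * `compressor', and places them on `compressed_chunks_queue'.  */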
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 2

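/* A unit of work passed through the queues: up to MAX_CHUNKS_PER_MSG
 * uncompressed chunks together with buffers for their compressed versions.
 * Batching several chunks per message amortizes the queue locking overhead.  */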
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

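/* The parallel chunk compressor: implements the generic `chunk_compressor'
 * interface on top of a pool of compressor threads and two message queues.
 * Messages cycle from `available_msgs' through submission and retrieval and
 * back.  */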
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_threads;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

static unsigned
get_default_num_threads(void)
{
        long n;
#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}

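/* Return the approximate number of bytes of physical memory, falling back to
 * 1 GiB if it cannot be determined.  */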
static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#else
        int mib[2] = {CTL_HW, HW_MEMSIZE};
        u64 memsize;
        size_t len = sizeof(memsize);
        if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
                goto default_size;
        return memsize;
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return 1U << 30;
}

static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

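/* Destroy the queue only if it was initialized: the containing structure is
 * allocated with CALLOC, so q->list.next is non-NULL exactly when
 * INIT_LIST_HEAD() in message_queue_init() has run.  */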
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

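/* Retrieve the next message from the queue, blocking until one is available
 * or the queue is terminated; in the latter case, return NULL.  */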
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

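/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the output chunk size, since compressing a chunk is only
 * useful if it saves at least one byte.  */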
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

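/* Compress each filled chunk of the message.  wimlib_compress() returns 0 if
 * the data could not be compressed into the provided `usize - 1' bytes, so a
 * stored compressed size of 0 marks an incompressible chunk, which
 * get_chunk() later returns in its uncompressed form.  */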
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

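/* The main procedure of each compressor thread: take messages off the input
 * queue, compress their chunks, and pass them on, until the queue is
 * terminated.  */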
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_threads; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

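/* Buffer a chunk for compression, sending the current message to the
 * compressor threads once it is full.  Returns false if no message buffer is
 * available, in which case the caller must first retrieve some compressed
 * chunks with ->get_chunk().  */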
static bool
parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
                                       const void *chunk, size_t size)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(size > 0);
        wimlib_assert(size <= ctx->base.out_chunk_size);

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return false;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
        return true;
}

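/* Retrieve the next chunk of (possibly) compressed data, in submission order.
 * A chunk whose recorded compressed size is 0 was incompressible and is
 * returned uncompressed, with *csize_ret == *usize_ret.  Returns false if no
 * chunks are pending.  */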
static bool
parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
                                    const void **cdata_ret, u32 *csize_ret,
                                    u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

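/* Create a parallel chunk compressor.  Returns 0 on success, a positive
 * WIMLIB_ERR_* code on failure, or a negative value if a parallel compressor
 * is not worthwhile (e.g. only one thread would be used), so that the caller
 * can fall back to serial compression.  */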
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Small chunks: batch multiple chunks per message and keep
                 * multiple messages in flight per thread.  */
                chunks_per_msg = MAX_CHUNKS_PER_MSG;
                msgs_per_thread = 2;
        } else {
                /* Big chunks: just have one buffer per thread --- more would
                 * only waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }

        /* Repeatedly shrink the buffering parameters, then the thread count,
         * until the estimated memory usage fits in max_memory.  */
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            NULL);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

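        /* The estimate above is dominated by the chunk buffers: for example,
         * with 8 threads, 32 KiB output chunks, 2 chunks per message, and 2
         * messages per thread, the buffers alone take 2 * 2 * 8 * 32 KiB =
         * 1 MiB, in addition to the per-thread compressor state.  */
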
        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.num_threads = num_threads;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
        ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;

        ctx->num_threads = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               NULL, &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        ret = WIMLIB_ERR_NOMEM;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        goto err;
                }
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = num_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}
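
/*
 * Usage sketch for the interface above (illustrative only: `buf', `len',
 * `have_more_input()', and `write_chunk()' are hypothetical, and error
 * handling is omitted).  Chunks come back from get_chunk() in submission
 * order; a full submit buffer is drained by retrieving compressed chunks:
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      u32 csize, usize;
 *
 *      if (new_parallel_chunk_compressor(WIMLIB_COMPRESSION_TYPE_LZX, 32768,
 *                                        0, 0, &c) == 0) {
 *              while (have_more_input()) {
 *                      while (!c->submit_chunk(c, buf, len))
 *                              if (c->get_chunk(c, &cdata, &csize, &usize))
 *                                      write_chunk(cdata, csize, usize);
 *              }
 *              while (c->get_chunk(c, &cdata, &csize, &usize))
 *                      write_chunk(cdata, csize, usize);
 *              c->destroy(c);
 *      }
 */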

#endif /* ENABLE_MULTITHREADED_COMPRESSION */