]> wimlib.net Git - wimlib/blob - src/compress_parallel.c
d0ce5266464cb4021797cbd2a81c4ed5c5c652c3
[wimlib] / src / compress_parallel.c
1 /*
2  * compress_parallel.c
3  *
4  * Compress chunks of data (parallel version).
5  */
6
7 /*
8  * Copyright (C) 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free Software
14  * Foundation; either version 3 of the License, or (at your option) any later
15  * version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License along with
22  * wimlib; if not, see http://www.gnu.org/licenses/.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #  include "config.h"
27 #endif
28
29 #include "wimlib/assert.h"
30 #include "wimlib/chunk_compressor.h"
31 #include "wimlib/error.h"
32 #include "wimlib/list.h"
33 #include "wimlib/util.h"
34 #ifdef __WIN32__
35 #  include "wimlib/win32.h" /* win32_get_number_of_processors() */
36 #endif
37
38 #include <errno.h>
39 #include <limits.h>
40 #include <pthread.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <unistd.h>
44
/* A thread-safe FIFO queue of messages, used to pass work between the main
 * thread and the compressor worker threads.  */
struct message_queue {
	/* List of queued messages, linked through message->list; protected by
	 * 'lock'.  */
	struct list_head list;

	/* Protects every member of this structure.  */
	pthread_mutex_t lock;

	/* Signaled when a message is added to 'list', or broadcast when
	 * 'terminating' is set; waited on in message_queue_get().  */
	pthread_cond_t msg_avail_cond;

	/* NOTE(review): initialized and destroyed but never signaled or waited
	 * on anywhere in this file — appears vestigial; confirm before
	 * removing.  */
	pthread_cond_t space_avail_cond;

	/* Set by message_queue_terminate(); makes consumers return NULL
	 * instead of blocking for more messages.  */
	bool terminating;
};
52
/* Per-worker-thread state for the parallel chunk compressor.  */
struct compressor_thread_data {
	/* Handle of the worker thread (valid once pthread_create() succeeds).  */
	pthread_t thread;

	/* Queue from which this thread takes messages to compress.  */
	struct message_queue *chunks_to_compress_queue;

	/* Queue onto which this thread puts messages it has compressed.  */
	struct message_queue *compressed_chunks_queue;

	/* Compressor instance private to this thread (compressors are not
	 * shared between threads).  */
	struct wimlib_compressor *compressor;
};
59
/* Maximum number of chunks that can be batched into a single message.  */
#define MAX_CHUNKS_PER_MSG 2

/* A batch of chunks handed between the main thread and a worker thread.  */
struct message {
	/* Input buffers, each out_chunk_size bytes.  */
	u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];

	/* Output buffers, each (out_chunk_size - 1) bytes: compressed data is
	 * only kept when it is strictly smaller than the original.  */
	u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];

	/* Number of valid input bytes in each uncompressed chunk.  */
	unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];

	/* Compressed size of each chunk; 0 means "store uncompressed".  */
	unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];

	/* Number of chunks currently filled in (<= num_alloc_chunks).  */
	size_t num_filled_chunks;

	/* Number of chunk buffer pairs actually allocated.  */
	size_t num_alloc_chunks;

	/* Link for a message_queue's list or the 'available_msgs' list.  */
	struct list_head list;

	/* Set (by the main thread) once this message has come back from the
	 * compressed-chunks queue.  */
	bool complete;

	/* Link for the 'submitted_msgs' list, which preserves submission
	 * order so results can be retrieved in order.  */
	struct list_head submission_list;
};
73
/* Implementation of the chunk_compressor interface that farms chunks out to
 * multiple worker threads.  */
struct parallel_chunk_compressor {
	/* Must be first: callers downcast from struct chunk_compressor *.  */
	struct chunk_compressor base;

	/* Work queue: main thread produces, workers consume.  */
	struct message_queue chunks_to_compress_queue;

	/* Result queue: workers produce, main thread consumes.  */
	struct message_queue compressed_chunks_queue;

	/* Array of 'num_threads' per-thread states.  */
	struct compressor_thread_data *thread_data;
	unsigned num_threads;

	/* How many threads were actually created (used for clean teardown if
	 * thread creation fails partway through).  */
	unsigned num_started_threads;

	/* Backing array of all messages, 'num_messages' long.  */
	struct message *msgs;
	size_t num_messages;

	/* Messages not currently in flight, available for filling.  */
	struct list_head available_msgs;

	/* Messages handed to the workers, in submission order.  */
	struct list_head submitted_msgs;

	/* Message currently being filled by submit_chunk(), or NULL.  */
	struct message *next_submit_msg;

	/* Completed message currently being drained by get_chunk(), or NULL.  */
	struct message *next_ready_msg;

	/* Next chunk index to return from 'next_ready_msg'.  */
	size_t next_chunk_idx;
};
92
/*
 * Query the number of processors currently online, for use as the default
 * number of compressor threads.  Falls back to 1 (with a warning) if the
 * query fails or returns a value outside the usable range.
 */
static unsigned
get_default_num_threads(void)
{
	long nproc;

#ifdef __WIN32__
	nproc = win32_get_number_of_processors();
#else
	nproc = sysconf(_SC_NPROCESSORS_ONLN);
#endif
	if (nproc < 1 || nproc >= UINT_MAX) {
		WARNING("Failed to determine number of processors; assuming 1.");
		return 1;
	}
	return nproc;
}
108
109 static u64
110 get_avail_memory(void)
111 {
112 #ifdef __WIN32__
113         u64 phys_bytes = win32_get_avail_memory();
114         if (phys_bytes == 0)
115                 goto default_size;
116         return phys_bytes;
117 #else
118         long page_size = sysconf(_SC_PAGESIZE);
119         long num_pages = sysconf(_SC_PHYS_PAGES);
120         if (page_size <= 0 || num_pages <= 0)
121                 goto default_size;
122         return ((u64)page_size * (u64)num_pages);
123 #endif
124
125 default_size:
126         WARNING("Failed to determine available memory; assuming 1 GiB");
127         return 1U << 30;
128 }
129
130 static int
131 message_queue_init(struct message_queue *q)
132 {
133         if (pthread_mutex_init(&q->lock, NULL)) {
134                 ERROR_WITH_ERRNO("Failed to initialize mutex");
135                 goto err;
136         }
137         if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
138                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
139                 goto err_destroy_lock;
140         }
141         if (pthread_cond_init(&q->space_avail_cond, NULL)) {
142                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
143                 goto err_destroy_msg_avail_cond;
144         }
145         INIT_LIST_HEAD(&q->list);
146         return 0;
147
148 err_destroy_msg_avail_cond:
149         pthread_cond_destroy(&q->msg_avail_cond);
150 err_destroy_lock:
151         pthread_mutex_destroy(&q->lock);
152 err:
153         return WIMLIB_ERR_NOMEM;
154 }
155
156 static void
157 message_queue_destroy(struct message_queue *q)
158 {
159         if (q->list.next != NULL) {
160                 pthread_mutex_destroy(&q->lock);
161                 pthread_cond_destroy(&q->msg_avail_cond);
162                 pthread_cond_destroy(&q->space_avail_cond);
163         }
164 }
165
/* Append a message to the tail of the queue and wake one waiting consumer.  */
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
	pthread_mutex_lock(&q->lock);
	list_add_tail(&msg->list, &q->list);
	/* One new message; one waiter (if any) can make progress.  */
	pthread_cond_signal(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}
174
175 static struct message *
176 message_queue_get(struct message_queue *q)
177 {
178         struct message *msg;
179
180         pthread_mutex_lock(&q->lock);
181         while (list_empty(&q->list) && !q->terminating)
182                 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
183         if (!q->terminating) {
184                 msg = list_entry(q->list.next, struct message, list);
185                 list_del(&msg->list);
186         } else
187                 msg = NULL;
188         pthread_mutex_unlock(&q->lock);
189         return msg;
190 }
191
/*
 * Mark the queue as terminating, causing all current and future
 * message_queue_get() calls to return NULL instead of blocking.
 */
static void
message_queue_terminate(struct message_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->terminating = true;
	/* Broadcast, not signal: every blocked consumer must wake up.  */
	pthread_cond_broadcast(&q->msg_avail_cond);
	pthread_mutex_unlock(&q->lock);
}
200
201 static int
202 init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
203 {
204         msg->num_alloc_chunks = num_chunks;
205         for (size_t i = 0; i < num_chunks; i++) {
206                 msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
207                 msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
208                 if (msg->compressed_chunks[i] == NULL ||
209                     msg->uncompressed_chunks[i] == NULL)
210                         return WIMLIB_ERR_NOMEM;
211         }
212         return 0;
213 }
214
215 static void
216 destroy_message(struct message *msg)
217 {
218         for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
219                 FREE(msg->compressed_chunks[i]);
220                 FREE(msg->uncompressed_chunks[i]);
221         }
222 }
223
224 static void
225 free_messages(struct message *msgs, size_t num_messages)
226 {
227         if (msgs) {
228                 for (size_t i = 0; i < num_messages; i++)
229                         destroy_message(&msgs[i]);
230                 FREE(msgs);
231         }
232 }
233
234 static struct message *
235 allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
236 {
237         struct message *msgs;
238
239         msgs = CALLOC(count, sizeof(struct message));
240         if (msgs == NULL)
241                 return NULL;
242         for (size_t i = 0; i < count; i++) {
243                 if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
244                         free_messages(msgs, count);
245                         return NULL;
246                 }
247         }
248         return msgs;
249 }
250
/*
 * Compress each filled chunk of a message with the given compressor.
 *
 * The output buffer limit passed to wimlib_compress() is one byte less than
 * the input size, so compression only "succeeds" when it actually shrinks the
 * data.  A resulting compressed size of 0 means "incompressible"; the
 * consumer (parallel_chunk_compressor_get_chunk()) then falls back to the
 * uncompressed data.
 */
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{

	for (size_t i = 0; i < msg->num_filled_chunks; i++) {
		wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
		msg->compressed_chunk_sizes[i] =
			wimlib_compress(msg->uncompressed_chunks[i],
					msg->uncompressed_chunk_sizes[i],
					msg->compressed_chunks[i],
					msg->uncompressed_chunk_sizes[i] - 1,
					compressor);
	}
}
265
266 static void *
267 compressor_thread_proc(void *arg)
268 {
269         struct compressor_thread_data *params = arg;
270         struct message *msg;
271
272         while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
273                 compress_chunks(msg, params->compressor);
274                 message_queue_put(params->compressed_chunks_queue, msg);
275         }
276         return NULL;
277 }
278
279 static void
280 parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
281 {
282         struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
283         unsigned i;
284
285         if (ctx == NULL)
286                 return;
287
288         if (ctx->num_started_threads != 0) {
289                 DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
290                 message_queue_terminate(&ctx->chunks_to_compress_queue);
291
292                 for (i = 0; i < ctx->num_started_threads; i++)
293                         pthread_join(ctx->thread_data[i].thread, NULL);
294         }
295
296         message_queue_destroy(&ctx->chunks_to_compress_queue);
297         message_queue_destroy(&ctx->compressed_chunks_queue);
298
299         if (ctx->thread_data != NULL)
300                 for (i = 0; i < ctx->num_threads; i++)
301                         wimlib_free_compressor(ctx->thread_data[i].compressor);
302
303         FREE(ctx->thread_data);
304
305         free_messages(ctx->msgs, ctx->num_messages);
306
307         FREE(ctx);
308 }
309
310 static void
311 submit_compression_msg(struct parallel_chunk_compressor *ctx)
312 {
313         struct message *msg = ctx->next_submit_msg;
314
315         msg->complete = false;
316         list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
317         message_queue_put(&ctx->chunks_to_compress_queue, msg);
318         ctx->next_submit_msg = NULL;
319 }
320
321 static bool
322 parallel_chunk_compressor_submit_chunk(struct chunk_compressor *_ctx,
323                                        const void *chunk, size_t size)
324 {
325         struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
326         struct message *msg;
327
328         wimlib_assert(size > 0);
329         wimlib_assert(size <= ctx->base.out_chunk_size);
330
331         if (ctx->next_submit_msg) {
332                 msg = ctx->next_submit_msg;
333         } else {
334                 if (list_empty(&ctx->available_msgs))
335                         return false;
336
337                 msg = list_entry(ctx->available_msgs.next, struct message, list);
338                 list_del(&msg->list);
339                 ctx->next_submit_msg = msg;
340                 msg->num_filled_chunks = 0;
341         }
342
343         memcpy(msg->uncompressed_chunks[msg->num_filled_chunks], chunk, size);
344         msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = size;
345         if (++msg->num_filled_chunks == msg->num_alloc_chunks)
346                 submit_compression_msg(ctx);
347         return true;
348 }
349
/*
 * Retrieve the next chunk result, in the same order the chunks were
 * submitted.
 *
 * On success, returns true and sets *cdata_ret/*csize_ret to the compressed
 * data (or the original data if the chunk was incompressible) and *usize_ret
 * to the original size.  Returns false if no chunks are pending.  May block
 * waiting for worker threads to finish the oldest submitted message.
 */
static bool
parallel_chunk_compressor_get_chunk(struct chunk_compressor *_ctx,
				    const void **cdata_ret, unsigned *csize_ret,
				    unsigned *usize_ret)
{
	struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
	struct message *msg;


	/* Flush any partially filled message so its results become
	 * retrievable.  */
	if (ctx->next_submit_msg)
		submit_compression_msg(ctx);

	if (ctx->next_ready_msg) {
		/* Continue draining the message we already started on.  */
		msg = ctx->next_ready_msg;
	} else {
		if (list_empty(&ctx->submitted_msgs))
			return false;

		/* Results must come back in submission order, but workers may
		 * finish out of order.  Keep pulling completed messages off
		 * the results queue (blocking as needed) and marking them
		 * complete until the OLDEST submitted message is complete.
		 * NOTE(review): assumes message_queue_get() cannot return
		 * NULL here, i.e. the results queue is never terminated while
		 * messages are in flight — confirm against teardown order.  */
		while (!(msg = list_entry(ctx->submitted_msgs.next,
					  struct message,
					  submission_list))->complete)
			message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

		ctx->next_ready_msg = msg;
		ctx->next_chunk_idx = 0;
	}

	/* A compressed size of 0 means the chunk was incompressible; hand
	 * back the uncompressed data instead.  */
	if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
		*cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
	} else {
		*cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
		*csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
	}
	*usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

	/* Once the message is fully drained, recycle it.  */
	if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
		list_del(&msg->submission_list);
		list_add_tail(&msg->list, &ctx->available_msgs);
		ctx->next_ready_msg = NULL;
	}
	return true;
}
393
394 int
395 new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
396                               unsigned num_threads, u64 max_memory,
397                               struct chunk_compressor **compressor_ret)
398 {
399         u64 approx_mem_required;
400         size_t chunks_per_msg;
401         size_t msgs_per_thread;
402         struct parallel_chunk_compressor *ctx;
403         unsigned i;
404         int ret;
405         unsigned desired_num_threads;
406
407         wimlib_assert(out_chunk_size > 0);
408         wimlib_assert(out_ctype != WIMLIB_COMPRESSION_TYPE_NONE);
409
410         if (num_threads == 0)
411                 num_threads = get_default_num_threads();
412
413         if (num_threads == 1) {
414                 DEBUG("Only 1 thread; Not bothering with "
415                       "parallel chunk compressor.");
416                 return -1;
417         }
418
419         if (max_memory == 0)
420                 max_memory = get_avail_memory();
421
422         desired_num_threads = num_threads;
423
424         chunks_per_msg = MAX_CHUNKS_PER_MSG;
425         msgs_per_thread = 2;
426         for (;;) {
427                 approx_mem_required =
428                         (u64)chunks_per_msg *
429                         (u64)msgs_per_thread *
430                         (u64)num_threads *
431                         (u64)out_chunk_size
432                         + 1000000
433                         + (out_chunk_size * num_threads * 4);
434                 if (approx_mem_required <= max_memory)
435                         break;
436
437                 if (chunks_per_msg > 1)
438                         chunks_per_msg--;
439                 else if (msgs_per_thread > 1)
440                         msgs_per_thread--;
441                 else if (num_threads > 1)
442                         num_threads--;
443                 else
444                         break;
445         }
446
447         if (num_threads < desired_num_threads) {
448                 WARNING("Wanted to use %u threads, but limiting to %u "
449                         "to fit in available memory!",
450                         desired_num_threads, num_threads);
451         }
452
453         if (num_threads == 1) {
454                 DEBUG("Only 1 thread; Not bothering with "
455                       "parallel chunk compressor.");
456                 return -2;
457         }
458
459         ret = WIMLIB_ERR_NOMEM;
460         ctx = CALLOC(1, sizeof(*ctx));
461         if (ctx == NULL)
462                 goto err;
463
464         ctx->base.out_ctype = out_ctype;
465         ctx->base.out_chunk_size = out_chunk_size;
466         ctx->base.num_threads = num_threads;
467         ctx->base.destroy = parallel_chunk_compressor_destroy;
468         ctx->base.submit_chunk = parallel_chunk_compressor_submit_chunk;
469         ctx->base.get_chunk = parallel_chunk_compressor_get_chunk;
470
471         ctx->num_threads = num_threads;
472
473         ret = message_queue_init(&ctx->chunks_to_compress_queue);
474         if (ret)
475                 goto err;
476
477         ret = message_queue_init(&ctx->compressed_chunks_queue);
478         if (ret)
479                 goto err;
480
481         ret = WIMLIB_ERR_NOMEM;
482         ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
483         if (ctx->thread_data == NULL)
484                 goto err;
485
486         for (i = 0; i < num_threads; i++) {
487                 struct compressor_thread_data *dat;
488
489                 dat = &ctx->thread_data[i];
490
491                 dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
492                 dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
493                 ret = wimlib_create_compressor(out_ctype, out_chunk_size,
494                                                NULL, &dat->compressor);
495                 if (ret)
496                         goto err;
497         }
498
499         for (ctx->num_started_threads = 0;
500              ctx->num_started_threads < num_threads;
501              ctx->num_started_threads++)
502         {
503                 DEBUG("pthread_create thread %u of %u",
504                       ctx->num_started_threads + 1, num_threads);
505                 ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
506                                      NULL,
507                                      compressor_thread_proc,
508                                      &ctx->thread_data[ctx->num_started_threads]);
509                 if (ret) {
510                         errno = ret;
511                         ret = WIMLIB_ERR_NOMEM;
512                         WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u");
513                         goto err;
514                 }
515         }
516
517         ret = WIMLIB_ERR_NOMEM;
518         ctx->num_messages = num_threads * msgs_per_thread;
519         ctx->msgs = allocate_messages(ctx->num_messages,
520                                       chunks_per_msg, out_chunk_size);
521         if (ctx->msgs == NULL)
522                 goto err;
523
524         INIT_LIST_HEAD(&ctx->available_msgs);
525         for (size_t i = 0; i < ctx->num_messages; i++)
526                 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
527
528         INIT_LIST_HEAD(&ctx->submitted_msgs);
529
530         *compressor_ret = &ctx->base;
531         return 0;
532
533 err:
534         parallel_chunk_compressor_destroy(&ctx->base);
535         return ret;
536 }