/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include <errno.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"

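/*
 * A FIFO of messages, shared between the main thread and the compressor
 * threads.  Producers append to the tail under the lock and signal
 * msg_avail_cond; consumers block on msg_avail_cond until a message arrives
 * or the queue is terminated.  (space_avail_cond is initialized and
 * destroyed but not currently waited on in this file.)
 */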
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

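/* Per-thread state: the worker thread itself, the two queues connecting it
 * to the main thread, and the compressor instance it uses.  */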
struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

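/* Maximum number of chunks that a single message can carry  */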
#define MAX_CHUNKS_PER_MSG 16

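/*
 * A unit of work exchanged between the main thread and a compressor thread.
 * The main thread fills up to num_alloc_chunks uncompressed buffers; a
 * worker then fills the corresponding compressed buffers and sizes.  A
 * compressed size of 0 means the chunk did not compress and will be passed
 * on uncompressed.
 */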
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

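/*
 * The parallel chunk compressor.  It embeds the generic chunk_compressor
 * interface as its first member, so the base pointer handed out to callers
 * can be cast back to this structure.  Each message cycles from
 * available_msgs through chunks_to_compress_queue and
 * compressed_chunks_queue, then back to available_msgs.
 */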
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

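/* Initialize a message queue; returns 0 or WIMLIB_ERR_NOMEM.  */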
static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

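/* Destroy a message queue, provided it was successfully initialized
 * (detected via the list head set by INIT_LIST_HEAD()).  */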
static void
message_queue_destroy(struct message_queue *q)
{
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

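/* Enqueue a message and wake one waiting consumer.  */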
static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

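/* Dequeue the oldest message, blocking until one is available.  Returns
 * NULL if the queue has been terminated.  */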
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

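/* Mark the queue as terminating and wake all waiting consumers.  */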
static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

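/* Allocate the chunk buffers for a message.  Each compressed buffer is one
 * byte smaller than the output chunk size; a result that is not strictly
 * smaller than its input is stored uncompressed instead.  */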
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

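/* Compress each filled chunk in a message.  wimlib_compress() is given only
 * usize - 1 bytes of output space, so it returns 0 for chunks that do not
 * compress; get_compression_result() then passes those chunks on
 * uncompressed.  */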
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

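/* Compressor thread routine: repeatedly grab a message, compress its
 * chunks, and hand the message back, until the input queue is terminated.  */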
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

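/* Tear down the parallel chunk compressor: terminate and join the worker
 * threads, then free the queues, per-thread compressors, and messages.  */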
static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

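/* Hand the message currently being filled to the compressor threads,
 * recording it on submitted_msgs so results can be returned in submission
 * order.  */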
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

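/* Implements chunk_compressor.get_chunk_buffer: return a buffer for the
 * next uncompressed chunk, or NULL if all message buffers are in flight
 * (the caller must retrieve some results first).  */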
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

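/* Implements chunk_compressor.signal_chunk_filled: record the size of the
 * chunk just written into the buffer most recently returned by
 * get_chunk_buffer(), and submit the message once it is full.  */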
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

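/* Implements chunk_compressor.get_compression_result: return the result for
 * the next chunk, in submission order, blocking if necessary until the
 * oldest submitted message is complete.  Returns false if no chunks are
 * pending.  */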
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

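/*
 * Create a parallel chunk compressor.  Returns 0 on success, a positive
 * WIMLIB_ERR_* code on failure, or a negative value when a parallel
 * compressor is not worthwhile (only one usable thread, or too little
 * memory).
 *
 * An illustrative sketch of how a caller might drive the interface; the
 * real driver lives elsewhere in wimlib, and have_more_data(),
 * read_chunk(), write_chunk(), and serial_fallback() are hypothetical
 * helpers:
 *
 *      struct chunk_compressor *c;
 *      const void *cdata;
 *      u32 csize, usize;
 *      void *buf;
 *
 *      if (new_parallel_chunk_compressor(ctype, chunk_size, 0, 0, &c))
 *              return serial_fallback();
 *      while (have_more_data()) {
 *              buf = c->get_chunk_buffer(c);
 *              if (buf == NULL) {
 *                      if (c->get_compression_result(c, &cdata, &csize,
 *                                                    &usize))
 *                              write_chunk(cdata, csize, usize);
 *                      continue;
 *              }
 *              c->signal_chunk_filled(c, read_chunk(buf, chunk_size));
 *      }
 *      while (c->get_compression_result(c, &cdata, &csize, &usize))
 *              write_chunk(cdata, csize, usize);
 *      c->destroy(c);
 */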
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_available_cpus();

        if (num_threads == 1)
                return -1;

        if (max_memory == 0)
                max_memory = get_available_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1)
                return -2;

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}