/*
 * compress_parallel.c
 *
 * Compress chunks of data (parallel version).
 */

/*
 * Copyright (C) 2013 Eric Biggers
 *
 * This file is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 3 of the License, or (at your option) any
 * later version.
 *
 * This file is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this file; if not, see http://www.gnu.org/licenses/.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#ifdef ENABLE_MULTITHREADED_COMPRESSION

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCTL_H
#  include <sys/sysctl.h>
#endif

#include "wimlib/assert.h"
#include "wimlib/chunk_compressor.h"
#include "wimlib/error.h"
#include "wimlib/list.h"
#include "wimlib/util.h"
#include "wimlib/win32.h" /* win32_get_number_of_processors() */

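/*
 * A simple intrusive-list FIFO protected by a mutex, with condition
 * variables for signaling.  Note that 'space_avail_cond' is initialized
 * and destroyed but never waited on or signaled in this file;
 * backpressure instead comes from the fixed pool of messages.  Once
 * 'terminating' is set, readers stop blocking and receive NULL.
 */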
struct message_queue {
        struct list_head list;
        pthread_mutex_t lock;
        pthread_cond_t msg_avail_cond;
        pthread_cond_t space_avail_cond;
        bool terminating;
};

struct compressor_thread_data {
        pthread_t thread;
        struct message_queue *chunks_to_compress_queue;
        struct message_queue *compressed_chunks_queue;
        struct wimlib_compressor *compressor;
};

#define MAX_CHUNKS_PER_MSG 16

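/*
 * A unit of work passed between the main thread and the compressor
 * threads.  Each message carries up to MAX_CHUNKS_PER_MSG chunks: the
 * main thread fills 'uncompressed_chunks', and a compressor thread
 * writes the results into 'compressed_chunks'.  A compressed size of 0
 * marks a chunk that did not compress and must be stored uncompressed.
 */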
struct message {
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        u32 uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        u32 compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        size_t num_filled_chunks;
        size_t num_alloc_chunks;
        struct list_head list;
        bool complete;
        struct list_head submission_list;
};

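/*
 * The parallel chunk compressor.  Free messages sit on
 * 'available_msgs'; messages handed to the worker threads are tracked
 * on 'submitted_msgs' in submission order, which is what allows
 * results to be returned in the order the chunks were provided.
 */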
struct parallel_chunk_compressor {
        struct chunk_compressor base;

        struct message_queue chunks_to_compress_queue;
        struct message_queue compressed_chunks_queue;
        struct compressor_thread_data *thread_data;
        unsigned num_thread_data;
        unsigned num_started_threads;

        struct message *msgs;
        size_t num_messages;

        struct list_head available_msgs;
        struct list_head submitted_msgs;
        struct message *next_submit_msg;
        struct message *next_ready_msg;
        size_t next_chunk_idx;
};

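/* Return the number of processors to use by default, or 1 if it cannot
 * be determined.  */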
static unsigned
get_default_num_threads(void)
{
        long n;
#ifdef __WIN32__
        n = win32_get_number_of_processors();
#else
        n = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        if (n < 1 || n >= UINT_MAX) {
                WARNING("Failed to determine number of processors; assuming 1.");
                return 1;
        }
        return n;
}

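/*
 * Estimate the amount of physical memory available, trying the Win32
 * API first, then POSIX sysconf(), then the BSD/macOS sysctl()
 * interface, and finally falling back to an assumption of 1 GiB.
 */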
static u64
get_avail_memory(void)
{
#ifdef __WIN32__
        u64 phys_bytes = win32_get_avail_memory();
        if (phys_bytes == 0)
                goto default_size;
        return phys_bytes;
#elif defined(_SC_PAGESIZE) && defined(_SC_PHYS_PAGES)
        long page_size = sysconf(_SC_PAGESIZE);
        long num_pages = sysconf(_SC_PHYS_PAGES);
        if (page_size <= 0 || num_pages <= 0)
                goto default_size;
        return ((u64)page_size * (u64)num_pages);
#else
        int mib[2] = {CTL_HW, HW_MEMSIZE};
        u64 memsize;
        size_t len = sizeof(memsize);
        if (sysctl(mib, ARRAY_LEN(mib), &memsize, &len, NULL, 0) < 0 || len != 8)
                goto default_size;
        return memsize;
#endif

default_size:
        WARNING("Failed to determine available memory; assuming 1 GiB");
        return 1ULL << 30;
}

static int
message_queue_init(struct message_queue *q)
{
        if (pthread_mutex_init(&q->lock, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize mutex");
                goto err;
        }
        if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_lock;
        }
        if (pthread_cond_init(&q->space_avail_cond, NULL)) {
                ERROR_WITH_ERRNO("Failed to initialize condition variable");
                goto err_destroy_msg_avail_cond;
        }
        INIT_LIST_HEAD(&q->list);
        return 0;

err_destroy_msg_avail_cond:
        pthread_cond_destroy(&q->msg_avail_cond);
err_destroy_lock:
        pthread_mutex_destroy(&q->lock);
err:
        return WIMLIB_ERR_NOMEM;
}

static void
message_queue_destroy(struct message_queue *q)
{
        /* A nonnull list head means the queue was successfully
         * initialized (the containing context was zeroed by CALLOC).  */
        if (q->list.next != NULL) {
                pthread_mutex_destroy(&q->lock);
                pthread_cond_destroy(&q->msg_avail_cond);
                pthread_cond_destroy(&q->space_avail_cond);
        }
}

static void
message_queue_put(struct message_queue *q, struct message *msg)
{
        pthread_mutex_lock(&q->lock);
        list_add_tail(&msg->list, &q->list);
        pthread_cond_signal(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

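/*
 * Retrieve the next message from the queue, blocking until one is
 * available or until the queue is terminated, in which case NULL is
 * returned.
 */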
static struct message *
message_queue_get(struct message_queue *q)
{
        struct message *msg;

        pthread_mutex_lock(&q->lock);
        while (list_empty(&q->list) && !q->terminating)
                pthread_cond_wait(&q->msg_avail_cond, &q->lock);
        if (!q->terminating) {
                msg = list_entry(q->list.next, struct message, list);
                list_del(&msg->list);
        } else
                msg = NULL;
        pthread_mutex_unlock(&q->lock);
        return msg;
}

static void
message_queue_terminate(struct message_queue *q)
{
        pthread_mutex_lock(&q->lock);
        q->terminating = true;
        pthread_cond_broadcast(&q->msg_avail_cond);
        pthread_mutex_unlock(&q->lock);
}

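/*
 * Allocate the chunk buffers for a message.  The buffer for each
 * compressed chunk is only (out_chunk_size - 1) bytes: a result that
 * is not strictly smaller than the original data is useless, since
 * such a chunk would be stored uncompressed instead.
 */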
static int
init_message(struct message *msg, size_t num_chunks, u32 out_chunk_size)
{
        msg->num_alloc_chunks = num_chunks;
        for (size_t i = 0; i < num_chunks; i++) {
                msg->compressed_chunks[i] = MALLOC(out_chunk_size - 1);
                msg->uncompressed_chunks[i] = MALLOC(out_chunk_size);
                if (msg->compressed_chunks[i] == NULL ||
                    msg->uncompressed_chunks[i] == NULL)
                        return WIMLIB_ERR_NOMEM;
        }
        return 0;
}

static void
destroy_message(struct message *msg)
{
        for (size_t i = 0; i < msg->num_alloc_chunks; i++) {
                FREE(msg->compressed_chunks[i]);
                FREE(msg->uncompressed_chunks[i]);
        }
}

static void
free_messages(struct message *msgs, size_t num_messages)
{
        if (msgs) {
                for (size_t i = 0; i < num_messages; i++)
                        destroy_message(&msgs[i]);
                FREE(msgs);
        }
}

static struct message *
allocate_messages(size_t count, size_t chunks_per_msg, u32 out_chunk_size)
{
        struct message *msgs;

        msgs = CALLOC(count, sizeof(struct message));
        if (msgs == NULL)
                return NULL;
        for (size_t i = 0; i < count; i++) {
                if (init_message(&msgs[i], chunks_per_msg, out_chunk_size)) {
                        /* Freeing all 'count' messages is safe here:
                         * CALLOC zeroed them, so the not-yet-initialized
                         * ones have num_alloc_chunks == 0.  */
                        free_messages(msgs, count);
                        return NULL;
                }
        }
        return msgs;
}

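/*
 * Compress each filled chunk in the message.  wimlib_compress() is
 * given one byte less than the uncompressed size as the output buffer
 * size, so it returns 0 (marking the chunk incompressible) unless the
 * chunk actually shrinks.
 */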
static void
compress_chunks(struct message *msg, struct wimlib_compressor *compressor)
{
        for (size_t i = 0; i < msg->num_filled_chunks; i++) {
                wimlib_assert(msg->uncompressed_chunk_sizes[i] != 0);
                msg->compressed_chunk_sizes[i] =
                        wimlib_compress(msg->uncompressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i],
                                        msg->compressed_chunks[i],
                                        msg->uncompressed_chunk_sizes[i] - 1,
                                        compressor);
        }
}

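/*
 * The main routine for each compressor thread: repeatedly take a
 * message from the input queue, compress its chunks, and post it to
 * the output queue.  Exits when the input queue is terminated.
 */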
static void *
compressor_thread_proc(void *arg)
{
        struct compressor_thread_data *params = arg;
        struct message *msg;

        while ((msg = message_queue_get(params->chunks_to_compress_queue)) != NULL) {
                compress_chunks(msg, params->compressor);
                message_queue_put(params->compressed_chunks_queue, msg);
        }
        return NULL;
}

static void
parallel_chunk_compressor_destroy(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        unsigned i;

        if (ctx == NULL)
                return;

        if (ctx->num_started_threads != 0) {
                DEBUG("Terminating %u compressor threads", ctx->num_started_threads);
                message_queue_terminate(&ctx->chunks_to_compress_queue);

                for (i = 0; i < ctx->num_started_threads; i++)
                        pthread_join(ctx->thread_data[i].thread, NULL);
        }

        message_queue_destroy(&ctx->chunks_to_compress_queue);
        message_queue_destroy(&ctx->compressed_chunks_queue);

        if (ctx->thread_data != NULL)
                for (i = 0; i < ctx->num_thread_data; i++)
                        wimlib_free_compressor(ctx->thread_data[i].compressor);

        FREE(ctx->thread_data);

        free_messages(ctx->msgs, ctx->num_messages);

        FREE(ctx);
}

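/*
 * Hand the current in-progress message off to the compressor threads.
 * It is also appended to 'submitted_msgs' so that its results can
 * later be returned in submission order.
 */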
static void
submit_compression_msg(struct parallel_chunk_compressor *ctx)
{
        struct message *msg = ctx->next_submit_msg;

        msg->complete = false;
        list_add_tail(&msg->submission_list, &ctx->submitted_msgs);
        message_queue_put(&ctx->chunks_to_compress_queue, msg);
        ctx->next_submit_msg = NULL;
}

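/*
 * Return a buffer into which the caller may write the next chunk of
 * uncompressed data, or NULL if no message is free, in which case the
 * caller must retrieve some compression results first.
 */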
static void *
parallel_chunk_compressor_get_chunk_buffer(struct chunk_compressor *_ctx)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg) {
                msg = ctx->next_submit_msg;
        } else {
                if (list_empty(&ctx->available_msgs))
                        return NULL;

                msg = list_entry(ctx->available_msgs.next, struct message, list);
                list_del(&msg->list);
                ctx->next_submit_msg = msg;
                msg->num_filled_chunks = 0;
        }

        return msg->uncompressed_chunks[msg->num_filled_chunks];
}

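/*
 * Record that the most recently returned chunk buffer was filled with
 * 'usize' bytes of data.  Once the current message is full, it is
 * submitted for compression.
 */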
static void
parallel_chunk_compressor_signal_chunk_filled(struct chunk_compressor *_ctx, u32 usize)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        wimlib_assert(usize > 0);
        wimlib_assert(usize <= ctx->base.out_chunk_size);
        wimlib_assert(ctx->next_submit_msg);

        msg = ctx->next_submit_msg;
        msg->uncompressed_chunk_sizes[msg->num_filled_chunks] = usize;
        if (++msg->num_filled_chunks == msg->num_alloc_chunks)
                submit_compression_msg(ctx);
}

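/*
 * Retrieve the next chunk result, in the order the chunks were
 * submitted.  Any partially filled message is flushed first.  Messages
 * that complete out of order are marked as they drain from the
 * completion queue; the head of 'submitted_msgs' is only consumed once
 * it is complete, which preserves ordering.  For an incompressible
 * chunk (compressed size 0), the uncompressed data is returned
 * instead.  Returns false if no chunks are pending.
 */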
static bool
parallel_chunk_compressor_get_compression_result(struct chunk_compressor *_ctx,
                                                 const void **cdata_ret, u32 *csize_ret,
                                                 u32 *usize_ret)
{
        struct parallel_chunk_compressor *ctx = (struct parallel_chunk_compressor *)_ctx;
        struct message *msg;

        if (ctx->next_submit_msg)
                submit_compression_msg(ctx);

        if (ctx->next_ready_msg) {
                msg = ctx->next_ready_msg;
        } else {
                if (list_empty(&ctx->submitted_msgs))
                        return false;

                while (!(msg = list_entry(ctx->submitted_msgs.next,
                                          struct message,
                                          submission_list))->complete)
                        message_queue_get(&ctx->compressed_chunks_queue)->complete = true;

                ctx->next_ready_msg = msg;
                ctx->next_chunk_idx = 0;
        }

        if (msg->compressed_chunk_sizes[ctx->next_chunk_idx]) {
                *cdata_ret = msg->compressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->compressed_chunk_sizes[ctx->next_chunk_idx];
        } else {
                *cdata_ret = msg->uncompressed_chunks[ctx->next_chunk_idx];
                *csize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];
        }
        *usize_ret = msg->uncompressed_chunk_sizes[ctx->next_chunk_idx];

        if (++ctx->next_chunk_idx == msg->num_filled_chunks) {
                list_del(&msg->submission_list);
                list_add_tail(&msg->list, &ctx->available_msgs);
                ctx->next_ready_msg = NULL;
        }
        return true;
}

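/*
 * Create a chunk compressor that compresses chunks of size
 * 'out_chunk_size' in parallel, using 'num_threads' threads (0 means
 * autodetect) and staying within 'max_memory' bytes (0 means
 * autodetect the available physical memory).
 *
 * Returns 0 on success, a positive wimlib error code on failure, or a
 * negative value (-1 or -2) when only a single thread would be used,
 * in which case the caller is expected to fall back to serial
 * compression instead.
 *
 * Illustrative usage sketch (not part of this file; read_chunk() and
 * write_out_chunk() are hypothetical, and error handling is omitted):
 *
 *	struct chunk_compressor *c;
 *	const void *cdata;
 *	u32 csize, usize;
 *	void *buf;
 *
 *	new_parallel_chunk_compressor(out_ctype, out_chunk_size, 0, 0, &c);
 *	while (more input remains) {
 *		while ((buf = c->get_chunk_buffer(c)) == NULL) {
 *			(all messages in flight: consume a result to free one)
 *			c->get_compression_result(c, &cdata, &csize, &usize);
 *			write_out_chunk(cdata, csize, usize);
 *		}
 *		u32 filled = read_chunk(buf, out_chunk_size);
 *		c->signal_chunk_filled(c, filled);
 *	}
 *	while (c->get_compression_result(c, &cdata, &csize, &usize))
 *		write_out_chunk(cdata, csize, usize);
 *	c->destroy(c);
 */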
int
new_parallel_chunk_compressor(int out_ctype, u32 out_chunk_size,
                              unsigned num_threads, u64 max_memory,
                              struct chunk_compressor **compressor_ret)
{
        u64 approx_mem_required;
        size_t chunks_per_msg;
        size_t msgs_per_thread;
        struct parallel_chunk_compressor *ctx;
        unsigned i;
        int ret;
        unsigned desired_num_threads;

        wimlib_assert(out_chunk_size > 0);

        if (num_threads == 0)
                num_threads = get_default_num_threads();

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -1;
        }

        if (max_memory == 0)
                max_memory = get_avail_memory();

        desired_num_threads = num_threads;

        if (out_chunk_size < ((u32)1 << 23)) {
                /* Relatively small chunks.  Use 2 messages per thread, each
                 * with at least 2 chunks.  Use more chunks per message if there
                 * are lots of threads and/or the chunks are very small.  */
                chunks_per_msg = 2;
                chunks_per_msg += num_threads * (65536 / out_chunk_size) / 16;
                chunks_per_msg = max(chunks_per_msg, 2);
                chunks_per_msg = min(chunks_per_msg, MAX_CHUNKS_PER_MSG);
                msgs_per_thread = 2;
        } else {
                /* Big chunks: Just have one buffer per thread --- more would
                 * just waste memory.  */
                chunks_per_msg = 1;
                msgs_per_thread = 1;
        }
        for (;;) {
                approx_mem_required =
                        (u64)chunks_per_msg *
                        (u64)msgs_per_thread *
                        (u64)num_threads *
                        (u64)out_chunk_size
                        + out_chunk_size
                        + 1000000
                        + num_threads * wimlib_get_compressor_needed_memory(out_ctype,
                                                                            out_chunk_size,
                                                                            0);
                if (approx_mem_required <= max_memory)
                        break;

                if (chunks_per_msg > 1)
                        chunks_per_msg--;
                else if (msgs_per_thread > 1)
                        msgs_per_thread--;
                else if (num_threads > 1)
                        num_threads--;
                else
                        break;
        }

        if (num_threads < desired_num_threads) {
                WARNING("Wanted to use %u threads, but limiting to %u "
                        "to fit in available memory!",
                        desired_num_threads, num_threads);
        }

        if (num_threads == 1) {
                DEBUG("Only 1 thread; not bothering with "
                      "parallel chunk compressor.");
                return -2;
        }

        ret = WIMLIB_ERR_NOMEM;
        ctx = CALLOC(1, sizeof(*ctx));
        if (ctx == NULL)
                goto err;

        ctx->base.out_ctype = out_ctype;
        ctx->base.out_chunk_size = out_chunk_size;
        ctx->base.destroy = parallel_chunk_compressor_destroy;
        ctx->base.get_chunk_buffer = parallel_chunk_compressor_get_chunk_buffer;
        ctx->base.signal_chunk_filled = parallel_chunk_compressor_signal_chunk_filled;
        ctx->base.get_compression_result = parallel_chunk_compressor_get_compression_result;

        ctx->num_thread_data = num_threads;

        ret = message_queue_init(&ctx->chunks_to_compress_queue);
        if (ret)
                goto err;

        ret = message_queue_init(&ctx->compressed_chunks_queue);
        if (ret)
                goto err;

        ret = WIMLIB_ERR_NOMEM;
        ctx->thread_data = CALLOC(num_threads, sizeof(ctx->thread_data[0]));
        if (ctx->thread_data == NULL)
                goto err;

        for (i = 0; i < num_threads; i++) {
                struct compressor_thread_data *dat;

                dat = &ctx->thread_data[i];

                dat->chunks_to_compress_queue = &ctx->chunks_to_compress_queue;
                dat->compressed_chunks_queue = &ctx->compressed_chunks_queue;
                ret = wimlib_create_compressor(out_ctype, out_chunk_size,
                                               WIMLIB_COMPRESSOR_FLAG_DESTRUCTIVE,
                                               &dat->compressor);
                if (ret)
                        goto err;
        }

        for (ctx->num_started_threads = 0;
             ctx->num_started_threads < num_threads;
             ctx->num_started_threads++)
        {
                DEBUG("pthread_create thread %u of %u",
                      ctx->num_started_threads + 1, num_threads);
                ret = pthread_create(&ctx->thread_data[ctx->num_started_threads].thread,
                                     NULL,
                                     compressor_thread_proc,
                                     &ctx->thread_data[ctx->num_started_threads]);
                if (ret) {
                        errno = ret;
                        WARNING_WITH_ERRNO("Failed to create compressor thread %u of %u",
                                           ctx->num_started_threads + 1,
                                           num_threads);
                        ret = WIMLIB_ERR_NOMEM;
                        if (ctx->num_started_threads >= 2)
                                break;
                        goto err;
                }
        }

        ctx->base.num_threads = ctx->num_started_threads;

        ret = WIMLIB_ERR_NOMEM;
        ctx->num_messages = ctx->num_started_threads * msgs_per_thread;
        ctx->msgs = allocate_messages(ctx->num_messages,
                                      chunks_per_msg, out_chunk_size);
        if (ctx->msgs == NULL)
                goto err;

        INIT_LIST_HEAD(&ctx->available_msgs);
        for (size_t i = 0; i < ctx->num_messages; i++)
                list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);

        INIT_LIST_HEAD(&ctx->submitted_msgs);

        *compressor_ret = &ctx->base;
        return 0;

err:
        parallel_chunk_compressor_destroy(&ctx->base);
        return ret;
}

#endif /* ENABLE_MULTITHREADED_COMPRESSION */