]> wimlib.net Git - wimlib/blob - src/write.c
b143f16d7dc4de001a0e3e2b2cd01b248ffcd593
[wimlib] / src / write.c
1 /*
2  * write.c
3  *
4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
5  * compressed file resources, etc.
6  */
7
8 /*
9  * Copyright (C) 2010 Carl Thijssen
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32 #include "xml.h"
33 #include "lzx.h"
34 #include "xpress.h"
35 #include <unistd.h>
36
37 #ifdef ENABLE_MULTITHREADED_COMPRESSION
38 #include <semaphore.h>
39 #include <pthread.h>
40 #endif
41
42 #include <errno.h>
43
44 #ifdef WITH_NTFS_3G
45 #include <time.h>
46 #include <ntfs-3g/attrib.h>
47 #include <ntfs-3g/inode.h>
48 #include <ntfs-3g/dir.h>
49 #endif
50
51
52 #ifdef HAVE_ALLOCA_H
53 #include <alloca.h>
54 #else
55 #include <stdlib.h>
56 #endif
57
58 static int do_fflush(FILE *fp)
59 {
60         int ret = fflush(fp);
61         if (ret != 0) {
62                 ERROR_WITH_ERRNO("Failed to flush data to output WIM file");
63                 return WIMLIB_ERR_WRITE;
64         }
65         return 0;
66 }
67
68 static int fflush_and_ftruncate(FILE *fp, off_t size)
69 {
70         int ret;
71
72         ret = do_fflush(fp);
73         if (ret != 0)
74                 return ret;
75         ret = ftruncate(fileno(fp), size);
76         if (ret != 0) {
77                 ERROR_WITH_ERRNO("Failed to truncate output WIM file to "
78                                  "%"PRIu64" bytes", size);
79                 return WIMLIB_ERR_WRITE;
80         }
81         return 0;
82 }
83
84 /* Chunk table that's located at the beginning of each compressed resource in
85  * the WIM.  (This is not the on-disk format; the on-disk format just has an
86  * array of offsets.) */
87 struct chunk_table {
88         off_t file_offset;
89         u64 num_chunks;
90         u64 original_resource_size;
91         u64 bytes_per_chunk_entry;
92         u64 table_disk_size;
93         u64 cur_offset;
94         u64 *cur_offset_p;
95         u64 offsets[0];
96 };
97
98 /*
99  * Allocates and initializes a chunk table, and reserves space for it in the
100  * output file.
101  */
102 static int
103 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
104                              FILE *out_fp,
105                              off_t file_offset,
106                              struct chunk_table **chunk_tab_ret)
107 {
108         u64 size = wim_resource_size(lte);
109         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
110         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
111         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
112         int ret;
113
114         if (!chunk_tab) {
115                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
116                       "resource", size);
117                 ret = WIMLIB_ERR_NOMEM;
118                 goto out;
119         }
120         chunk_tab->file_offset = file_offset;
121         chunk_tab->num_chunks = num_chunks;
122         chunk_tab->original_resource_size = size;
123         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
124         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
125                                      (num_chunks - 1);
126         chunk_tab->cur_offset = 0;
127         chunk_tab->cur_offset_p = chunk_tab->offsets;
128
129         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
130                    chunk_tab->table_disk_size) {
131                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
132                                  "file resource");
133                 ret = WIMLIB_ERR_WRITE;
134                 goto out;
135         }
136
137         ret = 0;
138 out:
139         *chunk_tab_ret = chunk_tab;
140         return ret;
141 }
142
/*
 * Pointer to a function that compresses one chunk of a WIM resource.
 *
 * @chunk:                      Uncompressed data of the chunk.
 * @chunk_size:                 Size of the uncompressed chunk in bytes.
 * @compressed_chunk:           Output buffer of size at least
 *                              (@chunk_size - 1) bytes.
 * @compressed_chunk_len_ret:   Receives the size of the compressed chunk.
 *
 * Returns zero if compression succeeded, and nonzero if the chunk could not be
 * compressed to any smaller than @chunk_size.  This function cannot fail for
 * any other reason.
 */
typedef int (*compress_func_t)(const void *chunk,
                               unsigned chunk_size,
                               void *compressed_chunk,
                               unsigned *compressed_chunk_len_ret);
159
160 compress_func_t get_compress_func(int out_ctype)
161 {
162         if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX)
163                 return lzx_compress;
164         else
165                 return xpress_compress;
166 }
167
168 /*
169  * Writes a chunk of a WIM resource to an output file.
170  *
171  * @chunk:        Uncompressed data of the chunk.
172  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
173  * @out_fp:       FILE * to write tho chunk to.
174  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
175  *                      chunk table provided)
176  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
177  *                      offset of the chunk we write.
178  *
179  * Returns 0 on success; nonzero on failure.
180  */
181 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
182                                     FILE *out_fp, compress_func_t compress,
183                                     struct chunk_table *chunk_tab)
184 {
185         const u8 *out_chunk;
186         unsigned out_chunk_size;
187         if (chunk_tab) {
188                 u8 *compressed_chunk = alloca(chunk_size);
189                 int ret;
190
191                 ret = compress(chunk, chunk_size, compressed_chunk,
192                                &out_chunk_size);
193                 if (ret == 0) {
194                         out_chunk = compressed_chunk;
195                 } else {
196                         out_chunk = chunk;
197                         out_chunk_size = chunk_size;
198                 }
199                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
200                 chunk_tab->cur_offset += out_chunk_size;
201         } else {
202                 out_chunk = chunk;
203                 out_chunk_size = chunk_size;
204         }
205         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
206                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
207                 return WIMLIB_ERR_WRITE;
208         }
209         return 0;
210 }
211
212 /*
213  * Finishes a WIM chunk tale and writes it to the output file at the correct
214  * offset.
215  *
216  * The final size of the full compressed resource is returned in the
217  * @compressed_size_p.
218  */
219 static int
220 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
221                               FILE *out_fp, u64 *compressed_size_p)
222 {
223         size_t bytes_written;
224         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
225                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
226                                  "WIM file", chunk_tab->file_offset);
227                 return WIMLIB_ERR_WRITE;
228         }
229
230         if (chunk_tab->bytes_per_chunk_entry == 8) {
231                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
232         } else {
233                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
234                         ((u32*)chunk_tab->offsets)[i] =
235                                 cpu_to_le32(chunk_tab->offsets[i]);
236         }
237         bytes_written = fwrite((u8*)chunk_tab->offsets +
238                                         chunk_tab->bytes_per_chunk_entry,
239                                1, chunk_tab->table_disk_size, out_fp);
240         if (bytes_written != chunk_tab->table_disk_size) {
241                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
242                                  "file resource");
243                 return WIMLIB_ERR_WRITE;
244         }
245         if (fseeko(out_fp, 0, SEEK_END) != 0) {
246                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
247                 return WIMLIB_ERR_WRITE;
248         }
249         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
250         return 0;
251 }
252
253 /* Prepare for multiple reads to a resource by caching a FILE * or NTFS
254  * attribute pointer in the lookup table entry. */
255 static int prepare_resource_for_read(struct lookup_table_entry *lte
256
257                                         #ifdef WITH_NTFS_3G
258                                         , ntfs_inode **ni_ret
259                                         #endif
260                 )
261 {
262         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
263              && !lte->file_on_disk_fp)
264         {
265                 wimlib_assert(lte->file_on_disk);
266                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
267                 if (!lte->file_on_disk_fp) {
268                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
269                                          "reading", lte->file_on_disk);
270                         return WIMLIB_ERR_OPEN;
271                 }
272         }
273 #ifdef WITH_NTFS_3G
274         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
275                   && !lte->attr)
276         {
277                 struct ntfs_location *loc = lte->ntfs_loc;
278                 ntfs_inode *ni;
279                 wimlib_assert(loc);
280                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
281                 if (!ni) {
282                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
283                                          "volume", loc->path_utf8);
284                         return WIMLIB_ERR_NTFS_3G;
285                 }
286                 lte->attr = ntfs_attr_open(ni,
287                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
288                                            (ntfschar*)loc->stream_name_utf16,
289                                            loc->stream_name_utf16_num_chars);
290                 if (!lte->attr) {
291                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
292                                          "NTFS volume", loc->path_utf8);
293                         ntfs_inode_close(ni);
294                         return WIMLIB_ERR_NTFS_3G;
295                 }
296                 *ni_ret = ni;
297         }
298 #endif
299         return 0;
300 }
301
302 /* Undo prepare_resource_for_read() by closing the cached FILE * or NTFS
303  * attribute. */
304 static void end_wim_resource_read(struct lookup_table_entry *lte
305                                 #ifdef WITH_NTFS_3G
306                                         , ntfs_inode *ni
307                                 #endif
308                                         )
309 {
310         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
311             && lte->file_on_disk_fp) {
312                 fclose(lte->file_on_disk_fp);
313                 lte->file_on_disk_fp = NULL;
314         }
315 #ifdef WITH_NTFS_3G
316         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
317                 if (lte->attr) {
318                         ntfs_attr_close(lte->attr);
319                         lte->attr = NULL;
320                 }
321                 if (ni)
322                         ntfs_inode_close(ni);
323         }
324 #endif
325 }
326
327 /*
328  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
329  * written uncompressed or compressed depending on the @out_ctype parameter.
330  *
331  * If by chance the resource compresses to more than the original size (this may
332  * happen with random data or files than are pre-compressed), the resource is
333  * instead written uncompressed (and this is reflected in the @out_res_entry by
334  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
335  *
336  * @lte:        The lookup table entry for the WIM resource.
337  * @out_fp:     The FILE * to write the resource to.
338  * @out_ctype:  The compression type of the resource to write.  Note: if this is
339  *                      the same as the compression type of the WIM resource we
340  *                      need to read, we simply copy the data (i.e. we do not
341  *                      uncompress it, then compress it again).
342  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
343  *                  offset, original size, compressed size, and compression flag
344  *                  of the output resource.
345  *
346  * Returns 0 on success; nonzero on failure.
347  */
348 int write_wim_resource(struct lookup_table_entry *lte,
349                        FILE *out_fp, int out_ctype,
350                        struct resource_entry *out_res_entry,
351                        int flags)
352 {
353         u64 bytes_remaining;
354         u64 original_size;
355         u64 old_compressed_size;
356         u64 new_compressed_size;
357         u64 offset;
358         int ret;
359         struct chunk_table *chunk_tab = NULL;
360         bool raw;
361         off_t file_offset;
362         compress_func_t compress = NULL;
363 #ifdef WITH_NTFS_3G
364         ntfs_inode *ni = NULL;
365 #endif
366
367         wimlib_assert(lte);
368
369         /* Original size of the resource */
370         original_size = wim_resource_size(lte);
371
372         /* Compressed size of the resource (as it exists now) */
373         old_compressed_size = wim_resource_compressed_size(lte);
374
375         /* Current offset in output file */
376         file_offset = ftello(out_fp);
377         if (file_offset == -1) {
378                 ERROR_WITH_ERRNO("Failed to get offset in output "
379                                  "stream");
380                 return WIMLIB_ERR_WRITE;
381         }
382
383         /* Are the compression types the same?  If so, do a raw copy (copy
384          * without decompressing and recompressing the data). */
385         raw = (wim_resource_compression_type(lte) == out_ctype
386                && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
387                && !(flags & WIMLIB_RESOURCE_FLAG_RECOMPRESS));
388
389         if (raw) {
390                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
391                 bytes_remaining = old_compressed_size;
392         } else {
393                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
394                 bytes_remaining = original_size;
395         }
396
397         /* Empty resource; nothing needs to be done, so just return success. */
398         if (bytes_remaining == 0)
399                 return 0;
400
401         /* Buffer for reading chunks for the resource */
402         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
403
404         /* If we are writing a compressed resource and not doing a raw copy, we
405          * need to initialize the chunk table */
406         if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && !raw) {
407                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
408                                                    &chunk_tab);
409                 if (ret != 0)
410                         goto out;
411         }
412
413         /* If the WIM resource is in an external file, open a FILE * to it so we
414          * don't have to open a temporary one in read_wim_resource() for each
415          * chunk. */
416 #ifdef WITH_NTFS_3G
417         ret = prepare_resource_for_read(lte, &ni);
418 #else
419         ret = prepare_resource_for_read(lte);
420 #endif
421         if (ret != 0)
422                 goto out;
423
424         /* If we aren't doing a raw copy, we will compute the SHA1 message
425          * digest of the resource as we read it, and verify it's the same as the
426          * hash given in the lookup table entry once we've finished reading the
427          * resource. */
428         SHA_CTX ctx;
429         if (!raw) {
430                 sha1_init(&ctx);
431                 compress = get_compress_func(out_ctype);
432         }
433         offset = 0;
434
435         /* While there are still bytes remaining in the WIM resource, read a
436          * chunk of the resource, update SHA1, then write that chunk using the
437          * desired compression type. */
438         do {
439                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
440                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
441                 if (ret != 0)
442                         goto out_fclose;
443                 if (!raw)
444                         sha1_update(&ctx, buf, to_read);
445                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
446                                                compress, chunk_tab);
447                 if (ret != 0)
448                         goto out_fclose;
449                 bytes_remaining -= to_read;
450                 offset += to_read;
451         } while (bytes_remaining);
452
453         /* Raw copy:  The new compressed size is the same as the old compressed
454          * size
455          *
456          * Using WIMLIB_COMPRESSION_TYPE_NONE:  The new compressed size is the
457          * original size
458          *
459          * Using a different compression type:  Call
460          * finish_wim_resource_chunk_tab() and it will provide the new
461          * compressed size.
462          */
463         if (raw) {
464                 new_compressed_size = old_compressed_size;
465         } else {
466                 if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
467                         new_compressed_size = original_size;
468                 else {
469                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
470                                                             &new_compressed_size);
471                         if (ret != 0)
472                                 goto out_fclose;
473                 }
474         }
475
476         /* Verify SHA1 message digest of the resource, unless we are doing a raw
477          * write (in which case we never even saw the uncompressed data).  Or,
478          * if the hash we had before is all 0's, just re-set it to be the new
479          * hash. */
480         if (!raw) {
481                 u8 md[SHA1_HASH_SIZE];
482                 sha1_final(md, &ctx);
483                 if (is_zero_hash(lte->hash)) {
484                         copy_hash(lte->hash, md);
485                 } else if (!hashes_equal(md, lte->hash)) {
486                         ERROR("WIM resource has incorrect hash!");
487                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
488                                 ERROR("We were reading it from `%s'; maybe it changed "
489                                       "while we were reading it.",
490                                       lte->file_on_disk);
491                         }
492                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
493                         goto out_fclose;
494                 }
495         }
496
497         if (!raw && new_compressed_size >= original_size &&
498             out_ctype != WIMLIB_COMPRESSION_TYPE_NONE)
499         {
500                 /* Oops!  We compressed the resource to larger than the original
501                  * size.  Write the resource uncompressed instead. */
502                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
503                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
504                                          "of output WIM file", file_offset);
505                         ret = WIMLIB_ERR_WRITE;
506                         goto out_fclose;
507                 }
508                 ret = write_wim_resource(lte, out_fp, WIMLIB_COMPRESSION_TYPE_NONE,
509                                          out_res_entry, flags);
510                 if (ret != 0)
511                         goto out_fclose;
512
513                 ret = fflush_and_ftruncate(out_fp, file_offset + out_res_entry->size);
514                 if (ret != 0)
515                         goto out_fclose;
516         } else {
517                 if (out_res_entry) {
518                         out_res_entry->size          = new_compressed_size;
519                         out_res_entry->original_size = original_size;
520                         out_res_entry->offset        = file_offset;
521                         out_res_entry->flags         = lte->resource_entry.flags
522                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
523                         if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE)
524                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
525                 }
526         }
527         ret = 0;
528 out_fclose:
529 #ifdef WITH_NTFS_3G
530         end_wim_resource_read(lte, ni);
531 #else
532         end_wim_resource_read(lte);
533 #endif
534 out:
535         FREE(chunk_tab);
536         return ret;
537 }
538
539 #ifdef ENABLE_MULTITHREADED_COMPRESSION
/* Fixed-capacity circular queue of pointers, shared between threads. */
struct shared_queue {
        /* Counts occupied slots; shared_queue_get() waits on it. */
        sem_t filled_slots;

        /* Counts free slots; shared_queue_put() waits on it. */
        sem_t empty_slots;

        /* Held while the indices and the slot array are modified. */
        pthread_mutex_t lock;

        /* Index of the slot the next shared_queue_get() removes from. */
        unsigned front;

        /* Index of the slot most recently filled by shared_queue_put(). */
        unsigned back;

        /* The slots themselves; @size entries. */
        void **array;

        /* Capacity of the queue, in slots. */
        unsigned size;
};
549
550 static int shared_queue_init(struct shared_queue *q, unsigned size)
551 {
552         q->array = CALLOC(sizeof(q->array[0]), size);
553         if (!q->array)
554                 return WIMLIB_ERR_NOMEM;
555
556         sem_init(&q->filled_slots, 0, 0);
557         sem_init(&q->empty_slots, 0, size);
558         pthread_mutex_init(&q->lock, NULL);
559         q->front = 0;
560         q->back = size - 1;
561         q->size = size;
562         return 0;
563 }
564
565 static void shared_queue_destroy(struct shared_queue *q)
566 {
567         sem_destroy(&q->filled_slots);
568         sem_destroy(&q->empty_slots);
569         pthread_mutex_destroy(&q->lock);
570         FREE(q->array);
571 }
572
573 static void shared_queue_put(struct shared_queue *q, void *obj)
574 {
575         sem_wait(&q->empty_slots);
576         pthread_mutex_lock(&q->lock);
577
578         q->back = (q->back + 1) % q->size;
579         q->array[q->back] = obj;
580
581         sem_post(&q->filled_slots);
582         pthread_mutex_unlock(&q->lock);
583 }
584
585 static void *shared_queue_get(struct shared_queue *q)
586 {
587         sem_wait(&q->filled_slots);
588         pthread_mutex_lock(&q->lock);
589
590         void *obj = q->array[q->front];
591         q->array[q->front] = NULL;
592         q->front = (q->front + 1) % q->size;
593
594         sem_post(&q->empty_slots);
595         pthread_mutex_unlock(&q->lock);
596         return obj;
597 }
598
599 struct compressor_thread_params {
600         struct shared_queue *res_to_compress_queue;
601         struct shared_queue *compressed_res_queue;
602         compress_func_t compress;
603 };
604
605 #define MAX_CHUNKS_PER_MSG 2
606
607 struct message {
608         struct lookup_table_entry *lte;
609         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
610         u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];
611         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
612         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
613         unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
614         unsigned num_chunks;
615         struct list_head list;
616         bool complete;
617         u64 begin_chunk;
618 };
619
620 static void compress_chunks(struct message *msg, compress_func_t compress)
621 {
622         for (unsigned i = 0; i < msg->num_chunks; i++) {
623                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
624                 int ret = compress(msg->uncompressed_chunks[i],
625                                    msg->uncompressed_chunk_sizes[i],
626                                    msg->compressed_chunks[i],
627                                    &msg->compressed_chunk_sizes[i]);
628                 if (ret == 0) {
629                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
630                 } else {
631                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
632                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
633                 }
634         }
635 }
636
637 static void *compressor_thread_proc(void *arg)
638 {
639         struct compressor_thread_params *params = arg;
640         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
641         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
642         compress_func_t compress = params->compress;
643         struct message *msg;
644
645         DEBUG("Compressor thread ready");
646         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
647                 compress_chunks(msg, compress);
648                 shared_queue_put(compressed_res_queue, msg);
649         }
650         DEBUG("Compressor thread terminating");
651         return NULL;
652 }
653 #endif
654
655 static int do_write_stream_list(struct list_head *my_resources,
656                                 FILE *out_fp,
657                                 int out_ctype,
658                                 wimlib_progress_func_t progress_func,
659                                 union wimlib_progress_info *progress,
660                                 int write_resource_flags)
661 {
662         int ret;
663         struct lookup_table_entry *lte, *tmp;
664
665         list_for_each_entry_safe(lte, tmp, my_resources, staging_list) {
666                 ret = write_wim_resource(lte,
667                                          out_fp,
668                                          out_ctype,
669                                          &lte->output_resource_entry,
670                                          write_resource_flags);
671                 if (ret != 0)
672                         return ret;
673                 list_del(&lte->staging_list);
674                 progress->write_streams.completed_bytes +=
675                         wim_resource_size(lte);
676                 progress->write_streams.completed_streams++;
677                 if (progress_func) {
678                         progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
679                                       progress);
680                 }
681         }
682         return 0;
683 }
684
685 static int write_stream_list_serial(struct list_head *stream_list,
686                                     FILE *out_fp,
687                                     int out_ctype,
688                                     int write_flags,
689                                     wimlib_progress_func_t progress_func,
690                                     union wimlib_progress_info *progress)
691 {
692         int write_resource_flags;
693
694         if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
695                 write_resource_flags = WIMLIB_RESOURCE_FLAG_RECOMPRESS;
696         else
697                 write_resource_flags = 0;
698         progress->write_streams.num_threads = 1;
699         if (progress_func)
700                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, progress);
701         return do_write_stream_list(stream_list, out_fp,
702                                     out_ctype, progress_func,
703                                     progress, write_resource_flags);
704 }
705
706 #ifdef ENABLE_MULTITHREADED_COMPRESSION
707 static int write_wim_chunks(struct message *msg, FILE *out_fp,
708                             struct chunk_table *chunk_tab)
709 {
710         for (unsigned i = 0; i < msg->num_chunks; i++) {
711                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
712
713                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
714                       i, msg->num_chunks, chunk_csize);
715
716                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
717                     != chunk_csize)
718                 {
719                         ERROR_WITH_ERRNO("Failed to write WIM chunk");
720                         return WIMLIB_ERR_WRITE;
721                 }
722
723                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
724                 chunk_tab->cur_offset += chunk_csize;
725         }
726         return 0;
727 }
728
729 /*
730  * This function is executed by the main thread when the resources are being
 * compressed in parallel.  The main thread is in charge of all reading of the
732  * uncompressed data and writing of the compressed data.  The compressor threads
733  * *only* do compression from/to in-memory buffers.
734  *
735  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
 * chunks of uncompressed data to compress, represented in a `struct message'.
737  * Each message is passed from the main thread to a worker thread through the
738  * res_to_compress_queue, and it is passed back through the
739  * compressed_res_queue.
740  */
741 static int main_writer_thread_proc(struct list_head *stream_list,
742                                    FILE *out_fp,
743                                    int out_ctype,
744                                    struct shared_queue *res_to_compress_queue,
745                                    struct shared_queue *compressed_res_queue,
746                                    size_t queue_size,
747                                    int write_flags,
748                                    wimlib_progress_func_t progress_func,
749                                    union wimlib_progress_info *progress)
750 {
751         int ret;
752
753         struct message msgs[queue_size];
754         ZERO_ARRAY(msgs);
755
756         // Initially, all the messages are available to use.
757         LIST_HEAD(available_msgs);
758         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
759                 list_add(&msgs[i].list, &available_msgs);
760
761         // outstanding_resources is the list of resources that currently have
762         // had chunks sent off for compression.
763         //
764         // The first stream in outstanding_resources is the stream that is
765         // currently being written (cur_lte).
766         //
767         // The last stream in outstanding_resources is the stream that is
768         // currently being read and chunks fed to the compressor threads
769         // (next_lte).
770         //
771         // Depending on the number of threads and the sizes of the resource,
772         // the outstanding streams list may contain streams between cur_lte and
773         // next_lte that have all their chunks compressed or being compressed,
774         // but haven't been written yet.
775         //
776         LIST_HEAD(outstanding_resources);
777         struct list_head *next_resource = stream_list->next;
778         struct lookup_table_entry *next_lte = container_of(next_resource,
779                                                            struct lookup_table_entry,
780                                                            staging_list);
781         next_resource = next_resource->next;
782         u64 next_chunk = 0;
783         u64 next_num_chunks = wim_resource_chunks(next_lte);
784         INIT_LIST_HEAD(&next_lte->msg_list);
785         list_add_tail(&next_lte->staging_list, &outstanding_resources);
786
787         // As in write_wim_resource(), each resource we read is checksummed.
788         SHA_CTX next_sha_ctx;
789         sha1_init(&next_sha_ctx);
790         u8 next_hash[SHA1_HASH_SIZE];
791
792         // Resources that don't need any chunks compressed are added to this
793         // list and written directly by the main thread.
794         LIST_HEAD(my_resources);
795
796         struct lookup_table_entry *cur_lte = next_lte;
797         struct chunk_table *cur_chunk_tab = NULL;
798         struct message *msg;
799
800 #ifdef WITH_NTFS_3G
801         ntfs_inode *ni = NULL;
802 #endif
803
804 #ifdef WITH_NTFS_3G
805         ret = prepare_resource_for_read(next_lte, &ni);
806 #else
807         ret = prepare_resource_for_read(next_lte);
808 #endif
809         if (ret != 0)
810                 goto out;
811
812         DEBUG("Initializing buffers for uncompressed "
813               "and compressed data (%zu bytes needed)",
814               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
815
816         // Pre-allocate all the buffers that will be needed to do the chunk
817         // compression.
818         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
819                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
820                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
821                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
822                         if (msgs[i].compressed_chunks[j] == NULL ||
823                             msgs[i].uncompressed_chunks[j] == NULL)
824                         {
825                                 ERROR("Could not allocate enough memory for "
826                                       "multi-threaded compression");
827                                 ret = WIMLIB_ERR_NOMEM;
828                                 goto out;
829                         }
830                 }
831         }
832
833         // This loop is executed until all resources have been written, except
834         // possibly a few that have been added to the @my_resources list for
835         // writing later.
836         while (1) {
837                 // Send chunks to the compressor threads until either (a) there
838                 // are no more messages available since they were all sent off,
839                 // or (b) there are no more resources that need to be
840                 // compressed.
841                 while (!list_empty(&available_msgs) && next_lte != NULL) {
842
843                         // Get a message from the available messages
844                         // list
845                         msg = container_of(available_msgs.next,
846                                            struct message,
847                                            list);
848
849                         // ... and delete it from the available messages
850                         // list
851                         list_del(&msg->list);
852
853                         // Initialize the message with the chunks to
854                         // compress.
855                         msg->num_chunks = min(next_num_chunks - next_chunk,
856                                               MAX_CHUNKS_PER_MSG);
857                         msg->lte = next_lte;
858                         msg->complete = false;
859                         msg->begin_chunk = next_chunk;
860
861                         unsigned size = WIM_CHUNK_SIZE;
862                         for (unsigned i = 0; i < msg->num_chunks; i++) {
863
864                                 // Read chunk @next_chunk of the stream into the
865                                 // message so that a compressor thread can
866                                 // compress it.
867
868                                 if (next_chunk == next_num_chunks - 1 &&
869                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
870                                 {
871                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
872                                 }
873
874
875                                 DEBUG2("Read resource (size=%u, offset=%zu)",
876                                       size, next_chunk * WIM_CHUNK_SIZE);
877
878                                 msg->uncompressed_chunk_sizes[i] = size;
879
880                                 ret = read_wim_resource(next_lte,
881                                                         msg->uncompressed_chunks[i],
882                                                         size,
883                                                         next_chunk * WIM_CHUNK_SIZE,
884                                                         0);
885                                 if (ret != 0)
886                                         goto out;
887                                 sha1_update(&next_sha_ctx,
888                                             msg->uncompressed_chunks[i], size);
889                                 next_chunk++;
890                         }
891
892                         // Send the compression request
893                         list_add_tail(&msg->list, &next_lte->msg_list);
894                         shared_queue_put(res_to_compress_queue, msg);
895                         DEBUG2("Compression request sent");
896
897                         if (next_chunk != next_num_chunks)
898                                 // More chunks to send for this resource
899                                 continue;
900
901                         // Done sending compression requests for a resource!
902                         // Check the SHA1 message digest.
903                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
904                         sha1_final(next_hash, &next_sha_ctx);
905                         if (!hashes_equal(next_lte->hash, next_hash)) {
906                                 ERROR("WIM resource has incorrect hash!");
907                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
908                                         ERROR("We were reading it from `%s'; maybe it changed "
909                                               "while we were reading it.",
910                                               next_lte->file_on_disk);
911                                 }
912                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
913                                 goto out;
914                         }
915
916                         // Advance to the next resource.
917                         //
918                         // If the next resource needs no compression, just write
919                         // it with this thread (not now though--- we could be in
920                         // the middle of writing another resource.)  Keep doing
921                         // this until we either get to the end of the resources
922                         // list, or we get to a resource that needs compression.
923
924                         while (1) {
925                                 if (next_resource == stream_list) {
926                                         next_lte = NULL;
927                                         break;
928                                 }
929                         #ifdef WITH_NTFS_3G
930                                 end_wim_resource_read(next_lte, ni);
931                                 ni = NULL;
932                         #else
933                                 end_wim_resource_read(next_lte);
934                         #endif
935
936                                 next_lte = container_of(next_resource,
937                                                         struct lookup_table_entry,
938                                                         staging_list);
939                                 next_resource = next_resource->next;
940                                 if ((!(write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
941                                       && next_lte->resource_location == RESOURCE_IN_WIM
942                                       && wimlib_get_compression_type(next_lte->wim) == out_ctype)
943                                     || wim_resource_size(next_lte) == 0)
944                                 {
945                                         list_add_tail(&next_lte->staging_list,
946                                                       &my_resources);
947                                 } else {
948                                         list_add_tail(&next_lte->staging_list,
949                                                       &outstanding_resources);
950                                         next_chunk = 0;
951                                         next_num_chunks = wim_resource_chunks(next_lte);
952                                         sha1_init(&next_sha_ctx);
953                                         INIT_LIST_HEAD(&next_lte->msg_list);
954                                 #ifdef WITH_NTFS_3G
955                                         ret = prepare_resource_for_read(next_lte, &ni);
956                                 #else
957                                         ret = prepare_resource_for_read(next_lte);
958                                 #endif
959                                         if (ret != 0)
960                                                 goto out;
961                                         DEBUG2("Updated next_lte");
962                                         break;
963                                 }
964                         }
965                 }
966
967                 // If there are no outstanding resources, there are no more
968                 // resources that need to be written.
969                 if (list_empty(&outstanding_resources)) {
970                         DEBUG("No outstanding resources! Done");
971                         ret = 0;
972                         goto out;
973                 }
974
975                 // Get the next message from the queue and process it.
976                 // The message will contain 1 or more data chunks that have been
977                 // compressed.
978                 DEBUG2("Waiting for message");
979                 msg = shared_queue_get(compressed_res_queue);
980                 msg->complete = true;
981
982                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
983
984                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
985                         DEBUG2("complete=%d", msg->complete);
986                 }
987
988                 // Is this the next chunk in the current resource?  If it's not
989                 // (i.e., an earlier chunk in a same or different resource
990                 // hasn't been compressed yet), do nothing, and keep this
991                 // message around until all earlier chunks are received.
992                 //
993                 // Otherwise, write all the chunks we can.
994                 while (!list_empty(&cur_lte->msg_list)
995                         && (msg = container_of(cur_lte->msg_list.next,
996                                                struct message,
997                                                list))->complete)
998                 {
999                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1000                         if (msg->begin_chunk == 0) {
1001                                 DEBUG2("Begin chunk tab");
1002
1003                                 // This is the first set of chunks.  Leave space
1004                                 // for the chunk table in the output file.
1005                                 off_t cur_offset = ftello(out_fp);
1006                                 if (cur_offset == -1) {
1007                                         ret = WIMLIB_ERR_WRITE;
1008                                         goto out;
1009                                 }
1010                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1011                                                                    out_fp,
1012                                                                    cur_offset,
1013                                                                    &cur_chunk_tab);
1014                                 if (ret != 0)
1015                                         goto out;
1016                         }
1017
1018                         // Write the compressed chunks from the message.
1019                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1020                         if (ret != 0)
1021                                 goto out;
1022
1023                         list_del(&msg->list);
1024
1025                         // This message is available to use for different chunks
1026                         // now.
1027                         list_add(&msg->list, &available_msgs);
1028
1029                         // Was this the last chunk of the stream?  If so,
1030                         // finish it.
1031                         if (list_empty(&cur_lte->msg_list) &&
1032                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1033                         {
1034                                 DEBUG2("Finish wim chunk tab");
1035                                 u64 res_csize;
1036                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1037                                                                     out_fp,
1038                                                                     &res_csize);
1039                                 if (ret != 0)
1040                                         goto out;
1041
1042                                 progress->write_streams.completed_bytes +=
1043                                                 wim_resource_size(cur_lte);
1044                                 progress->write_streams.completed_streams++;
1045
1046                                 if (progress_func) {
1047                                         progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
1048                                                       progress);
1049                                 }
1050
1051                                 cur_lte->output_resource_entry.size =
1052                                         res_csize;
1053
1054                                 cur_lte->output_resource_entry.original_size =
1055                                         cur_lte->resource_entry.original_size;
1056
1057                                 cur_lte->output_resource_entry.offset =
1058                                         cur_chunk_tab->file_offset;
1059
1060                                 cur_lte->output_resource_entry.flags =
1061                                         cur_lte->resource_entry.flags |
1062                                                 WIM_RESHDR_FLAG_COMPRESSED;
1063
1064                                 FREE(cur_chunk_tab);
1065                                 cur_chunk_tab = NULL;
1066
1067                                 struct list_head *next = cur_lte->staging_list.next;
1068                                 list_del(&cur_lte->staging_list);
1069
1070                                 if (next == &outstanding_resources) {
1071                                         DEBUG("No more outstanding resources");
1072                                         ret = 0;
1073                                         goto out;
1074                                 } else {
1075                                         cur_lte = container_of(cur_lte->staging_list.next,
1076                                                                struct lookup_table_entry,
1077                                                                staging_list);
1078                                 }
1079
1080                                 // Since we just finished writing a stream,
1081                                 // write any streams that have been added to the
1082                                 // my_resources list for direct writing by the
1083                                 // main thread (e.g. resources that don't need
1084                                 // to be compressed because the desired
1085                                 // compression type is the same as the previous
1086                                 // compression type).
1087                                 ret = do_write_stream_list(&my_resources,
1088                                                            out_fp,
1089                                                            out_ctype,
1090                                                            progress_func,
1091                                                            progress,
1092                                                            0);
1093                                 if (ret != 0)
1094                                         goto out;
1095                         }
1096                 }
1097         }
1098
1099 out:
1100 #ifdef WITH_NTFS_3G
1101         end_wim_resource_read(cur_lte, ni);
1102 #else
1103         end_wim_resource_read(cur_lte);
1104 #endif
1105         if (ret == 0) {
1106                 ret = do_write_stream_list(&my_resources, out_fp,
1107                                            out_ctype, progress_func,
1108                                            progress, 0);
1109         } else {
1110                 size_t num_available_msgs = 0;
1111                 struct list_head *cur;
1112
1113                 list_for_each(cur, &available_msgs) {
1114                         num_available_msgs++;
1115                 }
1116
1117                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1118                         shared_queue_get(compressed_res_queue);
1119                         num_available_msgs++;
1120                 }
1121         }
1122
1123         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1124                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1125                         FREE(msgs[i].compressed_chunks[j]);
1126                         FREE(msgs[i].uncompressed_chunks[j]);
1127                 }
1128         }
1129
1130         if (cur_chunk_tab != NULL)
1131                 FREE(cur_chunk_tab);
1132         return ret;
1133 }
1134
1135
1136 static int write_stream_list_parallel(struct list_head *stream_list,
1137                                       FILE *out_fp,
1138                                       int out_ctype,
1139                                       int write_flags,
1140                                       unsigned num_threads,
1141                                       wimlib_progress_func_t progress_func,
1142                                       union wimlib_progress_info *progress)
1143 {
1144         int ret;
1145         struct shared_queue res_to_compress_queue;
1146         struct shared_queue compressed_res_queue;
1147         pthread_t *compressor_threads = NULL;
1148
1149         if (num_threads == 0) {
1150                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1151                 if (nthreads < 1) {
1152                         WARNING("Could not determine number of processors! Assuming 1");
1153                         goto out_serial;
1154                 } else {
1155                         num_threads = nthreads;
1156                 }
1157         }
1158
1159         progress->write_streams.num_threads = num_threads;
1160         wimlib_assert(stream_list->next != stream_list);
1161
1162         static const double MESSAGES_PER_THREAD = 2.0;
1163         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1164
1165         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1166
1167         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1168         if (ret != 0)
1169                 goto out_serial;
1170
1171         ret = shared_queue_init(&compressed_res_queue, queue_size);
1172         if (ret != 0)
1173                 goto out_destroy_res_to_compress_queue;
1174
1175         struct compressor_thread_params params;
1176         params.res_to_compress_queue = &res_to_compress_queue;
1177         params.compressed_res_queue = &compressed_res_queue;
1178         params.compress = get_compress_func(out_ctype);
1179
1180         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1181
1182         for (unsigned i = 0; i < num_threads; i++) {
1183                 DEBUG("pthread_create thread %u", i);
1184                 ret = pthread_create(&compressor_threads[i], NULL,
1185                                      compressor_thread_proc, &params);
1186                 if (ret != 0) {
1187                         ret = -1;
1188                         ERROR_WITH_ERRNO("Failed to create compressor "
1189                                          "thread %u", i);
1190                         num_threads = i;
1191                         goto out_join;
1192                 }
1193         }
1194
1195         if (progress_func)
1196                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, progress);
1197
1198         ret = main_writer_thread_proc(stream_list,
1199                                       out_fp,
1200                                       out_ctype,
1201                                       &res_to_compress_queue,
1202                                       &compressed_res_queue,
1203                                       queue_size,
1204                                       write_flags,
1205                                       progress_func,
1206                                       progress);
1207 out_join:
1208         for (unsigned i = 0; i < num_threads; i++)
1209                 shared_queue_put(&res_to_compress_queue, NULL);
1210
1211         for (unsigned i = 0; i < num_threads; i++) {
1212                 if (pthread_join(compressor_threads[i], NULL)) {
1213                         WARNING("Failed to join compressor thread %u: %s",
1214                                 i, strerror(errno));
1215                 }
1216         }
1217         FREE(compressor_threads);
1218         shared_queue_destroy(&compressed_res_queue);
1219 out_destroy_res_to_compress_queue:
1220         shared_queue_destroy(&res_to_compress_queue);
1221         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1222                 return ret;
1223 out_serial:
1224         WARNING("Falling back to single-threaded compression");
1225         return write_stream_list_serial(stream_list,
1226                                         out_fp,
1227                                         out_ctype,
1228                                         write_flags,
1229                                         progress_func,
1230                                         progress);
1231
1232 }
1233 #endif
1234
1235 /*
1236  * Write a list of streams to a WIM (@out_fp) using the compression type
1237  * @out_ctype and up to @num_threads compressor threads.
1238  */
1239 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1240                              int out_ctype, int write_flags,
1241                              unsigned num_threads,
1242                              wimlib_progress_func_t progress_func)
1243 {
1244         struct lookup_table_entry *lte;
1245         size_t num_streams = 0;
1246         u64 total_bytes = 0;
1247         bool compression_needed = false;
1248         union wimlib_progress_info progress;
1249         int ret;
1250
1251         list_for_each_entry(lte, stream_list, staging_list) {
1252                 num_streams++;
1253                 total_bytes += wim_resource_size(lte);
1254                 if (!compression_needed
1255                     &&
1256                     (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
1257                        && (lte->resource_location != RESOURCE_IN_WIM
1258                            || wimlib_get_compression_type(lte->wim) != out_ctype
1259                            || (write_flags & WIMLIB_WRITE_FLAG_REBUILD)))
1260                     && wim_resource_size(lte) != 0)
1261                         compression_needed = true;
1262         }
1263         progress.write_streams.total_bytes       = total_bytes;
1264         progress.write_streams.total_streams     = num_streams;
1265         progress.write_streams.completed_bytes   = 0;
1266         progress.write_streams.completed_streams = 0;
1267         progress.write_streams.num_threads       = num_threads;
1268         progress.write_streams.compression_type  = out_ctype;
1269
1270         if (num_streams == 0) {
1271                 ret = 0;
1272                 goto out;
1273         }
1274
1275 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1276         if (compression_needed && total_bytes >= 1000000 && num_threads != 1) {
1277                 ret = write_stream_list_parallel(stream_list,
1278                                                  out_fp,
1279                                                  out_ctype,
1280                                                  write_flags,
1281                                                  num_threads,
1282                                                  progress_func,
1283                                                  &progress);
1284         }
1285         else
1286 #endif
1287         {
1288                 ret = write_stream_list_serial(stream_list,
1289                                                out_fp,
1290                                                out_ctype,
1291                                                write_flags,
1292                                                progress_func,
1293                                                &progress);
1294         }
1295 out:
1296         return ret;
1297 }
1298
1299
1300 static int dentry_find_streams_to_write(struct dentry *dentry,
1301                                         void *wim)
1302 {
1303         WIMStruct *w = wim;
1304         struct list_head *stream_list = w->private;
1305         struct lookup_table_entry *lte;
1306         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1307                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1308                 if (lte && ++lte->out_refcnt == 1)
1309                         list_add_tail(&lte->staging_list, stream_list);
1310         }
1311         return 0;
1312 }
1313
1314 static int find_streams_to_write(WIMStruct *w)
1315 {
1316         return for_dentry_in_tree(wim_root_dentry(w),
1317                                   dentry_find_streams_to_write, w);
1318 }
1319
1320 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1321                              unsigned num_threads,
1322                              wimlib_progress_func_t progress_func)
1323 {
1324
1325         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1326         LIST_HEAD(stream_list);
1327         w->private = &stream_list;
1328         for_image(w, image, find_streams_to_write);
1329         return write_stream_list(&stream_list, w->out_fp,
1330                                  wimlib_get_compression_type(w), write_flags,
1331                                  num_threads, progress_func);
1332 }
1333
1334 /*
1335  * Finish writing a WIM file: write the lookup table, xml data, and integrity
1336  * table (optional), then overwrite the WIM header.
1337  *
1338  * write_flags is a bitwise OR of the following:
1339  *
1340  *      (public)  WIMLIB_WRITE_FLAG_CHECK_INTEGRITY:
1341  *              Include an integrity table.
1342  *
1343  *      (public)  WIMLIB_WRITE_FLAG_SHOW_PROGRESS:
1344  *              Show progress information when (if) writing the integrity table.
1345  *
1346  *      (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE:
1347  *              Don't write the lookup table.
1348  *
1349  *      (private) WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE:
1350  *              When (if) writing the integrity table, re-use entries from the
1351  *              existing integrity table, if possible.
1352  *
1353  *      (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML:
1354  *              After writing the XML data but before writing the integrity
1355  *              table, write a temporary WIM header and flush the stream so that
1356  *              the WIM is less likely to become corrupted upon abrupt program
1357  *              termination.
1358  *
1359  *      (private) WIMLIB_WRITE_FLAG_FSYNC:
1360  *              fsync() the output file before closing it.
1361  *
1362  */
1363 int finish_write(WIMStruct *w, int image, int write_flags,
1364                  wimlib_progress_func_t progress_func)
1365 {
1366         int ret;
1367         struct wim_header hdr;
1368         FILE *out = w->out_fp;
1369
1370         /* @hdr will be the header for the new WIM.  First copy all the data
1371          * from the header in the WIMStruct; then set all the fields that may
1372          * have changed, including the resource entries, boot index, and image
1373          * count.  */
1374         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1375
1376         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1377                 ret = write_lookup_table(w->lookup_table, out, &hdr.lookup_table_res_entry);
1378                 if (ret != 0)
1379                         goto out;
1380         }
1381
1382         ret = write_xml_data(w->wim_info, image, out,
1383                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1384                               wim_info_get_total_bytes(w->wim_info) : 0,
1385                              &hdr.xml_res_entry);
1386         if (ret != 0)
1387                 goto out;
1388
1389         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1390                 if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
1391                         struct wim_header checkpoint_hdr;
1392                         memcpy(&checkpoint_hdr, &hdr, sizeof(struct wim_header));
1393                         memset(&checkpoint_hdr.integrity, 0, sizeof(struct resource_entry));
1394                         if (fseeko(out, 0, SEEK_SET) != 0) {
1395                                 ret = WIMLIB_ERR_WRITE;
1396                                 goto out;
1397                         }
1398                         ret = write_header(&checkpoint_hdr, out);
1399                         if (ret != 0)
1400                                 goto out;
1401
1402                         if (fflush(out) != 0) {
1403                                 ERROR_WITH_ERRNO("Can't write data to WIM");
1404                                 ret = WIMLIB_ERR_WRITE;
1405                                 goto out;
1406                         }
1407
1408                         if (fseeko(out, 0, SEEK_END) != 0) {
1409                                 ret = WIMLIB_ERR_WRITE;
1410                                 goto out;
1411                         }
1412                 }
1413
1414                 off_t old_lookup_table_end;
1415                 off_t new_lookup_table_end;
1416                 if (write_flags & WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE) {
1417                         old_lookup_table_end = w->hdr.lookup_table_res_entry.offset +
1418                                                w->hdr.lookup_table_res_entry.size;
1419                 } else {
1420                         old_lookup_table_end = 0;
1421                 }
1422                 new_lookup_table_end = hdr.lookup_table_res_entry.offset +
1423                                        hdr.lookup_table_res_entry.size;
1424
1425                 ret = write_integrity_table(out,
1426                                             &hdr.integrity,
1427                                             new_lookup_table_end,
1428                                             old_lookup_table_end,
1429                                             progress_func);
1430                 if (ret != 0)
1431                         goto out;
1432         } else {
1433                 memset(&hdr.integrity, 0, sizeof(struct resource_entry));
1434         }
1435
1436         /*
1437          * In the WIM header, there is room for the resource entry for a
1438          * metadata resource labeled as the "boot metadata".  This entry should
1439          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1440          * it should be a copy of the resource entry for the image that is
1441          * marked as bootable.  This is not well documented...
1442          */
1443         if (hdr.boot_idx == 0 || !w->image_metadata
1444                         || (image != WIMLIB_ALL_IMAGES && image != hdr.boot_idx)) {
1445                 memset(&hdr.boot_metadata_res_entry, 0,
1446                        sizeof(struct resource_entry));
1447         } else {
1448                 memcpy(&hdr.boot_metadata_res_entry,
1449                        &w->image_metadata[
1450                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1451                        sizeof(struct resource_entry));
1452         }
1453
1454         /* Set image count and boot index correctly for single image writes */
1455         if (image != WIMLIB_ALL_IMAGES) {
1456                 hdr.image_count = 1;
1457                 if (hdr.boot_idx == image)
1458                         hdr.boot_idx = 1;
1459                 else
1460                         hdr.boot_idx = 0;
1461         }
1462
1463         if (fseeko(out, 0, SEEK_SET) != 0) {
1464                 ret = WIMLIB_ERR_WRITE;
1465                 goto out;
1466         }
1467
1468         ret = write_header(&hdr, out);
1469         if (ret != 0)
1470                 goto out;
1471
1472         if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
1473                 if (fflush(out) != 0
1474                     || fsync(fileno(out)) != 0)
1475                 {
1476                         ERROR_WITH_ERRNO("Error flushing data to WIM file");
1477                         ret = WIMLIB_ERR_WRITE;
1478                 }
1479         }
1480 out:
1481         if (fclose(out) != 0) {
1482                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1483                 if (ret == 0)
1484                         ret = WIMLIB_ERR_WRITE;
1485         }
1486         w->out_fp = NULL;
1487         return ret;
1488 }
1489
1490 static void close_wim_writable(WIMStruct *w)
1491 {
1492         if (w->out_fp) {
1493                 if (fclose(w->out_fp) != 0) {
1494                         WARNING("Failed to close output WIM: %s",
1495                                 strerror(errno));
1496                 }
1497                 w->out_fp = NULL;
1498         }
1499 }
1500
1501 /* Open file stream and write dummy header for WIM. */
1502 int begin_write(WIMStruct *w, const char *path, int write_flags)
1503 {
1504         int ret;
1505         bool need_readable = false;
1506         bool trunc = true;
1507         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1508                 need_readable = true;
1509
1510         ret = open_wim_writable(w, path, trunc, need_readable);
1511         if (ret != 0)
1512                 return ret;
1513         /* Write dummy header. It will be overwritten later. */
1514         return write_header(&w->hdr, w->out_fp);
1515 }
1516
1517 /* Writes a stand-alone WIM to a file.  */
1518 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1519                            int image, int write_flags, unsigned num_threads,
1520                            wimlib_progress_func_t progress_func)
1521 {
1522         int ret;
1523
1524         if (!w || !path)
1525                 return WIMLIB_ERR_INVALID_PARAM;
1526
1527         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
1528
1529         if (image != WIMLIB_ALL_IMAGES &&
1530              (image < 1 || image > w->hdr.image_count))
1531                 return WIMLIB_ERR_INVALID_IMAGE;
1532
1533         if (w->hdr.total_parts != 1) {
1534                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1535                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1536         }
1537
1538         ret = begin_write(w, path, write_flags);
1539         if (ret != 0)
1540                 goto out;
1541
1542         ret = write_wim_streams(w, image, write_flags, num_threads,
1543                                 progress_func);
1544         if (ret != 0)
1545                 goto out;
1546
1547         if (progress_func)
1548                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_METADATA_BEGIN, NULL);
1549
1550         ret = for_image(w, image, write_metadata_resource);
1551         if (ret != 0)
1552                 goto out;
1553
1554         if (progress_func)
1555                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_METADATA_END, NULL);
1556
1557         ret = finish_write(w, image, write_flags, progress_func);
1558 out:
1559         close_wim_writable(w);
1560         return ret;
1561 }
1562
1563 static int lte_overwrite_prepare(struct lookup_table_entry *lte,
1564                                  void *ignore)
1565 {
1566         memcpy(&lte->output_resource_entry, &lte->resource_entry,
1567                sizeof(struct resource_entry));
1568         lte->out_refcnt = 0;
1569         return 0;
1570 }
1571
1572 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
1573 {
1574         off_t end_offset = *(u64*)arg;
1575
1576         wimlib_assert(lte->out_refcnt <= lte->refcnt);
1577         if (lte->out_refcnt < lte->refcnt) {
1578                 if (lte->resource_entry.offset + lte->resource_entry.size > end_offset) {
1579                         ERROR("The following resource is after the XML data:");
1580                         print_lookup_table_entry(lte);
1581                         return WIMLIB_ERR_RESOURCE_ORDER;
1582                 }
1583         }
1584         return 0;
1585 }
1586
1587 static int find_new_streams(struct lookup_table_entry *lte, void *arg)
1588 {
1589         if (lte->out_refcnt == lte->refcnt)
1590                 list_add(&lte->staging_list, (struct list_head*)arg);
1591         else
1592                 lte->out_refcnt = lte->refcnt;
1593         return 0;
1594 }
1595
1596 /*
1597  * Overwrite a WIM, possibly appending streams to it.
1598  *
1599  * A WIM looks like (or is supposed to look like) the following:
1600  *
1601  *                   Header (212 bytes)
1602  *                   Streams and metadata resources (variable size)
1603  *                   Lookup table (variable size)
1604  *                   XML data (variable size)
1605  *                   Integrity table (optional) (variable size)
1606  *
1607  * If we are not adding any streams or metadata resources, the lookup table is
1608  * unchanged--- so we only need to overwrite the XML data, integrity table, and
1609  * header.  This operation is potentially unsafe if the program is abruptly
1610  * terminated while the XML data or integrity table are being overwritten, but
1611  * before the new header has been written.  To partially alleviate this problem,
1612  * a special flag (WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) is passed to
1613  * finish_write() to cause a temporary WIM header to be written after the XML
1614  * data has been written.  This may prevent the WIM from becoming corrupted if
1615  * the program is terminated while the integrity table is being calculated (but
1616  * no guarantees, due to write re-ordering...).
1617  *
1618  * If we are adding new streams or images (metadata resources), the lookup table
1619  * needs to be changed, and those streams need to be written.  In this case, we
1620  * try to perform a safe update of the WIM file by writing the streams *after*
1621  * the end of the previous WIM, then writing the new lookup table, XML data, and
1622  * (optionally) integrity table following the new streams.  This will produce a
1623  * layout like the following:
1624  *
1625  *                   Header (212 bytes)
1626  *                   (OLD) Streams and metadata resources (variable size)
1627  *                   (OLD) Lookup table (variable size)
1628  *                   (OLD) XML data (variable size)
1629  *                   (OLD) Integrity table (optional) (variable size)
1630  *                   (NEW) Streams and metadata resources (variable size)
1631  *                   (NEW) Lookup table (variable size)
1632  *                   (NEW) XML data (variable size)
1633  *                   (NEW) Integrity table (optional) (variable size)
1634  *
1635  * At all points, the WIM is valid as nothing points to the new data yet.  Then,
1636  * the header is overwritten to point to the new lookup table, XML data, and
1637  * integrity table, to produce the following layout:
1638  *
1639  *                   Header (212 bytes)
1640  *                   Streams and metadata resources (variable size)
1641  *                   Nothing (variable size)
1642  *                   More Streams and metadata resources (variable size)
1643  *                   Lookup table (variable size)
1644  *                   XML data (variable size)
1645  *                   Integrity table (optional) (variable size)
1646  *
1647  * This method allows an image to be appended to a large WIM very quickly, and
1648  * is is crash-safe except in the case of write re-ordering, but the
1649  * disadvantage is that a small hole is left in the WIM where the old lookup
1650  * table, xml data, and integrity table were.  (These usually only take up a
1651  * small amount of space compared to the streams, however.
1652  */
1653 static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
1654                                  unsigned num_threads,
1655                                  wimlib_progress_func_t progress_func,
1656                                  int modified_image_idx)
1657 {
1658         int ret;
1659         struct list_head stream_list;
1660         off_t old_wim_end;
1661
1662         DEBUG("Overwriting `%s' in-place", w->filename);
1663
1664         /* Make sure that the integrity table (if present) is after the XML
1665          * data, and that there are no stream resources, metadata resources, or
1666          * lookup tables after the XML data.  Otherwise, these data would be
1667          * overwritten. */
1668         if (w->hdr.integrity.offset != 0 &&
1669             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
1670                 ERROR("Didn't expect the integrity table to be before the XML data");
1671                 return WIMLIB_ERR_RESOURCE_ORDER;
1672         }
1673
1674         if (w->hdr.lookup_table_res_entry.offset > w->hdr.xml_res_entry.offset) {
1675                 ERROR("Didn't expect the lookup table to be after the XML data");
1676                 return WIMLIB_ERR_RESOURCE_ORDER;
1677         }
1678
1679         DEBUG("Identifying newly added streams");
1680         for_lookup_table_entry(w->lookup_table, lte_overwrite_prepare, NULL);
1681         INIT_LIST_HEAD(&stream_list);
1682         for (int i = modified_image_idx; i < w->hdr.image_count; i++) {
1683                 DEBUG("Identifiying streams in image %d", i + 1);
1684                 wimlib_assert(w->image_metadata[i].modified);
1685                 wimlib_assert(!w->image_metadata[i].has_been_mounted_rw);
1686                 wimlib_assert(w->image_metadata[i].root_dentry != NULL);
1687                 wimlib_assert(w->image_metadata[i].metadata_lte != NULL);
1688                 w->private = &stream_list;
1689                 for_dentry_in_tree(w->image_metadata[i].root_dentry,
1690                                    dentry_find_streams_to_write, w);
1691         }
1692
1693         if (w->hdr.integrity.offset)
1694                 old_wim_end = w->hdr.integrity.offset + w->hdr.integrity.size;
1695         else
1696                 old_wim_end = w->hdr.xml_res_entry.offset + w->hdr.xml_res_entry.size;
1697
1698         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
1699                                      &old_wim_end);
1700         if (ret != 0)
1701                 return ret;
1702
1703         if (modified_image_idx == w->hdr.image_count && !w->deletion_occurred) {
1704                 /* If no images have been modified and no images have been
1705                  * deleted, a new lookup table does not need to be written. */
1706                 wimlib_assert(list_empty(&stream_list));
1707                 old_wim_end = w->hdr.lookup_table_res_entry.offset +
1708                               w->hdr.lookup_table_res_entry.size;
1709                 write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |
1710                                WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML;
1711         }
1712
1713         INIT_LIST_HEAD(&stream_list);
1714         for_lookup_table_entry(w->lookup_table, find_new_streams,
1715                                &stream_list);
1716
1717         ret = open_wim_writable(w, w->filename, false,
1718                                 (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) != 0);
1719         if (ret != 0)
1720                 return ret;
1721
1722         if (fseeko(w->out_fp, old_wim_end, SEEK_SET) != 0) {
1723                 ERROR_WITH_ERRNO("Can't seek to end of WIM");
1724                 return WIMLIB_ERR_WRITE;
1725         }
1726
1727         if (!list_empty(&stream_list)) {
1728                 DEBUG("Writing newly added streams (offset = %"PRIu64")",
1729                       old_wim_end);
1730                 ret = write_stream_list(&stream_list, w->out_fp,
1731                                         wimlib_get_compression_type(w),
1732                                         write_flags, num_threads,
1733                                         progress_func);
1734                 if (ret != 0)
1735                         goto out_ftruncate;
1736         } else {
1737                 DEBUG("No new streams were added");
1738         }
1739
1740         for (int i = modified_image_idx; i < w->hdr.image_count; i++) {
1741                 select_wim_image(w, i + 1);
1742                 ret = write_metadata_resource(w);
1743                 if (ret != 0)
1744                         goto out_ftruncate;
1745         }
1746         write_flags |= WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE;
1747         ret = finish_write(w, WIMLIB_ALL_IMAGES, write_flags,
1748                            progress_func);
1749 out_ftruncate:
1750         close_wim_writable(w);
1751         if (ret != 0) {
1752                 WARNING("Truncating `%s' to its original size (%"PRIu64" bytes)",
1753                         w->filename, old_wim_end);
1754                 truncate(w->filename, old_wim_end);
1755         }
1756         return ret;
1757 }
1758
1759 static int overwrite_wim_via_tmpfile(WIMStruct *w, int write_flags,
1760                                      unsigned num_threads,
1761                                      wimlib_progress_func_t progress_func)
1762 {
1763         size_t wim_name_len;
1764         int ret;
1765
1766         DEBUG("Overwriting `%s' via a temporary file", w->filename);
1767
1768         /* Write the WIM to a temporary file in the same directory as the
1769          * original WIM. */
1770         wim_name_len = strlen(w->filename);
1771         char tmpfile[wim_name_len + 10];
1772         memcpy(tmpfile, w->filename, wim_name_len);
1773         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
1774         tmpfile[wim_name_len + 9] = '\0';
1775
1776         ret = wimlib_write(w, tmpfile, WIMLIB_ALL_IMAGES,
1777                            write_flags | WIMLIB_WRITE_FLAG_FSYNC,
1778                            num_threads, progress_func);
1779         if (ret != 0) {
1780                 ERROR("Failed to write the WIM file `%s'", tmpfile);
1781                 goto err;
1782         }
1783
1784         /* Close the original WIM file that was opened for reading. */
1785         if (w->fp != NULL) {
1786                 fclose(w->fp);
1787                 w->fp = NULL;
1788         }
1789
1790         DEBUG("Renaming `%s' to `%s'", tmpfile, w->filename);
1791
1792         /* Rename the new file to the old file .*/
1793         if (rename(tmpfile, w->filename) != 0) {
1794                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
1795                                  tmpfile, w->filename);
1796                 ret = WIMLIB_ERR_RENAME;
1797                 goto err;
1798         }
1799
1800         if (progress_func) {
1801                 union wimlib_progress_info progress;
1802                 progress.rename.from = tmpfile;
1803                 progress.rename.to = w->filename;
1804                 progress_func(WIMLIB_PROGRESS_MSG_RENAME, &progress);
1805         }
1806
1807         /* Re-open the WIM read-only. */
1808         w->fp = fopen(w->filename, "rb");
1809         if (w->fp == NULL) {
1810                 ret = WIMLIB_ERR_REOPEN;
1811                 WARNING("Failed to re-open `%s' read-only: %s",
1812                         w->filename, strerror(errno));
1813         }
1814         return ret;
1815 err:
1816         /* Remove temporary file. */
1817         if (unlink(tmpfile) != 0)
1818                 WARNING("Failed to remove `%s': %s", tmpfile, strerror(errno));
1819         return ret;
1820 }
1821
1822 /*
1823  * Writes a WIM file to the original file that it was read from, overwriting it.
1824  */
1825 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
1826                                unsigned num_threads,
1827                                wimlib_progress_func_t progress_func)
1828 {
1829         if (!w)
1830                 return WIMLIB_ERR_INVALID_PARAM;
1831
1832         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
1833
1834         if (!w->filename)
1835                 return WIMLIB_ERR_NO_FILENAME;
1836
1837         if (w->hdr.total_parts != 1) {
1838                 ERROR("Cannot modify a split WIM");
1839                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1840         }
1841
1842         if ((!w->deletion_occurred || (write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE))
1843             && !(write_flags & WIMLIB_WRITE_FLAG_REBUILD))
1844         {
1845                 int i, modified_image_idx;
1846                 for (i = 0; i < w->hdr.image_count && !w->image_metadata[i].modified; i++)
1847                         ;
1848                 modified_image_idx = i;
1849                 for (; i < w->hdr.image_count && w->image_metadata[i].modified &&
1850                         !w->image_metadata[i].has_been_mounted_rw; i++)
1851                         ;
1852                 if (i == w->hdr.image_count) {
1853                         return overwrite_wim_inplace(w, write_flags, num_threads,
1854                                                      progress_func,
1855                                                      modified_image_idx);
1856                 }
1857         }
1858         return overwrite_wim_via_tmpfile(w, write_flags, num_threads,
1859                                          progress_func);
1860 }