7315ec903dd513ece056448cfe99764acb01d4a1
[wimlib] / src / write.c
1 /*
2  * write.c
3  *
4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
5  * compressed file resources, etc.
6  */
7
8 /*
9  * Copyright (C) 2010 Carl Thijssen
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32 #include "xml.h"
33 #include "lzx.h"
34 #include "xpress.h"
35 #include <unistd.h>
36
37 #ifdef ENABLE_MULTITHREADED_COMPRESSION
38 #include <semaphore.h>
39 #include <pthread.h>
40 #include <errno.h>
41 #endif
42
43 #ifdef WITH_NTFS_3G
44 #include <time.h>
45 #include <ntfs-3g/attrib.h>
46 #include <ntfs-3g/inode.h>
47 #include <ntfs-3g/dir.h>
48 #endif
49
50
51 #ifdef HAVE_ALLOCA_H
52 #include <alloca.h>
53 #else
54 #include <stdlib.h>
55 #endif
56
57 static int do_fflush(FILE *fp)
58 {
59         int ret = fflush(fp);
60         if (ret != 0) {
61                 ERROR_WITH_ERRNO("Failed to flush data to output WIM file");
62                 return WIMLIB_ERR_WRITE;
63         }
64         return 0;
65 }
66
67 static int fflush_and_ftruncate(FILE *fp, off_t size)
68 {
69         int ret;
70
71         ret = do_fflush(fp);
72         if (ret != 0)
73                 return ret;
74         ret = ftruncate(fileno(fp), size);
75         if (ret != 0) {
76                 ERROR_WITH_ERRNO("Failed to truncate output WIM file to "
77                                  "%"PRIu64" bytes", size);
78                 return WIMLIB_ERR_WRITE;
79         }
80         return 0;
81 }
82
83 /* Chunk table that's located at the beginning of each compressed resource in
84  * the WIM.  (This is not the on-disk format; the on-disk format just has an
85  * array of offsets.) */
86 struct chunk_table {
87         off_t file_offset;
88         u64 num_chunks;
89         u64 original_resource_size;
90         u64 bytes_per_chunk_entry;
91         u64 table_disk_size;
92         u64 cur_offset;
93         u64 *cur_offset_p;
94         u64 offsets[0];
95 };
96
97 /*
98  * Allocates and initializes a chunk table, and reserves space for it in the
99  * output file.
100  */
101 static int
102 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
103                              FILE *out_fp,
104                              off_t file_offset,
105                              struct chunk_table **chunk_tab_ret)
106 {
107         u64 size = wim_resource_size(lte);
108         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
109         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
110         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
111         int ret;
112
113         if (!chunk_tab) {
114                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
115                       "resource", size);
116                 ret = WIMLIB_ERR_NOMEM;
117                 goto out;
118         }
119         chunk_tab->file_offset = file_offset;
120         chunk_tab->num_chunks = num_chunks;
121         chunk_tab->original_resource_size = size;
122         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
123         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
124                                      (num_chunks - 1);
125         chunk_tab->cur_offset = 0;
126         chunk_tab->cur_offset_p = chunk_tab->offsets;
127
128         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
129                    chunk_tab->table_disk_size) {
130                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
131                                  "file resource");
132                 ret = WIMLIB_ERR_WRITE;
133                 goto out;
134         }
135
136         ret = 0;
137 out:
138         *chunk_tab_ret = chunk_tab;
139         return ret;
140 }
141
/*
 * Pointer to a function that compresses a chunk of a WIM resource.
 *
 * @chunk:              Uncompressed data of the chunk.
 * @chunk_size:         Size of the uncompressed chunk in bytes.
 * @compressed_chunk:   Pointer to output buffer of size at least
 *                              (@chunk_size - 1) bytes.
 * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
 *                                      of the compressed chunk will be
 *                                      returned.
 *
 * Returns zero if compression succeeded, and nonzero if the chunk could not be
 * compressed to any smaller than @chunk_size.  This function cannot fail for
 * any other reason.
 */
typedef int (*compress_func_t)(const void *, unsigned, void *, unsigned *);
158
159 compress_func_t get_compress_func(int out_ctype)
160 {
161         if (out_ctype == WIM_COMPRESSION_TYPE_LZX)
162                 return lzx_compress;
163         else
164                 return xpress_compress;
165 }
166
167 /*
168  * Writes a chunk of a WIM resource to an output file.
169  *
170  * @chunk:        Uncompressed data of the chunk.
171  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
172  * @out_fp:       FILE * to write tho chunk to.
173  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
174  *                      chunk table provided)
175  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
176  *                      offset of the chunk we write.
177  *
178  * Returns 0 on success; nonzero on failure.
179  */
180 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
181                                     FILE *out_fp, compress_func_t compress,
182                                     struct chunk_table *chunk_tab)
183 {
184         const u8 *out_chunk;
185         unsigned out_chunk_size;
186         if (chunk_tab) {
187                 u8 *compressed_chunk = alloca(chunk_size);
188                 int ret;
189
190                 ret = compress(chunk, chunk_size, compressed_chunk,
191                                &out_chunk_size);
192                 if (ret == 0) {
193                         out_chunk = compressed_chunk;
194                 } else {
195                         out_chunk = chunk;
196                         out_chunk_size = chunk_size;
197                 }
198                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
199                 chunk_tab->cur_offset += out_chunk_size;
200         } else {
201                 out_chunk = chunk;
202                 out_chunk_size = chunk_size;
203         }
204         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
205                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
206                 return WIMLIB_ERR_WRITE;
207         }
208         return 0;
209 }
210
211 /*
212  * Finishes a WIM chunk tale and writes it to the output file at the correct
213  * offset.
214  *
215  * The final size of the full compressed resource is returned in the
216  * @compressed_size_p.
217  */
218 static int
219 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
220                               FILE *out_fp, u64 *compressed_size_p)
221 {
222         size_t bytes_written;
223         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
224                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
225                                  "WIM file", chunk_tab->file_offset);
226                 return WIMLIB_ERR_WRITE;
227         }
228
229         if (chunk_tab->bytes_per_chunk_entry == 8) {
230                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
231         } else {
232                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
233                         ((u32*)chunk_tab->offsets)[i] =
234                                 cpu_to_le32(chunk_tab->offsets[i]);
235         }
236         bytes_written = fwrite((u8*)chunk_tab->offsets +
237                                         chunk_tab->bytes_per_chunk_entry,
238                                1, chunk_tab->table_disk_size, out_fp);
239         if (bytes_written != chunk_tab->table_disk_size) {
240                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
241                                  "file resource");
242                 return WIMLIB_ERR_WRITE;
243         }
244         if (fseeko(out_fp, 0, SEEK_END) != 0) {
245                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
246                 return WIMLIB_ERR_WRITE;
247         }
248         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
249         return 0;
250 }
251
252 /* Prepare for multiple reads to a resource by caching a FILE * or NTFS
253  * attribute pointer in the lookup table entry. */
254 static int prepare_resource_for_read(struct lookup_table_entry *lte
255
256                                         #ifdef WITH_NTFS_3G
257                                         , ntfs_inode **ni_ret
258                                         #endif
259                 )
260 {
261         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
262              && !lte->file_on_disk_fp)
263         {
264                 wimlib_assert(lte->file_on_disk);
265                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
266                 if (!lte->file_on_disk_fp) {
267                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
268                                          "reading", lte->file_on_disk);
269                         return WIMLIB_ERR_OPEN;
270                 }
271         }
272 #ifdef WITH_NTFS_3G
273         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
274                   && !lte->attr)
275         {
276                 struct ntfs_location *loc = lte->ntfs_loc;
277                 ntfs_inode *ni;
278                 wimlib_assert(loc);
279                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
280                 if (!ni) {
281                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
282                                          "volume", loc->path_utf8);
283                         return WIMLIB_ERR_NTFS_3G;
284                 }
285                 lte->attr = ntfs_attr_open(ni,
286                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
287                                            (ntfschar*)loc->stream_name_utf16,
288                                            loc->stream_name_utf16_num_chars);
289                 if (!lte->attr) {
290                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
291                                          "NTFS volume", loc->path_utf8);
292                         ntfs_inode_close(ni);
293                         return WIMLIB_ERR_NTFS_3G;
294                 }
295                 *ni_ret = ni;
296         }
297 #endif
298         return 0;
299 }
300
301 /* Undo prepare_resource_for_read() by closing the cached FILE * or NTFS
302  * attribute. */
303 static void end_wim_resource_read(struct lookup_table_entry *lte
304                                 #ifdef WITH_NTFS_3G
305                                         , ntfs_inode *ni
306                                 #endif
307                                         )
308 {
309         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
310             && lte->file_on_disk_fp) {
311                 fclose(lte->file_on_disk_fp);
312                 lte->file_on_disk_fp = NULL;
313         }
314 #ifdef WITH_NTFS_3G
315         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
316                 if (lte->attr) {
317                         ntfs_attr_close(lte->attr);
318                         lte->attr = NULL;
319                 }
320                 if (ni)
321                         ntfs_inode_close(ni);
322         }
323 #endif
324 }
325
326 /*
327  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
328  * written uncompressed or compressed depending on the @out_ctype parameter.
329  *
330  * If by chance the resource compresses to more than the original size (this may
331  * happen with random data or files than are pre-compressed), the resource is
332  * instead written uncompressed (and this is reflected in the @out_res_entry by
333  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
334  *
335  * @lte:        The lookup table entry for the WIM resource.
336  * @out_fp:     The FILE * to write the resource to.
337  * @out_ctype:  The compression type of the resource to write.  Note: if this is
338  *                      the same as the compression type of the WIM resource we
339  *                      need to read, we simply copy the data (i.e. we do not
340  *                      uncompress it, then compress it again).
341  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
342  *                  offset, original size, compressed size, and compression flag
343  *                  of the output resource.
344  *
345  * Returns 0 on success; nonzero on failure.
346  */
347 int write_wim_resource(struct lookup_table_entry *lte,
348                        FILE *out_fp, int out_ctype,
349                        struct resource_entry *out_res_entry,
350                        int flags)
351 {
352         u64 bytes_remaining;
353         u64 original_size;
354         u64 old_compressed_size;
355         u64 new_compressed_size;
356         u64 offset;
357         int ret;
358         struct chunk_table *chunk_tab = NULL;
359         bool raw;
360         off_t file_offset;
361         compress_func_t compress;
362 #ifdef WITH_NTFS_3G
363         ntfs_inode *ni = NULL;
364 #endif
365
366         wimlib_assert(lte);
367
368         /* Original size of the resource */
369         original_size = wim_resource_size(lte);
370
371         /* Compressed size of the resource (as it exists now) */
372         old_compressed_size = wim_resource_compressed_size(lte);
373
374         /* Current offset in output file */
375         file_offset = ftello(out_fp);
376         if (file_offset == -1) {
377                 ERROR_WITH_ERRNO("Failed to get offset in output "
378                                  "stream");
379                 return WIMLIB_ERR_WRITE;
380         }
381
382         /* Are the compression types the same?  If so, do a raw copy (copy
383          * without decompressing and recompressing the data). */
384         raw = (wim_resource_compression_type(lte) == out_ctype
385                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
386
387         if (raw) {
388                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
389                 bytes_remaining = old_compressed_size;
390         } else {
391                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
392                 bytes_remaining = original_size;
393         }
394
395         /* Empty resource; nothing needs to be done, so just return success. */
396         if (bytes_remaining == 0)
397                 return 0;
398
399         /* Buffer for reading chunks for the resource */
400         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
401
402         /* If we are writing a compressed resource and not doing a raw copy, we
403          * need to initialize the chunk table */
404         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
405                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
406                                                    &chunk_tab);
407                 if (ret != 0)
408                         goto out;
409         }
410
411         /* If the WIM resource is in an external file, open a FILE * to it so we
412          * don't have to open a temporary one in read_wim_resource() for each
413          * chunk. */
414 #ifdef WITH_NTFS_3G
415         ret = prepare_resource_for_read(lte, &ni);
416 #else
417         ret = prepare_resource_for_read(lte);
418 #endif
419         if (ret != 0)
420                 goto out;
421
422         /* If we aren't doing a raw copy, we will compute the SHA1 message
423          * digest of the resource as we read it, and verify it's the same as the
424          * hash given in the lookup table entry once we've finished reading the
425          * resource. */
426         SHA_CTX ctx;
427         if (!raw) {
428                 sha1_init(&ctx);
429                 compress = get_compress_func(out_ctype);
430         }
431         offset = 0;
432
433         /* While there are still bytes remaining in the WIM resource, read a
434          * chunk of the resource, update SHA1, then write that chunk using the
435          * desired compression type. */
436         do {
437                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
438                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
439                 if (ret != 0)
440                         goto out_fclose;
441                 if (!raw)
442                         sha1_update(&ctx, buf, to_read);
443                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
444                                                compress, chunk_tab);
445                 if (ret != 0)
446                         goto out_fclose;
447                 bytes_remaining -= to_read;
448                 offset += to_read;
449         } while (bytes_remaining);
450
451         /* Raw copy:  The new compressed size is the same as the old compressed
452          * size
453          *
454          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
455          * original size
456          *
457          * Using a different compression type:  Call
458          * finish_wim_resource_chunk_tab() and it will provide the new
459          * compressed size.
460          */
461         if (raw) {
462                 new_compressed_size = old_compressed_size;
463         } else {
464                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
465                         new_compressed_size = original_size;
466                 else {
467                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
468                                                             &new_compressed_size);
469                         if (ret != 0)
470                                 goto out_fclose;
471                 }
472         }
473
474         /* Verify SHA1 message digest of the resource, unless we are doing a raw
475          * write (in which case we never even saw the uncompressed data).  Or,
476          * if the hash we had before is all 0's, just re-set it to be the new
477          * hash. */
478         if (!raw) {
479                 u8 md[SHA1_HASH_SIZE];
480                 sha1_final(md, &ctx);
481                 if (is_zero_hash(lte->hash)) {
482                         copy_hash(lte->hash, md);
483                 } else if (!hashes_equal(md, lte->hash)) {
484                         ERROR("WIM resource has incorrect hash!");
485                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
486                                 ERROR("We were reading it from `%s'; maybe it changed "
487                                       "while we were reading it.",
488                                       lte->file_on_disk);
489                         }
490                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
491                         goto out_fclose;
492                 }
493         }
494
495         if (!raw && new_compressed_size >= original_size &&
496             out_ctype != WIM_COMPRESSION_TYPE_NONE)
497         {
498                 /* Oops!  We compressed the resource to larger than the original
499                  * size.  Write the resource uncompressed instead. */
500                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
501                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
502                                          "of output WIM file", file_offset);
503                         ret = WIMLIB_ERR_WRITE;
504                         goto out_fclose;
505                 }
506                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
507                                          out_res_entry, flags);
508                 if (ret != 0)
509                         goto out_fclose;
510
511                 ret = fflush_and_ftruncate(out_fp, file_offset + out_res_entry->size);
512                 if (ret != 0)
513                         goto out_fclose;
514         } else {
515                 if (out_res_entry) {
516                         out_res_entry->size          = new_compressed_size;
517                         out_res_entry->original_size = original_size;
518                         out_res_entry->offset        = file_offset;
519                         out_res_entry->flags         = lte->resource_entry.flags
520                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
521                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
522                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
523                 }
524         }
525         ret = 0;
526 out_fclose:
527 #ifdef WITH_NTFS_3G
528         end_wim_resource_read(lte, ni);
529 #else
530         end_wim_resource_read(lte);
531 #endif
532 out:
533         FREE(chunk_tab);
534         return ret;
535 }
536
537 #ifdef ENABLE_MULTITHREADED_COMPRESSION
/* Fixed-capacity circular queue of pointers that is shared between threads.
 * Two semaphores count the filled and the empty slots, and a mutex guards the
 * indices and the slot array. */
struct shared_queue {
        /* Number of slots holding an object; starts at 0.  Posted by
         * shared_queue_put() and waited on by shared_queue_get(). */
        sem_t filled_slots;

        /* Number of free slots; starts at @size.  Waited on by
         * shared_queue_put() and posted by shared_queue_get(). */
        sem_t empty_slots;

        /* Held while @front, @back and @array are read or modified. */
        pthread_mutex_t lock;

        /* Index of the slot the next shared_queue_get() takes from. */
        unsigned front;

        /* Index of the slot most recently filled.  Starts at @size - 1 so that
         * the first object put on the queue lands in slot 0. */
        unsigned back;

        /* The slots themselves; @size entries. */
        void **array;

        /* Capacity of the queue, in slots. */
        unsigned size;
};
547
548 static int shared_queue_init(struct shared_queue *q, unsigned size)
549 {
550         q->array = CALLOC(sizeof(q->array[0]), size);
551         if (!q->array)
552                 return WIMLIB_ERR_NOMEM;
553
554         sem_init(&q->filled_slots, 0, 0);
555         sem_init(&q->empty_slots, 0, size);
556         pthread_mutex_init(&q->lock, NULL);
557         q->front = 0;
558         q->back = size - 1;
559         q->size = size;
560         return 0;
561 }
562
563 static void shared_queue_destroy(struct shared_queue *q)
564 {
565         sem_destroy(&q->filled_slots);
566         sem_destroy(&q->empty_slots);
567         pthread_mutex_destroy(&q->lock);
568         FREE(q->array);
569 }
570
571 static void shared_queue_put(struct shared_queue *q, void *obj)
572 {
573         sem_wait(&q->empty_slots);
574         pthread_mutex_lock(&q->lock);
575
576         q->back = (q->back + 1) % q->size;
577         q->array[q->back] = obj;
578
579         sem_post(&q->filled_slots);
580         pthread_mutex_unlock(&q->lock);
581 }
582
583 static void *shared_queue_get(struct shared_queue *q)
584 {
585         sem_wait(&q->filled_slots);
586         pthread_mutex_lock(&q->lock);
587
588         void *obj = q->array[q->front];
589         q->array[q->front] = NULL;
590         q->front = (q->front + 1) % q->size;
591
592         sem_post(&q->empty_slots);
593         pthread_mutex_unlock(&q->lock);
594         return obj;
595 }
596
/* Argument handed to each compressor thread (see compressor_thread_proc()). */
struct compressor_thread_params {
        /* Queue the thread takes messages to compress from. */
        struct shared_queue *res_to_compress_queue;

        /* Queue the thread puts messages on once they have been compressed. */
        struct shared_queue *compressed_res_queue;

        /* Compression function applied to every chunk. */
        compress_func_t compress;
};
602
/* Maximum number of chunks carried by a single message. */
#define MAX_CHUNKS_PER_MSG 2

/* Unit of work exchanged between the main thread and the compressor threads:
 * up to MAX_CHUNKS_PER_MSG chunks together with the buffers for their
 * compressed form. */
struct message {
        /* Resource the chunks belong to -- presumably; it is not used by the
         * compression code itself.  TODO confirm against the writer thread. */
        struct lookup_table_entry *lte;

        /* Input: the uncompressed data of each chunk. */
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];

        /* Output: for each chunk, the data that should actually be written.
         * compress_chunks() points this at either the compressed buffer or,
         * when compression did not help, the uncompressed one. */
        u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];

        /* Buffers the compressor writes the compressed chunks into. */
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];

        /* Size in bytes of each uncompressed chunk. */
        unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];

        /* Size in bytes of the data to write for each chunk; equal to the
         * uncompressed size when the chunk is written uncompressed. */
        unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];

        /* Number of chunks in this message that are in use. */
        unsigned num_chunks;

        /* Links the message into a list of messages. */
        struct list_head list;

        /* NOTE(review): looks like a flag for "compression of this message has
         * finished"; its use is not visible here -- confirm. */
        bool complete;

        /* NOTE(review): presumably the index, within the resource, of the
         * first chunk in this message -- confirm. */
        u64 begin_chunk;
};
617
618 static void compress_chunks(struct message *msg, compress_func_t compress)
619 {
620         for (unsigned i = 0; i < msg->num_chunks; i++) {
621                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
622                 int ret = compress(msg->uncompressed_chunks[i],
623                                    msg->uncompressed_chunk_sizes[i],
624                                    msg->compressed_chunks[i],
625                                    &msg->compressed_chunk_sizes[i]);
626                 if (ret == 0) {
627                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
628                 } else {
629                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
630                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
631                 }
632         }
633 }
634
635 static void *compressor_thread_proc(void *arg)
636 {
637         struct compressor_thread_params *params = arg;
638         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
639         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
640         compress_func_t compress = params->compress;
641         struct message *msg;
642
643         DEBUG("Compressor thread ready");
644         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
645                 compress_chunks(msg, compress);
646                 shared_queue_put(compressed_res_queue, msg);
647         }
648         DEBUG("Compressor thread terminating");
649 }
650 #endif
651
652 static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
653                                        u64 total_size, u64 one_percent,
654                                        unsigned *cur_percent,
655                                        const struct lookup_table_entry *cur_lte)
656 {
657         if (*cur_size >= *next_size) {
658                 printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
659                        "(uncompressed) written (%u%% done)",
660                        *cur_size >> 20,
661                        total_size >> 20, *cur_percent);
662                 fflush(stdout);
663                 *next_size += one_percent;
664                 (*cur_percent)++;
665         }
666         *cur_size += wim_resource_size(cur_lte);
667 }
668
669 static void finish_stream_write_progress(u64 total_size)
670 {
671         printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
672                "(uncompressed) written (100%% done)\n",
673                total_size >> 20, total_size >> 20);
674         fflush(stdout);
675 }
676
677 static int write_stream_list_serial(struct list_head *stream_list,
678                                     FILE *out_fp, int out_ctype,
679                                     int write_flags, u64 total_size)
680 {
681         struct lookup_table_entry *lte;
682         int ret;
683
684         u64 one_percent = total_size / 100;
685         u64 cur_size = 0;
686         u64 next_size = 0;
687         unsigned cur_percent = 0;
688
689         list_for_each_entry(lte, stream_list, staging_list) {
690                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
691                         show_stream_write_progress(&cur_size, &next_size,
692                                                    total_size, one_percent,
693                                                    &cur_percent, lte);
694                 }
695                 ret = write_wim_resource(lte, out_fp, out_ctype,
696                                          &lte->output_resource_entry, 0);
697                 if (ret != 0)
698                         return ret;
699         }
700         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
701                 finish_stream_write_progress(total_size);
702         return 0;
703 }
704
705 #ifdef ENABLE_MULTITHREADED_COMPRESSION
706 static int write_wim_chunks(struct message *msg, FILE *out_fp,
707                             struct chunk_table *chunk_tab)
708 {
709         for (unsigned i = 0; i < msg->num_chunks; i++) {
710                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
711
712                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
713                       i, msg->num_chunks, chunk_csize);
714
715                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
716                     != chunk_csize)
717                 {
718                         ERROR_WITH_ERRNO("Failed to write WIM chunk");
719                         return WIMLIB_ERR_WRITE;
720                 }
721
722                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
723                 chunk_tab->cur_offset += chunk_csize;
724         }
725         return 0;
726 }
727
728 /*
729  * This function is executed by the main thread when the resources are being
730  * compressed in parallel.  The main thread is in change of all reading of the
731  * uncompressed data and writing of the compressed data.  The compressor threads
732  * *only* do compression from/to in-memory buffers.
733  *
734  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
735  * chunks of compressed data to compress, represented in a `struct message'.
736  * Each message is passed from the main thread to a worker thread through the
737  * res_to_compress_queue, and it is passed back through the
738  * compressed_res_queue.
739  */
740 static int main_writer_thread_proc(struct list_head *stream_list,
741                                    FILE *out_fp,
742                                    int out_ctype,
743                                    struct shared_queue *res_to_compress_queue,
744                                    struct shared_queue *compressed_res_queue,
745                                    size_t queue_size,
746                                    int write_flags,
747                                    u64 total_size)
748 {
749         int ret;
750
751
752         struct message msgs[queue_size];
753         ZERO_ARRAY(msgs);
754
755         // Initially, all the messages are available to use.
756         LIST_HEAD(available_msgs);
757         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
758                 list_add(&msgs[i].list, &available_msgs);
759
760         // outstanding_resources is the list of resources that currently have
761         // had chunks sent off for compression.
762         //
763         // The first stream in outstanding_resources is the stream that is
764         // currently being written (cur_lte).
765         //
766         // The last stream in outstanding_resources is the stream that is
767         // currently being read and chunks fed to the compressor threads
768         // (next_lte).
769         //
770         // Depending on the number of threads and the sizes of the resource,
771         // the outstanding streams list may contain streams between cur_lte and
772         // next_lte that have all their chunks compressed or being compressed,
773         // but haven't been written yet.
774         //
775         LIST_HEAD(outstanding_resources);
776         struct list_head *next_resource = stream_list->next;
777         struct lookup_table_entry *next_lte = container_of(next_resource,
778                                                            struct lookup_table_entry,
779                                                            staging_list);
780         next_resource = next_resource->next;
781         u64 next_chunk = 0;
782         u64 next_num_chunks = wim_resource_chunks(next_lte);
783         INIT_LIST_HEAD(&next_lte->msg_list);
784         list_add_tail(&next_lte->staging_list, &outstanding_resources);
785
786         // As in write_wim_resource(), each resource we read is checksummed.
787         SHA_CTX next_sha_ctx;
788         sha1_init(&next_sha_ctx);
789         u8 next_hash[SHA1_HASH_SIZE];
790
791         // Resources that don't need any chunks compressed are added to this
792         // list and written directly by the main thread.
793         LIST_HEAD(my_resources);
794
795         struct lookup_table_entry *cur_lte = next_lte;
796         struct chunk_table *cur_chunk_tab = NULL;
797         struct lookup_table_entry *lte;
798         struct message *msg;
799
800         u64 one_percent = total_size / 100;
801         u64 cur_size = 0;
802         u64 next_size = 0;
803         unsigned cur_percent = 0;
804
805 #ifdef WITH_NTFS_3G
806         ntfs_inode *ni = NULL;
807 #endif
808
809 #ifdef WITH_NTFS_3G
810         ret = prepare_resource_for_read(next_lte, &ni);
811 #else
812         ret = prepare_resource_for_read(next_lte);
813 #endif
814         if (ret != 0)
815                 goto out;
816
817         DEBUG("Initializing buffers for uncompressed "
818               "and compressed data (%zu bytes needed)",
819               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
820
821         // Pre-allocate all the buffers that will be needed to do the chunk
822         // compression.
823         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
824                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
825                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
826                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
827                         if (msgs[i].compressed_chunks[j] == NULL ||
828                             msgs[i].uncompressed_chunks[j] == NULL)
829                         {
830                                 ERROR("Could not allocate enough memory for "
831                                       "multi-threaded compression");
832                                 ret = WIMLIB_ERR_NOMEM;
833                                 goto out;
834                         }
835                 }
836         }
837
838         // This loop is executed until all resources have been written, except
839         // possibly a few that have been added to the @my_resources list for
840         // writing later.
841         while (1) {
842                 // Send chunks to the compressor threads until either (a) there
843                 // are no more messages available since they were all sent off,
844                 // or (b) there are no more resources that need to be
845                 // compressed.
846                 while (!list_empty(&available_msgs) && next_lte != NULL) {
847
848                         // Get a message from the available messages
849                         // list
850                         msg = container_of(available_msgs.next,
851                                            struct message,
852                                            list);
853
854                         // ... and delete it from the available messages
855                         // list
856                         list_del(&msg->list);
857
858                         // Initialize the message with the chunks to
859                         // compress.
860                         msg->num_chunks = min(next_num_chunks - next_chunk,
861                                               MAX_CHUNKS_PER_MSG);
862                         msg->lte = next_lte;
863                         msg->complete = false;
864                         msg->begin_chunk = next_chunk;
865
866                         unsigned size = WIM_CHUNK_SIZE;
867                         for (unsigned i = 0; i < msg->num_chunks; i++) {
868
869                                 // Read chunk @next_chunk of the stream into the
870                                 // message so that a compressor thread can
871                                 // compress it.
872
873                                 if (next_chunk == next_num_chunks - 1 &&
874                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
875                                 {
876                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
877                                 }
878
879
880                                 DEBUG2("Read resource (size=%u, offset=%zu)",
881                                       size, next_chunk * WIM_CHUNK_SIZE);
882
883                                 msg->uncompressed_chunk_sizes[i] = size;
884
885                                 ret = read_wim_resource(next_lte,
886                                                         msg->uncompressed_chunks[i],
887                                                         size,
888                                                         next_chunk * WIM_CHUNK_SIZE,
889                                                         0);
890                                 if (ret != 0)
891                                         goto out;
892                                 sha1_update(&next_sha_ctx,
893                                             msg->uncompressed_chunks[i], size);
894                                 next_chunk++;
895                         }
896
897                         // Send the compression request
898                         list_add_tail(&msg->list, &next_lte->msg_list);
899                         shared_queue_put(res_to_compress_queue, msg);
900                         DEBUG2("Compression request sent");
901
902                         if (next_chunk != next_num_chunks)
903                                 // More chunks to send for this resource
904                                 continue;
905
906                         // Done sending compression requests for a resource!
907                         // Check the SHA1 message digest.
908                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
909                         sha1_final(next_hash, &next_sha_ctx);
910                         if (!hashes_equal(next_lte->hash, next_hash)) {
911                                 ERROR("WIM resource has incorrect hash!");
912                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
913                                         ERROR("We were reading it from `%s'; maybe it changed "
914                                               "while we were reading it.",
915                                               next_lte->file_on_disk);
916                                 }
917                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
918                                 goto out;
919                         }
920
921                         // Advance to the next resource.
922                         //
923                         // If the next resource needs no compression, just write
924                         // it with this thread (not now though--- we could be in
925                         // the middle of writing another resource.)  Keep doing
926                         // this until we either get to the end of the resources
927                         // list, or we get to a resource that needs compression.
928
929                         while (1) {
930                                 if (next_resource == stream_list) {
931                                         next_lte = NULL;
932                                         break;
933                                 }
934                         #ifdef WITH_NTFS_3G
935                                 end_wim_resource_read(next_lte, ni);
936                                 ni = NULL;
937                         #else
938                                 end_wim_resource_read(next_lte);
939                         #endif
940
941                                 next_lte = container_of(next_resource,
942                                                         struct lookup_table_entry,
943                                                         staging_list);
944                                 next_resource = next_resource->next;
945                                 if ((next_lte->resource_location == RESOURCE_IN_WIM
946                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
947                                     || wim_resource_size(next_lte) == 0)
948                                 {
949                                         list_add_tail(&next_lte->staging_list,
950                                                       &my_resources);
951                                 } else {
952                                         list_add_tail(&next_lte->staging_list,
953                                                       &outstanding_resources);
954                                         next_chunk = 0;
955                                         next_num_chunks = wim_resource_chunks(next_lte);
956                                         sha1_init(&next_sha_ctx);
957                                         INIT_LIST_HEAD(&next_lte->msg_list);
958                                 #ifdef WITH_NTFS_3G
959                                         ret = prepare_resource_for_read(next_lte, &ni);
960                                 #else
961                                         ret = prepare_resource_for_read(next_lte);
962                                 #endif
963                                         if (ret != 0)
964                                                 goto out;
965                                         DEBUG2("Updated next_lte");
966                                         break;
967                                 }
968                         }
969                 }
970
971                 // If there are no outstanding resources, there are no more
972                 // resources that need to be written.
973                 if (list_empty(&outstanding_resources)) {
974                         DEBUG("No outstanding resources! Done");
975                         ret = 0;
976                         goto out;
977                 }
978
979                 // Get the next message from the queue and process it.
980                 // The message will contain 1 or more data chunks that have been
981                 // compressed.
982                 DEBUG2("Waiting for message");
983                 msg = shared_queue_get(compressed_res_queue);
984                 msg->complete = true;
985
986                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
987
988                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
989                         DEBUG2("complete=%d", msg->complete);
990                 }
991
992                 // Is this the next chunk in the current resource?  If it's not
993                 // (i.e., an earlier chunk in a same or different resource
994                 // hasn't been compressed yet), do nothing, and keep this
995                 // message around until all earlier chunks are received.
996                 //
997                 // Otherwise, write all the chunks we can.
998                 while (!list_empty(&cur_lte->msg_list)
999                         && (msg = container_of(cur_lte->msg_list.next,
1000                                                struct message,
1001                                                list))->complete)
1002                 {
1003                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1004                         if (msg->begin_chunk == 0) {
1005                                 DEBUG2("Begin chunk tab");
1006                                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1007                                         show_stream_write_progress(&cur_size,
1008                                                                    &next_size,
1009                                                                    total_size,
1010                                                                    one_percent,
1011                                                                    &cur_percent,
1012                                                                    cur_lte);
1013                                 }
1014
1015                                 // This is the first set of chunks.  Leave space
1016                                 // for the chunk table in the output file.
1017                                 off_t cur_offset = ftello(out_fp);
1018                                 if (cur_offset == -1) {
1019                                         ret = WIMLIB_ERR_WRITE;
1020                                         goto out;
1021                                 }
1022                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1023                                                                    out_fp,
1024                                                                    cur_offset,
1025                                                                    &cur_chunk_tab);
1026                                 if (ret != 0)
1027                                         goto out;
1028                         }
1029
1030                         // Write the compressed chunks from the message.
1031                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1032                         if (ret != 0)
1033                                 goto out;
1034
1035                         list_del(&msg->list);
1036
1037                         // This message is available to use for different chunks
1038                         // now.
1039                         list_add(&msg->list, &available_msgs);
1040
1041                         // Was this the last chunk of the stream?  If so,
1042                         // finish it.
1043                         if (list_empty(&cur_lte->msg_list) &&
1044                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1045                         {
1046                                 DEBUG2("Finish wim chunk tab");
1047                                 u64 res_csize;
1048                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1049                                                                     out_fp,
1050                                                                     &res_csize);
1051                                 if (ret != 0)
1052                                         goto out;
1053
1054
1055                                 cur_lte->output_resource_entry.size =
1056                                         res_csize;
1057
1058                                 cur_lte->output_resource_entry.original_size =
1059                                         cur_lte->resource_entry.original_size;
1060
1061                                 cur_lte->output_resource_entry.offset =
1062                                         cur_chunk_tab->file_offset;
1063
1064                                 cur_lte->output_resource_entry.flags =
1065                                         cur_lte->resource_entry.flags |
1066                                                 WIM_RESHDR_FLAG_COMPRESSED;
1067
1068                                 FREE(cur_chunk_tab);
1069                                 cur_chunk_tab = NULL;
1070
1071                                 struct list_head *next = cur_lte->staging_list.next;
1072                                 list_del(&cur_lte->staging_list);
1073
1074                                 if (next == &outstanding_resources) {
1075                                         DEBUG("No more outstanding resources");
1076                                         ret = 0;
1077                                         goto out;
1078                                 } else {
1079                                         cur_lte = container_of(cur_lte->staging_list.next,
1080                                                                struct lookup_table_entry,
1081                                                                staging_list);
1082                                 }
1083
1084                                 // Since we just finished writing a stream,
1085                                 // write any streams that have been added to the
1086                                 // my_resources list for direct writing by the
1087                                 // main thread (e.g. resources that don't need
1088                                 // to be compressed because the desired
1089                                 // compression type is the same as the previous
1090                                 // compression type).
1091                                 struct lookup_table_entry *tmp;
1092                                 list_for_each_entry_safe(lte,
1093                                                          tmp,
1094                                                          &my_resources,
1095                                                          staging_list)
1096                                 {
1097                                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1098                                                 show_stream_write_progress(&cur_size,
1099                                                                            &next_size,
1100                                                                            total_size,
1101                                                                            one_percent,
1102                                                                            &cur_percent,
1103                                                                            lte);
1104                                         }
1105
1106                                         ret = write_wim_resource(lte,
1107                                                                  out_fp,
1108                                                                  out_ctype,
1109                                                                  &lte->output_resource_entry,
1110                                                                  0);
1111                                         list_del(&lte->staging_list);
1112                                         if (ret != 0)
1113                                                 goto out;
1114                                 }
1115                         }
1116                 }
1117         }
1118
1119 out:
1120 #ifdef WITH_NTFS_3G
1121         end_wim_resource_read(cur_lte, ni);
1122 #else
1123         end_wim_resource_read(cur_lte);
1124 #endif
1125         if (ret == 0) {
1126                 list_for_each_entry(lte, &my_resources, staging_list) {
1127                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1128                                 show_stream_write_progress(&cur_size,
1129                                                            &next_size,
1130                                                            total_size,
1131                                                            one_percent,
1132                                                            &cur_percent,
1133                                                            lte);
1134                         }
1135                         ret = write_wim_resource(lte, out_fp,
1136                                                  out_ctype,
1137                                                  &lte->output_resource_entry,
1138                                                  0);
1139                         if (ret != 0)
1140                                 break;
1141                 }
1142                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1143                         finish_stream_write_progress(total_size);
1144         } else {
1145                 size_t num_available_msgs = 0;
1146                 struct list_head *cur;
1147
1148                 list_for_each(cur, &available_msgs) {
1149                         num_available_msgs++;
1150                 }
1151
1152                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1153                         shared_queue_get(compressed_res_queue);
1154                         num_available_msgs++;
1155                 }
1156         }
1157
1158         DEBUG("Freeing messages");
1159
1160         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1161                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1162                         FREE(msgs[i].compressed_chunks[j]);
1163                         FREE(msgs[i].uncompressed_chunks[j]);
1164                 }
1165         }
1166
1167         if (cur_chunk_tab != NULL)
1168                 FREE(cur_chunk_tab);
1169         return ret;
1170 }
1171
1172
1173 static const char *get_data_type(int ctype)
1174 {
1175         switch (ctype) {
1176         case WIM_COMPRESSION_TYPE_NONE:
1177                 return "uncompressed";
1178         case WIM_COMPRESSION_TYPE_LZX:
1179                 return "LZX-compressed";
1180         case WIM_COMPRESSION_TYPE_XPRESS:
1181                 return "XPRESS-compressed";
1182         }
1183 }
1184
1185 static int write_stream_list_parallel(struct list_head *stream_list,
1186                                       FILE *out_fp, int out_ctype,
1187                                       int write_flags, u64 total_size,
1188                                       unsigned num_threads)
1189 {
1190         int ret;
1191         struct shared_queue res_to_compress_queue;
1192         struct shared_queue compressed_res_queue;
1193         pthread_t *compressor_threads = NULL;
1194
1195         if (num_threads == 0) {
1196                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1197                 if (nthreads < 1) {
1198                         WARNING("Could not determine number of processors! Assuming 1");
1199                         goto out_serial;
1200                 } else {
1201                         num_threads = nthreads;
1202                 }
1203         }
1204
1205         wimlib_assert(stream_list->next != stream_list);
1206
1207         static const double MESSAGES_PER_THREAD = 2.0;
1208         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1209
1210         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1211
1212         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1213         if (ret != 0)
1214                 goto out_serial;
1215
1216         ret = shared_queue_init(&compressed_res_queue, queue_size);
1217         if (ret != 0)
1218                 goto out_destroy_res_to_compress_queue;
1219
1220         struct compressor_thread_params params;
1221         params.res_to_compress_queue = &res_to_compress_queue;
1222         params.compressed_res_queue = &compressed_res_queue;
1223         params.compress = get_compress_func(out_ctype);
1224
1225         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1226
1227         for (unsigned i = 0; i < num_threads; i++) {
1228                 DEBUG("pthread_create thread %u", i);
1229                 ret = pthread_create(&compressor_threads[i], NULL,
1230                                      compressor_thread_proc, &params);
1231                 if (ret != 0) {
1232                         ret = -1;
1233                         ERROR_WITH_ERRNO("Failed to create compressor "
1234                                          "thread %u", i);
1235                         num_threads = i;
1236                         goto out_join;
1237                 }
1238         }
1239
1240         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1241                 printf("Writing %s compressed data using %u threads...\n",
1242                        get_data_type(out_ctype), num_threads);
1243         }
1244
1245         ret = main_writer_thread_proc(stream_list,
1246                                       out_fp,
1247                                       out_ctype,
1248                                       &res_to_compress_queue,
1249                                       &compressed_res_queue,
1250                                       queue_size,
1251                                       write_flags,
1252                                       total_size);
1253
1254 out_join:
1255         for (unsigned i = 0; i < num_threads; i++)
1256                 shared_queue_put(&res_to_compress_queue, NULL);
1257
1258         for (unsigned i = 0; i < num_threads; i++) {
1259                 if (pthread_join(compressor_threads[i], NULL)) {
1260                         WARNING("Failed to join compressor thread %u: %s",
1261                                 i, strerror(errno));
1262                 }
1263         }
1264         FREE(compressor_threads);
1265         shared_queue_destroy(&compressed_res_queue);
1266 out_destroy_res_to_compress_queue:
1267         shared_queue_destroy(&res_to_compress_queue);
1268         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1269                 return ret;
1270 out_serial:
1271         WARNING("Falling back to single-threaded compression");
1272         return write_stream_list_serial(stream_list, out_fp,
1273                                         out_ctype, write_flags, total_size);
1274 }
1275 #endif
1276
1277 /*
1278  * Write a list of streams to a WIM (@out_fp) using the compression type
1279  * @out_ctype and up to @num_threads compressor threads.
1280  */
1281 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1282                              int out_ctype, int write_flags,
1283                              unsigned num_threads)
1284 {
1285         struct lookup_table_entry *lte;
1286         size_t num_streams = 0;
1287         u64 total_size = 0;
1288         bool compression_needed = false;
1289
1290         list_for_each_entry(lte, stream_list, staging_list) {
1291                 num_streams++;
1292                 total_size += wim_resource_size(lte);
1293                 if (!compression_needed
1294                     && out_ctype != WIM_COMPRESSION_TYPE_NONE
1295                     && (lte->resource_location != RESOURCE_IN_WIM
1296                         || wimlib_get_compression_type(lte->wim) != out_ctype)
1297                     && wim_resource_size(lte) != 0)
1298                         compression_needed = true;
1299         }
1300
1301         if (num_streams == 0) {
1302                 if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE)
1303                         printf("No streams to write\n");
1304                 return 0;
1305         }
1306
1307         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1308                 printf("Preparing to write %zu streams "
1309                        "(%"PRIu64" total bytes uncompressed)\n",
1310                        num_streams, total_size);
1311                 printf("Using compression type %s\n",
1312                        wimlib_get_compression_type_string(out_ctype));
1313         }
1314
1315 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1316         if (compression_needed && total_size >= 1000000 && num_threads != 1) {
1317                 return write_stream_list_parallel(stream_list, out_fp,
1318                                                   out_ctype, write_flags,
1319                                                   total_size, num_threads);
1320         }
1321         else
1322 #endif
1323         {
1324                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1325                         const char *reason = "";
1326                         if (!compression_needed)
1327                                 reason = " (no compression needed)";
1328                         printf("Writing %s data using 1 thread%s\n",
1329                                get_data_type(out_ctype), reason);
1330                 }
1331
1332                 return write_stream_list_serial(stream_list, out_fp,
1333                                                 out_ctype, write_flags,
1334                                                 total_size);
1335         }
1336 }
1337
1338
1339 static int dentry_find_streams_to_write(struct dentry *dentry,
1340                                         void *wim)
1341 {
1342         WIMStruct *w = wim;
1343         struct list_head *stream_list = w->private;
1344         struct lookup_table_entry *lte;
1345         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1346                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1347                 if (lte && ++lte->out_refcnt == 1)
1348                         list_add_tail(&lte->staging_list, stream_list);
1349         }
1350         return 0;
1351 }
1352
1353 static int find_streams_to_write(WIMStruct *w)
1354 {
1355         return for_dentry_in_tree(wim_root_dentry(w),
1356                                   dentry_find_streams_to_write, w);
1357 }
1358
1359 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1360                              unsigned num_threads)
1361 {
1362
1363         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1364         LIST_HEAD(stream_list);
1365         w->private = &stream_list;
1366         for_image(w, image, find_streams_to_write);
1367         return write_stream_list(&stream_list, w->out_fp,
1368                                  wimlib_get_compression_type(w), write_flags,
1369                                  num_threads);
1370 }
1371
1372 /*
1373  * Write the lookup table, xml data, and integrity table, then overwrite the WIM
1374  * header.
1375  */
1376 int finish_write(WIMStruct *w, int image, int write_flags)
1377 {
1378         int ret;
1379         struct wim_header hdr;
1380         FILE *out = w->out_fp;
1381
1382         /* @hdr will be the header for the new WIM.  First copy all the data
1383          * from the header in the WIMStruct; then set all the fields that may
1384          * have changed, including the resource entries, boot index, and image
1385          * count.  */
1386         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1387
1388         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1389                 ret = write_lookup_table(w->lookup_table, out, &hdr.lookup_table_res_entry);
1390                 if (ret != 0)
1391                         goto out;
1392         }
1393
1394         ret = write_xml_data(w->wim_info, image, out,
1395                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1396                               wim_info_get_total_bytes(w->wim_info) : 0,
1397                              &hdr.xml_res_entry);
1398         if (ret != 0)
1399                 goto out;
1400
1401         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1402                 off_t old_lookup_table_end;
1403                 off_t new_lookup_table_end;
1404                 bool show_progress;
1405                 if (write_flags & WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE) {
1406                         old_lookup_table_end = w->hdr.lookup_table_res_entry.offset +
1407                                                w->hdr.lookup_table_res_entry.size;
1408                 } else {
1409                         old_lookup_table_end = 0;
1410                 }
1411                 new_lookup_table_end = hdr.lookup_table_res_entry.offset +
1412                                        hdr.lookup_table_res_entry.size;
1413                 show_progress = ((write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) != 0);
1414
1415                 ret = write_integrity_table(out,
1416                                             &hdr.integrity,
1417                                             new_lookup_table_end,
1418                                             old_lookup_table_end,
1419                                             show_progress);
1420                 if (ret != 0)
1421                         goto out;
1422         } else {
1423                 memset(&hdr.integrity, 0, sizeof(struct resource_entry));
1424         }
1425
1426         /*
1427          * In the WIM header, there is room for the resource entry for a
1428          * metadata resource labeled as the "boot metadata".  This entry should
1429          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1430          * it should be a copy of the resource entry for the image that is
1431          * marked as bootable.  This is not well documented...
1432          */
1433         if (hdr.boot_idx == 0 || !w->image_metadata
1434                         || (image != WIM_ALL_IMAGES && image != hdr.boot_idx)) {
1435                 memset(&hdr.boot_metadata_res_entry, 0,
1436                        sizeof(struct resource_entry));
1437         } else {
1438                 memcpy(&hdr.boot_metadata_res_entry,
1439                        &w->image_metadata[
1440                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1441                        sizeof(struct resource_entry));
1442         }
1443
1444         /* Set image count and boot index correctly for single image writes */
1445         if (image != WIM_ALL_IMAGES) {
1446                 hdr.image_count = 1;
1447                 if (hdr.boot_idx == image)
1448                         hdr.boot_idx = 1;
1449                 else
1450                         hdr.boot_idx = 0;
1451         }
1452
1453         if (fseeko(out, 0, SEEK_SET) != 0) {
1454                 ret = WIMLIB_ERR_WRITE;
1455                 ERROR_WITH_ERRNO("Failed to seek to beginning of WIM "
1456                                  "to overwrite header");
1457                 goto out;
1458         }
1459
1460         ret = write_header(&hdr, out);
1461         if (ret != 0)
1462                 goto out;
1463
1464         if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
1465                 if (fflush(out) != 0
1466                     || fsync(fileno(out)) != 0)
1467                 {
1468                         ERROR_WITH_ERRNO("Error flushing data to WIM file");
1469                         ret = WIMLIB_ERR_WRITE;
1470                 }
1471         }
1472 out:
1473         if (fclose(out) != 0) {
1474                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1475                 if (ret == 0)
1476                         ret = WIMLIB_ERR_WRITE;
1477         }
1478         w->out_fp = NULL;
1479         return ret;
1480 }
1481
1482 static void close_wim_writable(WIMStruct *w)
1483 {
1484         if (w->out_fp) {
1485                 if (fclose(w->out_fp) != 0) {
1486                         WARNING("Failed to close output WIM: %s",
1487                                 strerror(errno));
1488                 }
1489                 w->out_fp = NULL;
1490         }
1491 }
1492
1493 /* Open file stream and write dummy header for WIM. */
1494 int begin_write(WIMStruct *w, const char *path, int write_flags)
1495 {
1496         int ret;
1497         bool need_readable = false;
1498         bool trunc = true;
1499         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1500                 need_readable = true;
1501
1502         ret = open_wim_writable(w, path, trunc, need_readable);
1503         if (ret != 0)
1504                 return ret;
1505         /* Write dummy header. It will be overwritten later. */
1506         return write_header(&w->hdr, w->out_fp);
1507 }
1508
1509 /* Writes a stand-alone WIM to a file.  */
1510 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1511                            int image, int write_flags, unsigned num_threads)
1512 {
1513         int ret;
1514
1515         if (!w || !path)
1516                 return WIMLIB_ERR_INVALID_PARAM;
1517
1518         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
1519
1520         if (image != WIM_ALL_IMAGES &&
1521              (image < 1 || image > w->hdr.image_count))
1522                 return WIMLIB_ERR_INVALID_IMAGE;
1523
1524         if (w->hdr.total_parts != 1) {
1525                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1526                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1527         }
1528
1529         if (image == WIM_ALL_IMAGES)
1530                 DEBUG("Writing all images to `%s'.", path);
1531         else
1532                 DEBUG("Writing image %d to `%s'.", image, path);
1533
1534         ret = begin_write(w, path, write_flags);
1535         if (ret != 0)
1536                 goto out;
1537
1538         ret = write_wim_streams(w, image, write_flags, num_threads);
1539         if (ret != 0)
1540                 goto out;
1541
1542         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1543                 printf("Writing image metadata...\n");
1544
1545         ret = for_image(w, image, write_metadata_resource);
1546         if (ret != 0)
1547                 goto out;
1548
1549         ret = finish_write(w, image, write_flags);
1550         if (ret == 0 && (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS))
1551                 printf("Successfully wrote `%s'\n", path);
1552 out:
1553         close_wim_writable(w);
1554         return ret;
1555 }
1556
1557 static int lte_overwrite_prepare(struct lookup_table_entry *lte,
1558                                  void *ignore)
1559 {
1560         memcpy(&lte->output_resource_entry, &lte->resource_entry,
1561                sizeof(struct resource_entry));
1562         lte->out_refcnt = 0;
1563         return 0;
1564 }
1565
1566 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
1567 {
1568         off_t end_offset = *(u64*)arg;
1569
1570         wimlib_assert(lte->out_refcnt <= lte->refcnt);
1571         if (lte->out_refcnt < lte->refcnt) {
1572                 if (lte->resource_entry.offset + lte->resource_entry.size > end_offset) {
1573                         ERROR("The following resource is after the XML data:");
1574                         print_lookup_table_entry(lte);
1575                         return WIMLIB_ERR_RESOURCE_ORDER;
1576                 }
1577         }
1578         return 0;
1579 }
1580
1581 static int find_new_streams(struct lookup_table_entry *lte, void *arg)
1582 {
1583         if (lte->out_refcnt == lte->refcnt)
1584                 list_add(&lte->staging_list, (struct list_head*)arg);
1585         else
1586                 lte->out_refcnt = lte->refcnt;
1587         return 0;
1588 }
1589
1590 static int overwrite_wim_inplace(WIMStruct *w, int write_flags,
1591                                  unsigned num_threads,
1592                                  int modified_image_idx)
1593 {
1594         int ret;
1595         struct list_head stream_list;
1596         off_t old_wim_end;
1597
1598         DEBUG("Overwriting `%s' in-place", w->filename);
1599
1600         /* Make sure that the integrity table (if present) is after the XML
1601          * data, and that there are no stream resources, metadata resources, or
1602          * lookup tables after the XML data.  Otherwise, these data would be
1603          * overwritten. */
1604         if (w->hdr.integrity.offset != 0 &&
1605             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
1606                 ERROR("Didn't expect the integrity table to be before the XML data");
1607                 return WIMLIB_ERR_RESOURCE_ORDER;
1608         }
1609
1610         if (w->hdr.lookup_table_res_entry.offset > w->hdr.xml_res_entry.offset) {
1611                 ERROR("Didn't expect the lookup table to be after the XML data");
1612                 return WIMLIB_ERR_RESOURCE_ORDER;
1613         }
1614
1615         DEBUG("Identifying newly added streams");
1616         for_lookup_table_entry(w->lookup_table, lte_overwrite_prepare, NULL);
1617         INIT_LIST_HEAD(&stream_list);
1618         for (int i = modified_image_idx; i < w->hdr.image_count; i++) {
1619                 DEBUG("Identifiying streams in image %d", i + 1);
1620                 wimlib_assert(w->image_metadata[i].modified);
1621                 wimlib_assert(!w->image_metadata[i].has_been_mounted_rw);
1622                 wimlib_assert(w->image_metadata[i].root_dentry != NULL);
1623                 wimlib_assert(w->image_metadata[i].metadata_lte != NULL);
1624                 w->private = &stream_list;
1625                 for_dentry_in_tree(w->image_metadata[i].root_dentry,
1626                                    dentry_find_streams_to_write, w);
1627         }
1628
1629         if (w->hdr.integrity.offset)
1630                 old_wim_end = w->hdr.integrity.offset + w->hdr.integrity.size;
1631         else
1632                 old_wim_end = w->hdr.xml_res_entry.offset + w->hdr.xml_res_entry.size;
1633
1634         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
1635                                      &old_wim_end);
1636         if (ret != 0)
1637                 return ret;
1638
1639         DEBUG("old_wim_end = %"PRIu64, old_wim_end);
1640
1641         INIT_LIST_HEAD(&stream_list);
1642         for_lookup_table_entry(w->lookup_table, find_new_streams,
1643                                &stream_list);
1644
1645         {
1646                 u64 num_new_streams = 0;
1647                 struct list_head *cur;
1648                 list_for_each(cur, &stream_list)
1649                         num_new_streams++;
1650                 DEBUG("%"PRIu64" new streams to write", num_new_streams);
1651         }
1652
1653         {
1654                 bool trunc = false;
1655                 bool need_readable = false;
1656                 if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1657                         need_readable = true;
1658                 ret = open_wim_writable(w, w->filename, trunc, need_readable);
1659                 if (ret != 0)
1660                         return ret;
1661         }
1662
1663         if (fseeko(w->out_fp, old_wim_end, SEEK_SET) != 0) {
1664                 ERROR_WITH_ERRNO("Can't seek to end of WIM");
1665                 return WIMLIB_ERR_WRITE;
1666         }
1667
1668         if (!list_empty(&stream_list)) {
1669                 DEBUG("Writing newly added streams (offset = %"PRIu64")",
1670                       old_wim_end);
1671                 ret = write_stream_list(&stream_list, w->out_fp,
1672                                         wimlib_get_compression_type(w),
1673                                         write_flags, num_threads);
1674                 if (ret != 0)
1675                         goto out_ftruncate;
1676         } else {
1677                 DEBUG("No new streams were added");
1678         }
1679
1680         for (int i = modified_image_idx; i < w->hdr.image_count; i++) {
1681                 select_wim_image(w, i + 1);
1682                 ret = write_metadata_resource(w);
1683                 if (ret != 0)
1684                         goto out_ftruncate;
1685         }
1686         write_flags |= WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE;
1687         ret = finish_write(w, WIM_ALL_IMAGES, write_flags);
1688 out_ftruncate:
1689         close_wim_writable(w);
1690         if (ret != 0) {
1691                 WARNING("Truncating `%s' to its original size (%"PRIu64" bytes)",
1692                         w->filename, old_wim_end);
1693                 truncate(w->filename, old_wim_end);
1694         }
1695         return ret;
1696 }
1697
1698 static int overwrite_wim_via_tmpfile(WIMStruct *w, int write_flags,
1699                                      unsigned num_threads)
1700 {
1701         size_t wim_name_len;
1702         int ret;
1703
1704         DEBUG("Overwrining `%s' via a temporary file", w->filename);
1705
1706         /* Write the WIM to a temporary file in the same directory as the
1707          * original WIM. */
1708         wim_name_len = strlen(w->filename);
1709         char tmpfile[wim_name_len + 10];
1710         memcpy(tmpfile, w->filename, wim_name_len);
1711         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
1712         tmpfile[wim_name_len + 9] = '\0';
1713
1714         ret = wimlib_write(w, tmpfile, WIM_ALL_IMAGES,
1715                            write_flags | WIMLIB_WRITE_FLAG_FSYNC,
1716                            num_threads);
1717         if (ret != 0) {
1718                 ERROR("Failed to write the WIM file `%s'", tmpfile);
1719                 goto err;
1720         }
1721
1722         /* Close the original WIM file that was opened for reading. */
1723         if (w->fp != NULL) {
1724                 fclose(w->fp);
1725                 w->fp = NULL;
1726         }
1727
1728         DEBUG("Renaming `%s' to `%s'", tmpfile, w->filename);
1729
1730         /* Rename the new file to the old file .*/
1731         if (rename(tmpfile, w->filename) != 0) {
1732                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
1733                                  tmpfile, w->filename);
1734                 ret = WIMLIB_ERR_RENAME;
1735                 goto err;
1736         }
1737
1738         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1739                 printf("Successfully renamed `%s' to `%s'\n", tmpfile, w->filename);
1740
1741         /* Re-open the WIM read-only. */
1742         w->fp = fopen(w->filename, "rb");
1743         if (w->fp == NULL) {
1744                 ret = WIMLIB_ERR_REOPEN;
1745                 WARNING("Failed to re-open `%s' read-only: %s",
1746                         w->filename, strerror(errno));
1747         }
1748         return ret;
1749 err:
1750         /* Remove temporary file. */
1751         if (unlink(tmpfile) != 0)
1752                 WARNING("Failed to remove `%s': %s", tmpfile, strerror(errno));
1753         return ret;
1754 }
1755
1756 /*
1757  * Writes a WIM file to the original file that it was read from, overwriting it.
1758  */
1759 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
1760                                unsigned num_threads)
1761 {
1762         if (!w)
1763                 return WIMLIB_ERR_INVALID_PARAM;
1764
1765         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
1766
1767         if (!w->filename)
1768                 return WIMLIB_ERR_NO_FILENAME;
1769
1770         if (w->hdr.total_parts != 1) {
1771                 ERROR("Cannot modify a split WIM");
1772                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1773         }
1774
1775         if (!w->deletion_occurred && !(write_flags & WIMLIB_WRITE_FLAG_REBUILD)) {
1776                 int i, modified_image_idx;
1777                 for (i = 0; i < w->hdr.image_count && !w->image_metadata[i].modified; i++)
1778                         ;
1779                 modified_image_idx = i;
1780                 for (; i < w->hdr.image_count && w->image_metadata[i].modified &&
1781                         !w->image_metadata[i].has_been_mounted_rw; i++)
1782                         ;
1783                 if (i == w->hdr.image_count) {
1784                         return overwrite_wim_inplace(w, write_flags, num_threads,
1785                                                      modified_image_idx);
1786                 }
1787         }
1788         return overwrite_wim_via_tmpfile(w, write_flags, num_threads);
1789 }
1790
1791 /* Deprecated */
1792 WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *wim, int write_flags)
1793 {
1794         return wimlib_overwrite(wim, write_flags, 1);
1795 }