wimlib_write(): Add WIMLIB_WRITE_FLAG_FSYNC
[wimlib] / src / write.c
1 /*
2  * write.c
3  *
4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
5  * compressed file resources, etc.
6  */
7
8 /*
9  * Copyright (C) 2010 Carl Thijssen
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32 #include "xml.h"
33 #include "lzx.h"
34 #include "xpress.h"
35 #include <unistd.h>
36
37 #ifdef ENABLE_MULTITHREADED_COMPRESSION
38 #include <semaphore.h>
39 #include <pthread.h>
40 #include <errno.h>
41 #endif
42
43 #ifdef WITH_NTFS_3G
44 #include <time.h>
45 #include <ntfs-3g/attrib.h>
46 #include <ntfs-3g/inode.h>
47 #include <ntfs-3g/dir.h>
48 #endif
49
50
51 #ifdef HAVE_ALLOCA_H
52 #include <alloca.h>
53 #else
54 #include <stdlib.h>
55 #endif
56
57 /* Reopens the FILE* for a WIM read-write. */
58 static int reopen_rw(WIMStruct *w)
59 {
60         FILE *fp;
61
62         if (fclose(w->fp) != 0)
63                 ERROR_WITH_ERRNO("Failed to close the file `%s'", w->filename);
64         w->fp = NULL;
65         fp = fopen(w->filename, "r+b");
66         if (!fp) {
67                 ERROR_WITH_ERRNO("Failed to open `%s' for reading and writing",
68                                  w->filename);
69                 return WIMLIB_ERR_OPEN;
70         }
71         w->fp = fp;
72         return 0;
73 }
74
75
76
77 /*
78  * Writes a WIM file to the original file that it was read from, overwriting it.
79  */
80 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
81                                unsigned num_threads)
82 {
83         size_t wim_name_len;
84         int ret;
85
86         if (!w)
87                 return WIMLIB_ERR_INVALID_PARAM;
88
89         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
90         if (!w->filename)
91                 return WIMLIB_ERR_NO_FILENAME;
92
93         DEBUG("Replacing WIM file `%s'.", w->filename);
94
95         /* Write the WIM to a temporary file. */
96         /* XXX should the temporary file be somewhere else? */
97         wim_name_len = strlen(w->filename);
98         char tmpfile[wim_name_len + 10];
99         memcpy(tmpfile, w->filename, wim_name_len);
100         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
101         tmpfile[wim_name_len + 9] = '\0';
102
103         ret = wimlib_write(w, tmpfile, WIM_ALL_IMAGES,
104                            write_flags | WIMLIB_WRITE_FLAG_FSYNC,
105                            num_threads);
106         if (ret != 0) {
107                 ERROR("Failed to write the WIM file `%s'", tmpfile);
108                 if (unlink(tmpfile) != 0)
109                         WARNING("Failed to remove `%s'", tmpfile);
110                 return ret;
111         }
112
113         DEBUG("Closing original WIM file.");
114         /* Close the original WIM file that was opened for reading. */
115         if (w->fp) {
116                 if (fclose(w->fp) != 0) {
117                         WARNING("Failed to close the file `%s'", w->filename);
118                 }
119                 w->fp = NULL;
120         }
121
122         DEBUG("Renaming `%s' to `%s'", tmpfile, w->filename);
123
124         /* Rename the new file to the old file .*/
125         if (rename(tmpfile, w->filename) != 0) {
126                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
127                                  tmpfile, w->filename);
128                 ret = WIMLIB_ERR_RENAME;
129                 goto err;
130         }
131
132         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
133                 printf("Successfully renamed `%s' to `%s'\n", tmpfile, w->filename);
134
135         return 0;
136 err:
137         /* Remove temporary file. */
138         if (unlink(tmpfile) != 0)
139                 ERROR_WITH_ERRNO("Failed to remove `%s'", tmpfile);
140         return ret;
141 }
142
143 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
144 {
145         u64 xml_data_offset = *(u64*)arg;
146         if (lte->resource_entry.offset > xml_data_offset) {
147                 ERROR("The following resource is *after* the XML data:");
148                 print_lookup_table_entry(lte);
149                 return WIMLIB_ERR_RESOURCE_ORDER;
150         }
151         return 0;
152 }
153
154 WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *w, int write_flags)
155 {
156         int ret;
157         FILE *fp;
158         u8 *integrity_table = NULL;
159         off_t xml_end;
160         off_t xml_size;
161         size_t bytes_written;
162
163         DEBUG("Overwriting XML and header of `%s', write_flags = %#x",
164               w->filename, write_flags);
165
166         if (!w->filename)
167                 return WIMLIB_ERR_NO_FILENAME;
168
169         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
170
171         /* Make sure that the integrity table (if present) is after the XML
172          * data, and that there are no stream resources, metadata resources, or
173          * lookup tables after the XML data.  Otherwise, these data would be
174          * destroyed by this function. */
175         if (w->hdr.integrity.offset != 0 &&
176             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
177                 ERROR("Didn't expect the integrity table to be before the XML data");
178                 return WIMLIB_ERR_RESOURCE_ORDER;
179         }
180
181         if (w->hdr.lookup_table_res_entry.offset >
182             w->hdr.xml_res_entry.offset) {
183                 ERROR("Didn't expect the lookup table to be after the XML data");
184                 return WIMLIB_ERR_RESOURCE_ORDER;
185         }
186
187         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
188                                      &w->hdr.xml_res_entry.offset);
189         if (ret != 0)
190                 return ret;
191
192         ret = reopen_rw(w);
193         if (ret != 0)
194                 return ret;
195
196         fp = w->fp;
197
198         /* The old integrity table is still OK, as the SHA1 message digests in
199          * the integrity table include neither the header nor the XML data.
200          * Save it for later if it exists and an integrity table was required.
201          * */
202         if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
203              && w->hdr.integrity.offset != 0)
204         {
205                 DEBUG("Reading existing integrity table.");
206                 integrity_table = MALLOC(w->hdr.integrity.size);
207                 if (!integrity_table)
208                         return WIMLIB_ERR_NOMEM;
209
210                 ret = read_uncompressed_resource(fp, w->hdr.integrity.offset,
211                                                  w->hdr.integrity.original_size,
212                                                  integrity_table);
213                 if (ret != 0)
214                         goto err;
215                 DEBUG("Done reading existing integrity table.");
216         }
217
218         DEBUG("Overwriting XML data.");
219         /* Overwrite the XML data. */
220         if (fseeko(fp, w->hdr.xml_res_entry.offset, SEEK_SET) != 0) {
221                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
222                                  "for XML data", w->hdr.xml_res_entry.offset);
223                 ret = WIMLIB_ERR_WRITE;
224                 goto err;
225         }
226         ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp, 0);
227         if (ret != 0)
228                 goto err;
229
230         DEBUG("Updating XML resource entry.");
231         /* Update the XML resource entry in the WIM header. */
232         xml_end = ftello(fp);
233         if (xml_end == -1) {
234                 ret = WIMLIB_ERR_WRITE;
235                 goto err;
236         }
237         xml_size = xml_end - w->hdr.xml_res_entry.offset;
238         w->hdr.xml_res_entry.size = xml_size;
239         w->hdr.xml_res_entry.original_size = xml_size;
240         /* XML data offset is unchanged. */
241
242         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
243                 DEBUG("Writing integrity table.");
244                 w->hdr.integrity.offset = xml_end;
245                 if (integrity_table) {
246                         /* The existing integrity table was saved. */
247                         bytes_written = fwrite(integrity_table, 1,
248                                                w->hdr.integrity.size, fp);
249                         if (bytes_written != w->hdr.integrity.size) {
250                                 ERROR_WITH_ERRNO("Failed to write integrity "
251                                                  "table");
252                                 ret = WIMLIB_ERR_WRITE;
253                                 goto err;
254                         }
255                         FREE(integrity_table);
256                 } else {
257                         /* There was no existing integrity table, so a new one
258                          * must be calculated. */
259                         ret = write_integrity_table(fp, WIM_HEADER_DISK_SIZE,
260                                         w->hdr.lookup_table_res_entry.offset +
261                                         w->hdr.lookup_table_res_entry.size,
262                                         write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
263                         if (ret != 0)
264                                 return ret;
265
266                         off_t end_integrity = ftello(fp);
267                         if (end_integrity == -1)
268                                 return WIMLIB_ERR_WRITE;
269
270                         off_t integrity_size           = end_integrity - xml_end;
271                         w->hdr.integrity.size          = integrity_size;
272                         w->hdr.integrity.original_size = integrity_size;
273                         w->hdr.integrity.flags         = 0;
274                 }
275         } else {
276                 DEBUG("Truncating file to end of XML data.");
277                 /* No integrity table to write.  The file should be truncated
278                  * because it's possible that the old file was longer (due to it
279                  * including an integrity table, or due to its XML data being
280                  * longer) */
281                 if (fflush(fp) != 0) {
282                         ERROR_WITH_ERRNO("Failed to flush stream for file `%s'",
283                                          w->filename);
284                         return WIMLIB_ERR_WRITE;
285                 }
286                 if (ftruncate(fileno(fp), xml_end) != 0) {
287                         ERROR_WITH_ERRNO("Failed to truncate `%s' to %"PRIu64" "
288                                          "bytes", w->filename, xml_end);
289                         return WIMLIB_ERR_WRITE;
290                 }
291                 memset(&w->hdr.integrity, 0, sizeof(struct resource_entry));
292         }
293
294         DEBUG("Overwriting header.");
295         /* Overwrite the header. */
296         if (fseeko(fp, 0, SEEK_SET) != 0) {
297                 ERROR_WITH_ERRNO("Failed to seek to beginning of `%s'",
298                                  w->filename);
299                 return WIMLIB_ERR_WRITE;
300         }
301
302         ret = write_header(&w->hdr, fp);
303         if (ret != 0)
304                 return ret;
305
306         DEBUG("Closing `%s'.", w->filename);
307         if (fclose(fp) != 0) {
308                 ERROR_WITH_ERRNO("Failed to close `%s'", w->filename);
309                 return WIMLIB_ERR_WRITE;
310         }
311         w->fp = NULL;
312         DEBUG("Done.");
313         return 0;
314 err:
315         FREE(integrity_table);
316         return ret;
317 }
318
319
320 /* Chunk table that's located at the beginning of each compressed resource in
321  * the WIM.  (This is not the on-disk format; the on-disk format just has an
322  * array of offsets.) */
323 struct chunk_table {
324         off_t file_offset;
325         u64 num_chunks;
326         u64 original_resource_size;
327         u64 bytes_per_chunk_entry;
328         u64 table_disk_size;
329         u64 cur_offset;
330         u64 *cur_offset_p;
331         u64 offsets[0];
332 };
333
334 /*
335  * Allocates and initializes a chunk table, and reserves space for it in the
336  * output file.
337  */
338 static int
339 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
340                              FILE *out_fp,
341                              off_t file_offset,
342                              struct chunk_table **chunk_tab_ret)
343 {
344         u64 size = wim_resource_size(lte);
345         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
346         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
347         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
348         int ret;
349
350         if (!chunk_tab) {
351                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
352                       "resource", size);
353                 ret = WIMLIB_ERR_NOMEM;
354                 goto out;
355         }
356         chunk_tab->file_offset = file_offset;
357         chunk_tab->num_chunks = num_chunks;
358         chunk_tab->original_resource_size = size;
359         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
360         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
361                                      (num_chunks - 1);
362         chunk_tab->cur_offset = 0;
363         chunk_tab->cur_offset_p = chunk_tab->offsets;
364
365         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
366                    chunk_tab->table_disk_size) {
367                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
368                                  "file resource");
369                 ret = WIMLIB_ERR_WRITE;
370                 goto out;
371         }
372
373         ret = 0;
374 out:
375         *chunk_tab_ret = chunk_tab;
376         return ret;
377 }
378
379 typedef int (*compress_func_t)(const void *, unsigned, void *, unsigned *);
380
381 compress_func_t get_compress_func(int out_ctype)
382 {
383         if (out_ctype == WIM_COMPRESSION_TYPE_LZX)
384                 return lzx_compress;
385         else
386                 return xpress_compress;
387 }
388
389
390 /*
391  * Compresses a chunk of a WIM resource.
392  *
393  * @chunk:              Uncompressed data of the chunk.
394  * @chunk_size:         Size of the uncompressed chunk in bytes.
395  * @compressed_chunk:   Pointer to output buffer of size at least
396  *                              (@chunk_size - 1) bytes.
397  * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
398  *                                      of the compressed chunk will be
399  *                                      returned.
400  * @ctype:      Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
401  *              or WIM_COMPRESSION_TYPE_XPRESS.
402  *
403  * Returns zero if compressed succeeded, and nonzero if the chunk could not be
404  * compressed to any smaller than @chunk_size.  This function cannot fail for
405  * any other reasons.
406  */
407 static int compress_chunk(const u8 chunk[], unsigned chunk_size,
408                           u8 compressed_chunk[],
409                           unsigned *compressed_chunk_len_ret,
410                           int ctype)
411 {
412         compress_func_t compress = get_compress_func(ctype);
413         return (*compress)(chunk, chunk_size, compressed_chunk,
414                            compressed_chunk_len_ret);
415 }
416
417 /*
418  * Writes a chunk of a WIM resource to an output file.
419  *
420  * @chunk:        Uncompressed data of the chunk.
421  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
422  * @out_fp:       FILE * to write tho chunk to.
423  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
424  *                      chunk table provided)
425  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
426  *                      offset of the chunk we write.
427  *
428  * Returns 0 on success; nonzero on failure.
429  */
430 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
431                                     FILE *out_fp, int out_ctype,
432                                     struct chunk_table *chunk_tab)
433 {
434         const u8 *out_chunk;
435         unsigned out_chunk_size;
436
437         wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
438
439         if (!chunk_tab) {
440                 out_chunk = chunk;
441                 out_chunk_size = chunk_size;
442         } else {
443                 u8 *compressed_chunk = alloca(chunk_size);
444                 int ret;
445
446                 ret = compress_chunk(chunk, chunk_size, compressed_chunk,
447                                      &out_chunk_size, out_ctype);
448                 if (ret == 0) {
449                         out_chunk = compressed_chunk;
450                 } else {
451                         out_chunk = chunk;
452                         out_chunk_size = chunk_size;
453                 }
454                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
455                 chunk_tab->cur_offset += out_chunk_size;
456         }
457
458         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
459                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
460                 return WIMLIB_ERR_WRITE;
461         }
462         return 0;
463 }
464
465 /*
466  * Finishes a WIM chunk tale and writes it to the output file at the correct
467  * offset.
468  *
469  * The final size of the full compressed resource is returned in the
470  * @compressed_size_p.
471  */
472 static int
473 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
474                               FILE *out_fp, u64 *compressed_size_p)
475 {
476         size_t bytes_written;
477         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
478                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
479                                  "WIM file", chunk_tab->file_offset);
480                 return WIMLIB_ERR_WRITE;
481         }
482
483         if (chunk_tab->bytes_per_chunk_entry == 8) {
484                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
485         } else {
486                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
487                         ((u32*)chunk_tab->offsets)[i] =
488                                 cpu_to_le32(chunk_tab->offsets[i]);
489         }
490         bytes_written = fwrite((u8*)chunk_tab->offsets +
491                                         chunk_tab->bytes_per_chunk_entry,
492                                1, chunk_tab->table_disk_size, out_fp);
493         if (bytes_written != chunk_tab->table_disk_size) {
494                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
495                                  "file resource");
496                 return WIMLIB_ERR_WRITE;
497         }
498         if (fseeko(out_fp, 0, SEEK_END) != 0) {
499                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
500                 return WIMLIB_ERR_WRITE;
501         }
502         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
503         return 0;
504 }
505
506 static int prepare_resource_for_read(struct lookup_table_entry *lte
507
508                                         #ifdef WITH_NTFS_3G
509                                         , ntfs_inode **ni_ret
510                                         #endif
511                 )
512 {
513         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
514              && !lte->file_on_disk_fp)
515         {
516                 wimlib_assert(lte->file_on_disk);
517                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
518                 if (!lte->file_on_disk_fp) {
519                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
520                                          "reading", lte->file_on_disk);
521                         return WIMLIB_ERR_OPEN;
522                 }
523         }
524 #ifdef WITH_NTFS_3G
525         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
526                   && !lte->attr)
527         {
528                 struct ntfs_location *loc = lte->ntfs_loc;
529                 ntfs_inode *ni;
530                 wimlib_assert(loc);
531                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
532                 if (!ni) {
533                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
534                                          "volume", loc->path_utf8);
535                         return WIMLIB_ERR_NTFS_3G;
536                 }
537                 lte->attr = ntfs_attr_open(ni,
538                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
539                                            (ntfschar*)loc->stream_name_utf16,
540                                            loc->stream_name_utf16_num_chars);
541                 if (!lte->attr) {
542                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
543                                          "NTFS volume", loc->path_utf8);
544                         ntfs_inode_close(ni);
545                         return WIMLIB_ERR_NTFS_3G;
546                 }
547                 *ni_ret = ni;
548         }
549 #endif
550         return 0;
551 }
552
553 static void end_wim_resource_read(struct lookup_table_entry *lte
554                                 #ifdef WITH_NTFS_3G
555                                         , ntfs_inode *ni
556                                 #endif
557                                         )
558 {
559         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
560             && lte->file_on_disk_fp) {
561                 fclose(lte->file_on_disk_fp);
562                 lte->file_on_disk_fp = NULL;
563         }
564 #ifdef WITH_NTFS_3G
565         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
566                 if (lte->attr) {
567                         ntfs_attr_close(lte->attr);
568                         lte->attr = NULL;
569                 }
570                 if (ni)
571                         ntfs_inode_close(ni);
572         }
573 #endif
574 }
575
576 /*
577  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
578  * written uncompressed or compressed depending on the @out_ctype parameter.
579  *
580  * If by chance the resource compresses to more than the original size (this may
581  * happen with random data or files than are pre-compressed), the resource is
582  * instead written uncompressed (and this is reflected in the @out_res_entry by
583  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
584  *
585  * @lte:        The lookup table entry for the WIM resource.
586  * @out_fp:     The FILE * to write the resource to.
587  * @out_ctype:  The compression type of the resource to write.  Note: if this is
588  *                      the same as the compression type of the WIM resource we
589  *                      need to read, we simply copy the data (i.e. we do not
590  *                      uncompress it, then compress it again).
591  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
592  *                  offset, original size, compressed size, and compression flag
593  *                  of the output resource.
594  *
595  * Returns 0 on success; nonzero on failure.
596  */
597 int write_wim_resource(struct lookup_table_entry *lte,
598                        FILE *out_fp, int out_ctype,
599                        struct resource_entry *out_res_entry,
600                        int flags)
601 {
602         u64 bytes_remaining;
603         u64 original_size;
604         u64 old_compressed_size;
605         u64 new_compressed_size;
606         u64 offset;
607         int ret;
608         struct chunk_table *chunk_tab = NULL;
609         bool raw;
610         off_t file_offset;
611 #ifdef WITH_NTFS_3G
612         ntfs_inode *ni = NULL;
613 #endif
614
615         wimlib_assert(lte);
616
617         /* Original size of the resource */
618         original_size = wim_resource_size(lte);
619
620         /* Compressed size of the resource (as it exists now) */
621         old_compressed_size = wim_resource_compressed_size(lte);
622
623         /* Current offset in output file */
624         file_offset = ftello(out_fp);
625         if (file_offset == -1) {
626                 ERROR_WITH_ERRNO("Failed to get offset in output "
627                                  "stream");
628                 return WIMLIB_ERR_WRITE;
629         }
630
631         /* Are the compression types the same?  If so, do a raw copy (copy
632          * without decompressing and recompressing the data). */
633         raw = (wim_resource_compression_type(lte) == out_ctype
634                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
635
636         if (raw) {
637                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
638                 bytes_remaining = old_compressed_size;
639         } else {
640                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
641                 bytes_remaining = original_size;
642         }
643
644         /* Empty resource; nothing needs to be done, so just return success. */
645         if (bytes_remaining == 0)
646                 return 0;
647
648         /* Buffer for reading chunks for the resource */
649         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
650
651         /* If we are writing a compressed resource and not doing a raw copy, we
652          * need to initialize the chunk table */
653         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
654                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
655                                                    &chunk_tab);
656                 if (ret != 0)
657                         goto out;
658         }
659
660         /* If the WIM resource is in an external file, open a FILE * to it so we
661          * don't have to open a temporary one in read_wim_resource() for each
662          * chunk. */
663 #ifdef WITH_NTFS_3G
664         ret = prepare_resource_for_read(lte, &ni);
665 #else
666         ret = prepare_resource_for_read(lte);
667 #endif
668         if (ret != 0)
669                 goto out;
670
671         /* If we aren't doing a raw copy, we will compute the SHA1 message
672          * digest of the resource as we read it, and verify it's the same as the
673          * hash given in the lookup table entry once we've finished reading the
674          * resource. */
675         SHA_CTX ctx;
676         if (!raw)
677                 sha1_init(&ctx);
678
679         /* While there are still bytes remaining in the WIM resource, read a
680          * chunk of the resource, update SHA1, then write that chunk using the
681          * desired compression type. */
682         offset = 0;
683         do {
684                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
685                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
686                 if (ret != 0)
687                         goto out_fclose;
688                 if (!raw)
689                         sha1_update(&ctx, buf, to_read);
690                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
691                                                out_ctype, chunk_tab);
692                 if (ret != 0)
693                         goto out_fclose;
694                 bytes_remaining -= to_read;
695                 offset += to_read;
696         } while (bytes_remaining);
697
698         /* Raw copy:  The new compressed size is the same as the old compressed
699          * size
700          *
701          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
702          * original size
703          *
704          * Using a different compression type:  Call
705          * finish_wim_resource_chunk_tab() and it will provide the new
706          * compressed size.
707          */
708         if (raw) {
709                 new_compressed_size = old_compressed_size;
710         } else {
711                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
712                         new_compressed_size = original_size;
713                 else {
714                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
715                                                             &new_compressed_size);
716                         if (ret != 0)
717                                 goto out_fclose;
718                 }
719         }
720
721         /* Verify SHA1 message digest of the resource, unless we are doing a raw
722          * write (in which case we never even saw the uncompressed data).  Or,
723          * if the hash we had before is all 0's, just re-set it to be the new
724          * hash. */
725         if (!raw) {
726                 u8 md[SHA1_HASH_SIZE];
727                 sha1_final(md, &ctx);
728                 if (is_zero_hash(lte->hash)) {
729                         copy_hash(lte->hash, md);
730                 } else if (!hashes_equal(md, lte->hash)) {
731                         ERROR("WIM resource has incorrect hash!");
732                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
733                                 ERROR("We were reading it from `%s'; maybe it changed "
734                                       "while we were reading it.",
735                                       lte->file_on_disk);
736                         }
737                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
738                         goto out_fclose;
739                 }
740         }
741
742         if (!raw && new_compressed_size >= original_size &&
743             out_ctype != WIM_COMPRESSION_TYPE_NONE)
744         {
745                 /* Oops!  We compressed the resource to larger than the original
746                  * size.  Write the resource uncompressed instead. */
747                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
748                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
749                                          "of output WIM file", file_offset);
750                         ret = WIMLIB_ERR_WRITE;
751                         goto out_fclose;
752                 }
753                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
754                                          out_res_entry, flags);
755                 if (ret != 0)
756                         goto out_fclose;
757                 if (fflush(out_fp) != 0) {
758                         ERROR_WITH_ERRNO("Failed to flush output WIM file");
759                         ret = WIMLIB_ERR_WRITE;
760                         goto out_fclose;
761                 }
762                 if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
763                         ERROR_WITH_ERRNO("Failed to truncate output WIM file");
764                         ret = WIMLIB_ERR_WRITE;
765                         goto out_fclose;
766                 }
767         } else {
768                 if (out_res_entry) {
769                         out_res_entry->size          = new_compressed_size;
770                         out_res_entry->original_size = original_size;
771                         out_res_entry->offset        = file_offset;
772                         out_res_entry->flags         = lte->resource_entry.flags
773                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
774                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
775                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
776                 }
777         }
778         ret = 0;
779 out_fclose:
780 #ifdef WITH_NTFS_3G
781         end_wim_resource_read(lte, ni);
782 #else
783         end_wim_resource_read(lte);
784 #endif
785 out:
786         FREE(chunk_tab);
787         return ret;
788 }
789
790
791 #ifdef ENABLE_MULTITHREADED_COMPRESSION
/*
 * Bounded circular queue of opaque pointers used to pass work between threads.
 * The two semaphores count the occupied and the free slots; @lock protects the
 * indices and the slot array themselves.
 */
struct shared_queue {
        /* Number of slots currently holding an object (getters wait on it). */
        sem_t filled_slots;

        /* Number of slots currently free (putters wait on it). */
        sem_t empty_slots;

        /* Serializes updates of @front, @back and @array. */
        pthread_mutex_t lock;

        /* Index of the slot the next get will read. */
        unsigned front;

        /* Index of the slot most recently written by a put. */
        unsigned back;

        /* Storage for @size object pointers. */
        void **array;

        /* Capacity of @array, in slots. */
        unsigned size;
};
801
802 static int shared_queue_init(struct shared_queue *q, unsigned size)
803 {
804         q->array = CALLOC(sizeof(q->array[0]), size);
805         if (!q->array)
806                 return WIMLIB_ERR_NOMEM;
807
808         sem_init(&q->filled_slots, 0, 0);
809         sem_init(&q->empty_slots, 0, size);
810         pthread_mutex_init(&q->lock, NULL);
811         q->front = 0;
812         q->back = size - 1;
813         q->size = size;
814         return 0;
815 }
816
817 static void shared_queue_destroy(struct shared_queue *q)
818 {
819         sem_destroy(&q->filled_slots);
820         sem_destroy(&q->empty_slots);
821         pthread_mutex_destroy(&q->lock);
822         FREE(q->array);
823 }
824
825 static void shared_queue_put(struct shared_queue *q, void *obj)
826 {
827         sem_wait(&q->empty_slots);
828         pthread_mutex_lock(&q->lock);
829
830         q->back = (q->back + 1) % q->size;
831         q->array[q->back] = obj;
832
833         sem_post(&q->filled_slots);
834         pthread_mutex_unlock(&q->lock);
835 }
836
837 static void *shared_queue_get(struct shared_queue *q)
838 {
839         sem_wait(&q->filled_slots);
840         pthread_mutex_lock(&q->lock);
841
842         void *obj = q->array[q->front];
843         q->array[q->front] = NULL;
844         q->front = (q->front + 1) % q->size;
845
846         sem_post(&q->empty_slots);
847         pthread_mutex_unlock(&q->lock);
848         return obj;
849 }
850
851 static inline int shared_queue_get_filled(struct shared_queue *q)
852 {
853         int sval;
854         sem_getvalue(&q->filled_slots, &sval);
855         return sval;
856 }
857
858 struct compressor_thread_params {
859         struct shared_queue *res_to_compress_queue;
860         struct shared_queue *compressed_res_queue;
861         compress_func_t compress;
862 };
863
864 #define MAX_CHUNKS_PER_MSG 2
865
866 struct message {
867         struct lookup_table_entry *lte;
868         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
869         u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];
870         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
871         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
872         unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
873         unsigned num_chunks;
874         struct list_head list;
875         bool complete;
876         u64 begin_chunk;
877 };
878
879 static void compress_chunks(struct message *msg, compress_func_t compress)
880 {
881         for (unsigned i = 0; i < msg->num_chunks; i++) {
882                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
883                 int ret = compress(msg->uncompressed_chunks[i],
884                                    msg->uncompressed_chunk_sizes[i],
885                                    msg->compressed_chunks[i],
886                                    &msg->compressed_chunk_sizes[i]);
887                 if (ret == 0) {
888                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
889                 } else {
890                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
891                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
892                 }
893         }
894 }
895
896 static void *compressor_thread_proc(void *arg)
897 {
898         struct compressor_thread_params *params = arg;
899         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
900         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
901         compress_func_t compress = params->compress;
902         struct message *msg;
903
904         DEBUG("Compressor thread ready");
905         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
906                 compress_chunks(msg, compress);
907                 shared_queue_put(compressed_res_queue, msg);
908         }
909         DEBUG("Compressor thread terminating");
910 }
911 #endif
912
913 static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
914                                        u64 total_size, u64 one_percent,
915                                        unsigned *cur_percent,
916                                        const struct lookup_table_entry *cur_lte)
917 {
918         if (*cur_size >= *next_size) {
919                 printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
920                        "(uncompressed) written (%u%% done)",
921                        *cur_size >> 20,
922                        total_size >> 20, *cur_percent);
923                 fflush(stdout);
924                 *next_size += one_percent;
925                 (*cur_percent)++;
926         }
927         *cur_size += wim_resource_size(cur_lte);
928 }
929
930 static void finish_stream_write_progress(u64 total_size)
931 {
932         printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
933                "(uncompressed) written (100%% done)\n",
934                total_size >> 20, total_size >> 20);
935         fflush(stdout);
936 }
937
938 static int write_stream_list_serial(struct list_head *stream_list,
939                                     FILE *out_fp, int out_ctype,
940                                     int write_flags, u64 total_size)
941 {
942         struct lookup_table_entry *lte;
943         int ret;
944
945         u64 one_percent = total_size / 100;
946         u64 cur_size = 0;
947         u64 next_size = 0;
948         unsigned cur_percent = 0;
949
950         list_for_each_entry(lte, stream_list, staging_list) {
951                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
952                         show_stream_write_progress(&cur_size, &next_size,
953                                                    total_size, one_percent,
954                                                    &cur_percent, lte);
955                 }
956                 ret = write_wim_resource(lte, out_fp, out_ctype,
957                                          &lte->output_resource_entry, 0);
958                 if (ret != 0)
959                         return ret;
960         }
961         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
962                 finish_stream_write_progress(total_size);
963         return 0;
964 }
965
966 #ifdef ENABLE_MULTITHREADED_COMPRESSION
967 static int write_wim_chunks(struct message *msg, FILE *out_fp,
968                             struct chunk_table *chunk_tab)
969 {
970         for (unsigned i = 0; i < msg->num_chunks; i++) {
971                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
972
973                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
974                       i, msg->num_chunks, chunk_csize);
975
976                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
977                     != chunk_csize)
978                 {
979                         ERROR_WITH_ERRNO("Failed to write WIM");
980                         return WIMLIB_ERR_WRITE;
981                 }
982
983                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
984                 chunk_tab->cur_offset += chunk_csize;
985         }
986         return 0;
987 }
988
989 /*
990  * This function is executed by the main thread when the resources are being
991  * compressed in parallel.  The main thread is in change of all reading of the
992  * uncompressed data and writing of the compressed data.  The compressor threads
993  * *only* do compression from/to in-memory buffers.
994  *
995  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
996  * chunks of compressed data to compress, represented in a `struct message'.
997  * Each message is passed from the main thread to a worker thread through the
998  * res_to_compress_queue, and it is passed back through the
999  * compressed_res_queue.
1000  */
1001 static int main_writer_thread_proc(struct list_head *stream_list,
1002                                    FILE *out_fp,
1003                                    int out_ctype,
1004                                    struct shared_queue *res_to_compress_queue,
1005                                    struct shared_queue *compressed_res_queue,
1006                                    size_t queue_size,
1007                                    int write_flags,
1008                                    u64 total_size)
1009 {
1010         int ret;
1011
1012
1013         struct message msgs[queue_size];
1014         ZERO_ARRAY(msgs);
1015
1016         // Initially, all the messages are available to use.
1017         LIST_HEAD(available_msgs);
1018         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
1019                 list_add(&msgs[i].list, &available_msgs);
1020
1021         // outstanding_resources is the list of resources that currently have
1022         // had chunks sent off for compression.
1023         //
1024         // The first stream in outstanding_resources is the stream that is
1025         // currently being written (cur_lte).
1026         //
1027         // The last stream in outstanding_resources is the stream that is
1028         // currently being read and chunks fed to the compressor threads
1029         // (next_lte).
1030         //
1031         // Depending on the number of threads and the sizes of the resource,
1032         // the outstanding streams list may contain streams between cur_lte and
1033         // next_lte that have all their chunks compressed or being compressed,
1034         // but haven't been written yet.
1035         //
1036         LIST_HEAD(outstanding_resources);
1037         struct list_head *next_resource = stream_list->next;
1038         struct lookup_table_entry *next_lte = container_of(next_resource,
1039                                                            struct lookup_table_entry,
1040                                                            staging_list);
1041         next_resource = next_resource->next;
1042         u64 next_chunk = 0;
1043         u64 next_num_chunks = wim_resource_chunks(next_lte);
1044         INIT_LIST_HEAD(&next_lte->msg_list);
1045         list_add_tail(&next_lte->staging_list, &outstanding_resources);
1046
1047         // As in write_wim_resource(), each resource we read is checksummed.
1048         SHA_CTX next_sha_ctx;
1049         sha1_init(&next_sha_ctx);
1050         u8 next_hash[SHA1_HASH_SIZE];
1051
1052         // Resources that don't need any chunks compressed are added to this
1053         // list and written directly by the main thread.
1054         LIST_HEAD(my_resources);
1055
1056         struct lookup_table_entry *cur_lte = next_lte;
1057         struct chunk_table *cur_chunk_tab = NULL;
1058         struct lookup_table_entry *lte;
1059         struct message *msg;
1060
1061         u64 one_percent = total_size / 100;
1062         u64 cur_size = 0;
1063         u64 next_size = 0;
1064         unsigned cur_percent = 0;
1065
1066 #ifdef WITH_NTFS_3G
1067         ntfs_inode *ni = NULL;
1068 #endif
1069
1070 #ifdef WITH_NTFS_3G
1071         ret = prepare_resource_for_read(next_lte, &ni);
1072 #else
1073         ret = prepare_resource_for_read(next_lte);
1074 #endif
1075         if (ret != 0)
1076                 goto out;
1077
1078         DEBUG("Initializing buffers for uncompressed "
1079               "and compressed data (%zu bytes needed)",
1080               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
1081
1082         // Pre-allocate all the buffers that will be needed to do the chunk
1083         // compression.
1084         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1085                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1086                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1087                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1088                         if (msgs[i].compressed_chunks[j] == NULL ||
1089                             msgs[i].uncompressed_chunks[j] == NULL)
1090                         {
1091                                 ERROR("Could not allocate enough memory for "
1092                                       "multi-threaded compression");
1093                                 ret = WIMLIB_ERR_NOMEM;
1094                                 goto out;
1095                         }
1096                 }
1097         }
1098
1099         // This loop is executed until all resources have been written, except
1100         // possibly a few that have been added to the @my_resources list for
1101         // writing later.
1102         while (1) {
1103                 // Send chunks to the compressor threads until either (a) there
1104                 // are no more messages available since they were all sent off,
1105                 // or (b) there are no more resources that need to be
1106                 // compressed.
1107                 while (!list_empty(&available_msgs) && next_lte != NULL) {
1108
1109                         // Get a message from the available messages
1110                         // list
1111                         msg = container_of(available_msgs.next,
1112                                            struct message,
1113                                            list);
1114
1115                         // ... and delete it from the available messages
1116                         // list
1117                         list_del(&msg->list);
1118
1119                         // Initialize the message with the chunks to
1120                         // compress.
1121                         msg->num_chunks = min(next_num_chunks - next_chunk,
1122                                               MAX_CHUNKS_PER_MSG);
1123                         msg->lte = next_lte;
1124                         msg->complete = false;
1125                         msg->begin_chunk = next_chunk;
1126
1127                         unsigned size = WIM_CHUNK_SIZE;
1128                         for (unsigned i = 0; i < msg->num_chunks; i++) {
1129
1130                                 // Read chunk @next_chunk of the stream into the
1131                                 // message so that a compressor thread can
1132                                 // compress it.
1133
1134                                 if (next_chunk == next_num_chunks - 1 &&
1135                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
1136                                 {
1137                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
1138                                 }
1139
1140
1141                                 DEBUG2("Read resource (size=%u, offset=%zu)",
1142                                       size, next_chunk * WIM_CHUNK_SIZE);
1143
1144                                 msg->uncompressed_chunk_sizes[i] = size;
1145
1146                                 ret = read_wim_resource(next_lte,
1147                                                         msg->uncompressed_chunks[i],
1148                                                         size,
1149                                                         next_chunk * WIM_CHUNK_SIZE,
1150                                                         0);
1151                                 if (ret != 0)
1152                                         goto out;
1153                                 sha1_update(&next_sha_ctx,
1154                                             msg->uncompressed_chunks[i], size);
1155                                 next_chunk++;
1156                         }
1157
1158                         // Send the compression request
1159                         list_add_tail(&msg->list, &next_lte->msg_list);
1160                         shared_queue_put(res_to_compress_queue, msg);
1161                         DEBUG2("Compression request sent");
1162
1163                         if (next_chunk != next_num_chunks)
1164                                 // More chunks to send for this resource
1165                                 continue;
1166
1167                         // Done sending compression requests for a resource!
1168                         // Check the SHA1 message digest.
1169                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
1170                         sha1_final(next_hash, &next_sha_ctx);
1171                         if (!hashes_equal(next_lte->hash, next_hash)) {
1172                                 ERROR("WIM resource has incorrect hash!");
1173                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
1174                                         ERROR("We were reading it from `%s'; maybe it changed "
1175                                               "while we were reading it.",
1176                                               next_lte->file_on_disk);
1177                                 }
1178                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1179                                 goto out;
1180                         }
1181
1182                         // Advance to the next resource.
1183                         //
1184                         // If the next resource needs no compression, just write
1185                         // it with this thread (not now though--- we could be in
1186                         // the middle of writing another resource.)  Keep doing
1187                         // this until we either get to the end of the resources
1188                         // list, or we get to a resource that needs compression.
1189
1190                         while (1) {
1191                                 if (next_resource == stream_list) {
1192                                         next_lte = NULL;
1193                                         break;
1194                                 }
1195                         #ifdef WITH_NTFS_3G
1196                                 end_wim_resource_read(next_lte, ni);
1197                                 ni = NULL;
1198                         #else
1199                                 end_wim_resource_read(next_lte);
1200                         #endif
1201
1202                                 next_lte = container_of(next_resource,
1203                                                         struct lookup_table_entry,
1204                                                         staging_list);
1205                                 next_resource = next_resource->next;
1206                                 if ((next_lte->resource_location == RESOURCE_IN_WIM
1207                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
1208                                     || wim_resource_size(next_lte) == 0)
1209                                 {
1210                                         list_add_tail(&next_lte->staging_list,
1211                                                       &my_resources);
1212                                 } else {
1213                                         list_add_tail(&next_lte->staging_list,
1214                                                       &outstanding_resources);
1215                                         next_chunk = 0;
1216                                         next_num_chunks = wim_resource_chunks(next_lte);
1217                                         sha1_init(&next_sha_ctx);
1218                                         INIT_LIST_HEAD(&next_lte->msg_list);
1219                                 #ifdef WITH_NTFS_3G
1220                                         ret = prepare_resource_for_read(next_lte, &ni);
1221                                 #else
1222                                         ret = prepare_resource_for_read(next_lte);
1223                                 #endif
1224                                         if (ret != 0)
1225                                                 goto out;
1226                                         DEBUG2("Updated next_lte");
1227                                         break;
1228                                 }
1229                         }
1230                 }
1231
1232                 // If there are no outstanding resources, there are no more
1233                 // resources that need to be written.
1234                 if (list_empty(&outstanding_resources)) {
1235                         DEBUG("No outstanding resources! Done");
1236                         ret = 0;
1237                         goto out;
1238                 }
1239
1240                 // Get the next message from the queue and process it.
1241                 // The message will contain 1 or more data chunks that have been
1242                 // compressed.
1243                 DEBUG2("Waiting for message");
1244                 msg = shared_queue_get(compressed_res_queue);
1245                 msg->complete = true;
1246
1247                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1248
1249                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
1250                         DEBUG2("complete=%d", msg->complete);
1251                 }
1252
1253                 // Is this the next chunk in the current resource?  If it's not
1254                 // (i.e., an earlier chunk in a same or different resource
1255                 // hasn't been compressed yet), do nothing, and keep this
1256                 // message around until all earlier chunks are received.
1257                 //
1258                 // Otherwise, write all the chunks we can.
1259                 while (!list_empty(&cur_lte->msg_list)
1260                         && (msg = container_of(cur_lte->msg_list.next,
1261                                                struct message,
1262                                                list))->complete)
1263                 {
1264                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1265                         if (msg->begin_chunk == 0) {
1266                                 DEBUG2("Begin chunk tab");
1267                                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1268                                         show_stream_write_progress(&cur_size,
1269                                                                    &next_size,
1270                                                                    total_size,
1271                                                                    one_percent,
1272                                                                    &cur_percent,
1273                                                                    cur_lte);
1274                                 }
1275
1276                                 // This is the first set of chunks.  Leave space
1277                                 // for the chunk table in the output file.
1278                                 off_t cur_offset = ftello(out_fp);
1279                                 if (cur_offset == -1) {
1280                                         ret = WIMLIB_ERR_WRITE;
1281                                         goto out;
1282                                 }
1283                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1284                                                                    out_fp,
1285                                                                    cur_offset,
1286                                                                    &cur_chunk_tab);
1287                                 if (ret != 0)
1288                                         goto out;
1289                         }
1290
1291                         // Write the compressed chunks from the message.
1292                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1293                         if (ret != 0)
1294                                 goto out;
1295
1296                         list_del(&msg->list);
1297
1298                         // This message is available to use for different chunks
1299                         // now.
1300                         list_add(&msg->list, &available_msgs);
1301
1302                         // Was this the last chunk of the stream?  If so,
1303                         // finish it.
1304                         if (list_empty(&cur_lte->msg_list) &&
1305                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1306                         {
1307                                 DEBUG2("Finish wim chunk tab");
1308                                 u64 res_csize;
1309                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1310                                                                     out_fp,
1311                                                                     &res_csize);
1312                                 if (ret != 0)
1313                                         goto out;
1314
1315
1316                                 cur_lte->output_resource_entry.size =
1317                                         res_csize;
1318
1319                                 cur_lte->output_resource_entry.original_size =
1320                                         cur_lte->resource_entry.original_size;
1321
1322                                 cur_lte->output_resource_entry.offset =
1323                                         cur_chunk_tab->file_offset;
1324
1325                                 cur_lte->output_resource_entry.flags =
1326                                         cur_lte->resource_entry.flags |
1327                                                 WIM_RESHDR_FLAG_COMPRESSED;
1328
1329                                 FREE(cur_chunk_tab);
1330                                 cur_chunk_tab = NULL;
1331
1332                                 struct list_head *next = cur_lte->staging_list.next;
1333                                 list_del(&cur_lte->staging_list);
1334
1335                                 if (next == &outstanding_resources) {
1336                                         DEBUG("No more outstanding resources");
1337                                         ret = 0;
1338                                         goto out;
1339                                 } else {
1340                                         cur_lte = container_of(cur_lte->staging_list.next,
1341                                                                struct lookup_table_entry,
1342                                                                staging_list);
1343                                 }
1344
1345                                 // Since we just finished writing a stream,
1346                                 // write any streams that have been added to the
1347                                 // my_resources list for direct writing by the
1348                                 // main thread (e.g. resources that don't need
1349                                 // to be compressed because the desired
1350                                 // compression type is the same as the previous
1351                                 // compression type).
1352                                 struct lookup_table_entry *tmp;
1353                                 list_for_each_entry_safe(lte,
1354                                                          tmp,
1355                                                          &my_resources,
1356                                                          staging_list)
1357                                 {
1358                                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1359                                                 show_stream_write_progress(&cur_size,
1360                                                                            &next_size,
1361                                                                            total_size,
1362                                                                            one_percent,
1363                                                                            &cur_percent,
1364                                                                            lte);
1365                                         }
1366
1367                                         ret = write_wim_resource(lte,
1368                                                                  out_fp,
1369                                                                  out_ctype,
1370                                                                  &lte->output_resource_entry,
1371                                                                  0);
1372                                         list_del(&lte->staging_list);
1373                                         if (ret != 0)
1374                                                 goto out;
1375                                 }
1376                         }
1377                 }
1378         }
1379
1380 out:
1381 #ifdef WITH_NTFS_3G
1382         end_wim_resource_read(cur_lte, ni);
1383 #else
1384         end_wim_resource_read(cur_lte);
1385 #endif
1386         if (ret == 0) {
1387                 list_for_each_entry(lte, &my_resources, staging_list) {
1388                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1389                                 show_stream_write_progress(&cur_size,
1390                                                            &next_size,
1391                                                            total_size,
1392                                                            one_percent,
1393                                                            &cur_percent,
1394                                                            lte);
1395                         }
1396                         ret = write_wim_resource(lte, out_fp,
1397                                                  out_ctype,
1398                                                  &lte->output_resource_entry,
1399                                                  0);
1400                         if (ret != 0)
1401                                 break;
1402                 }
1403                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1404                         finish_stream_write_progress(total_size);
1405         } else {
1406                 size_t num_available_msgs = 0;
1407                 struct list_head *cur;
1408
1409                 list_for_each(cur, &available_msgs) {
1410                         num_available_msgs++;
1411                 }
1412
1413                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1414                         shared_queue_get(compressed_res_queue);
1415                         num_available_msgs++;
1416                 }
1417         }
1418
1419         DEBUG("Freeing messages");
1420
1421         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1422                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1423                         FREE(msgs[i].compressed_chunks[j]);
1424                         FREE(msgs[i].uncompressed_chunks[j]);
1425                 }
1426         }
1427
1428         if (cur_chunk_tab != NULL)
1429                 FREE(cur_chunk_tab);
1430         return ret;
1431 }
1432
1433
1434 static const char *get_data_type(int ctype)
1435 {
1436         switch (ctype) {
1437         case WIM_COMPRESSION_TYPE_NONE:
1438                 return "uncompressed";
1439         case WIM_COMPRESSION_TYPE_LZX:
1440                 return "LZX-compressed";
1441         case WIM_COMPRESSION_TYPE_XPRESS:
1442                 return "XPRESS-compressed";
1443         }
1444 }
1445
1446 static int write_stream_list_parallel(struct list_head *stream_list,
1447                                       FILE *out_fp, int out_ctype,
1448                                       int write_flags, u64 total_size,
1449                                       unsigned num_threads)
1450 {
1451         int ret;
1452         struct shared_queue res_to_compress_queue;
1453         struct shared_queue compressed_res_queue;
1454         pthread_t *compressor_threads = NULL;
1455
1456         if (num_threads == 0) {
1457                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1458                 if (nthreads < 1) {
1459                         WARNING("Could not determine number of processors! Assuming 1");
1460                         goto out_serial;
1461                 } else {
1462                         num_threads = nthreads;
1463                 }
1464         }
1465
1466         wimlib_assert(stream_list->next != stream_list);
1467
1468
1469         static const double MESSAGES_PER_THREAD = 2.0;
1470         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1471
1472         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1473
1474         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1475         if (ret != 0)
1476                 goto out_serial;
1477
1478         ret = shared_queue_init(&compressed_res_queue, queue_size);
1479         if (ret != 0)
1480                 goto out_destroy_res_to_compress_queue;
1481
1482         struct compressor_thread_params params;
1483         params.res_to_compress_queue = &res_to_compress_queue;
1484         params.compressed_res_queue = &compressed_res_queue;
1485         params.compress = get_compress_func(out_ctype);
1486
1487         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1488
1489         for (unsigned i = 0; i < num_threads; i++) {
1490                 DEBUG("pthread_create thread %u", i);
1491                 ret = pthread_create(&compressor_threads[i], NULL,
1492                                      compressor_thread_proc, &params);
1493                 if (ret != 0) {
1494                         ret = -1;
1495                         ERROR_WITH_ERRNO("Failed to create compressor "
1496                                          "thread %u", i);
1497                         num_threads = i;
1498                         goto out_join;
1499                 }
1500         }
1501
1502         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1503                 printf("Writing %s compressed data using %u threads...\n",
1504                        get_data_type(out_ctype), num_threads);
1505         }
1506
1507         ret = main_writer_thread_proc(stream_list,
1508                                       out_fp,
1509                                       out_ctype,
1510                                       &res_to_compress_queue,
1511                                       &compressed_res_queue,
1512                                       queue_size,
1513                                       write_flags,
1514                                       total_size);
1515
1516 out_join:
1517         for (unsigned i = 0; i < num_threads; i++)
1518                 shared_queue_put(&res_to_compress_queue, NULL);
1519
1520         for (unsigned i = 0; i < num_threads; i++) {
1521                 if (pthread_join(compressor_threads[i], NULL)) {
1522                         WARNING("Failed to join compressor thread %u: %s",
1523                                 i, strerror(errno));
1524                 }
1525         }
1526         FREE(compressor_threads);
1527         shared_queue_destroy(&compressed_res_queue);
1528 out_destroy_res_to_compress_queue:
1529         shared_queue_destroy(&res_to_compress_queue);
1530         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1531                 return ret;
1532 out_serial:
1533         WARNING("Falling back to single-threaded compression");
1534         return write_stream_list_serial(stream_list, out_fp,
1535                                         out_ctype, write_flags, total_size);
1536 }
1537 #endif
1538
1539 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1540                              int out_ctype, int write_flags,
1541                              unsigned num_threads)
1542 {
1543         struct lookup_table_entry *lte;
1544         size_t num_streams = 0;
1545         u64 total_size = 0;
1546         bool compression_needed = false;
1547
1548         list_for_each_entry(lte, stream_list, staging_list) {
1549                 num_streams++;
1550                 total_size += wim_resource_size(lte);
1551                 if (!compression_needed
1552                     && out_ctype != WIM_COMPRESSION_TYPE_NONE
1553                     && (lte->resource_location != RESOURCE_IN_WIM
1554                         || wimlib_get_compression_type(lte->wim) != out_ctype)
1555                     && wim_resource_size(lte) != 0)
1556                         compression_needed = true;
1557         }
1558
1559         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1560                 printf("Preparing to write %zu streams "
1561                        "(%"PRIu64" total bytes uncompressed)\n",
1562                        num_streams, total_size);
1563                 printf("Using compression type %s\n",
1564                        wimlib_get_compression_type_string(out_ctype));
1565         }
1566
1567 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1568         if (compression_needed && total_size >= 1000000 && num_threads != 1) {
1569                 return write_stream_list_parallel(stream_list, out_fp,
1570                                                   out_ctype, write_flags,
1571                                                   total_size, num_threads);
1572         }
1573         else
1574 #endif
1575         {
1576                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1577                         const char *reason = "";
1578                         if (!compression_needed)
1579                                 reason = " (no compression needed)";
1580                         printf("Writing %s data using 1 thread%s\n",
1581                                get_data_type(out_ctype), reason);
1582                 }
1583
1584                 return write_stream_list_serial(stream_list, out_fp,
1585                                                 out_ctype, write_flags,
1586                                                 total_size);
1587         }
1588 }
1589
1590
1591 static int dentry_find_streams_to_write(struct dentry *dentry,
1592                                         void *wim)
1593 {
1594         WIMStruct *w = wim;
1595         struct list_head *stream_list = w->private;
1596         struct lookup_table_entry *lte;
1597         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1598                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1599                 if (lte && ++lte->out_refcnt == 1)
1600                         list_add(&lte->staging_list, stream_list);
1601         }
1602         return 0;
1603 }
1604
1605 static int find_streams_to_write(WIMStruct *w)
1606 {
1607         return for_dentry_in_tree(wim_root_dentry(w),
1608                                   dentry_find_streams_to_write, w);
1609 }
1610
1611 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1612                              unsigned num_threads)
1613 {
1614
1615         LIST_HEAD(stream_list);
1616
1617         w->private = &stream_list;
1618         for_image(w, image, find_streams_to_write);
1619         return write_stream_list(&stream_list, w->out_fp,
1620                                  wimlib_get_compression_type(w), write_flags,
1621                                  num_threads);
1622 }
1623
1624 /*
1625  * Write the lookup table, xml data, and integrity table, then overwrite the WIM
1626  * header.
1627  */
1628 int finish_write(WIMStruct *w, int image, int write_flags)
1629 {
1630         off_t lookup_table_offset;
1631         off_t xml_data_offset;
1632         off_t lookup_table_size;
1633         off_t integrity_offset;
1634         off_t xml_data_size;
1635         off_t end_offset;
1636         off_t integrity_size;
1637         int ret;
1638         struct wim_header hdr;
1639         FILE *out = w->out_fp;
1640
1641         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1642                 /* Write the lookup table. */
1643                 lookup_table_offset = ftello(out);
1644                 if (lookup_table_offset == -1)
1645                         return WIMLIB_ERR_WRITE;
1646
1647                 DEBUG("Writing lookup table (offset %"PRIu64")",
1648                       lookup_table_offset);
1649                 ret = write_lookup_table(w->lookup_table, out);
1650                 if (ret != 0)
1651                         return ret;
1652         }
1653
1654         xml_data_offset = ftello(out);
1655         if (xml_data_offset == -1)
1656                 return WIMLIB_ERR_WRITE;
1657
1658         /* @hdr will be the header for the new WIM.  First copy all the data
1659          * from the header in the WIMStruct; then set all the fields that may
1660          * have changed, including the resource entries, boot index, and image
1661          * count.  */
1662         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1663         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1664                 lookup_table_size = xml_data_offset - lookup_table_offset;
1665                 hdr.lookup_table_res_entry.offset = lookup_table_offset;
1666                 hdr.lookup_table_res_entry.size = lookup_table_size;
1667         }
1668         hdr.lookup_table_res_entry.original_size = hdr.lookup_table_res_entry.size;
1669         hdr.lookup_table_res_entry.flags = WIM_RESHDR_FLAG_METADATA;
1670
1671         DEBUG("Writing XML data (offset %"PRIu64")", xml_data_offset);
1672         ret = write_xml_data(w->wim_info, image, out,
1673                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1674                                 wim_info_get_total_bytes(w->wim_info) : 0);
1675         if (ret != 0)
1676                 return ret;
1677
1678         integrity_offset = ftello(out);
1679         if (integrity_offset == -1)
1680                 return WIMLIB_ERR_WRITE;
1681         xml_data_size = integrity_offset - xml_data_offset;
1682
1683         hdr.xml_res_entry.offset                 = xml_data_offset;
1684         hdr.xml_res_entry.size                   = xml_data_size;
1685         hdr.xml_res_entry.original_size          = xml_data_size;
1686         hdr.xml_res_entry.flags                  = 0;
1687
1688         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1689                 ret = write_integrity_table(out, WIM_HEADER_DISK_SIZE,
1690                                             xml_data_offset,
1691                                             write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
1692                 if (ret != 0)
1693                         return ret;
1694                 end_offset = ftello(out);
1695                 if (end_offset == -1)
1696                         return WIMLIB_ERR_WRITE;
1697                 integrity_size              = end_offset - integrity_offset;
1698                 hdr.integrity.offset        = integrity_offset;
1699                 hdr.integrity.size          = integrity_size;
1700                 hdr.integrity.original_size = integrity_size;
1701         } else {
1702                 hdr.integrity.offset        = 0;
1703                 hdr.integrity.size          = 0;
1704                 hdr.integrity.original_size = 0;
1705         }
1706         hdr.integrity.flags = 0;
1707
1708         DEBUG("Updating WIM header.");
1709
1710         /*
1711          * In the WIM header, there is room for the resource entry for a
1712          * metadata resource labeled as the "boot metadata".  This entry should
1713          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1714          * it should be a copy of the resource entry for the image that is
1715          * marked as bootable.  This is not well documented...
1716          */
1717         if (hdr.boot_idx == 0 || !w->image_metadata
1718                         || (image != WIM_ALL_IMAGES && image != hdr.boot_idx)) {
1719                 memset(&hdr.boot_metadata_res_entry, 0,
1720                        sizeof(struct resource_entry));
1721         } else {
1722                 memcpy(&hdr.boot_metadata_res_entry,
1723                        &w->image_metadata[
1724                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1725                        sizeof(struct resource_entry));
1726         }
1727
1728         /* Set image count and boot index correctly for single image writes */
1729         if (image != WIM_ALL_IMAGES) {
1730                 hdr.image_count = 1;
1731                 if (hdr.boot_idx == image)
1732                         hdr.boot_idx = 1;
1733                 else
1734                         hdr.boot_idx = 0;
1735         }
1736
1737
1738         if (fseeko(out, 0, SEEK_SET) != 0)
1739                 return WIMLIB_ERR_WRITE;
1740
1741         ret = write_header(&hdr, out);
1742         if (ret != 0)
1743                 return ret;
1744
1745         if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
1746                 DEBUG("fsync output WIM file");
1747                 if (fflush(out) != 0
1748                     || fsync(fileno(out)) != 0)
1749                 {
1750                         ERROR_WITH_ERRNO("Error flushing data to WIM file");
1751                         ret = WIMLIB_ERR_WRITE;
1752                 }
1753         }
1754
1755         DEBUG("Closing output WIM file.");
1756
1757         if (fclose(out) != 0) {
1758                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1759                 ret = WIMLIB_ERR_WRITE;
1760         }
1761         w->out_fp = NULL;
1762         return ret;
1763 }
1764
1765 /* Open file stream and write dummy header for WIM. */
1766 int begin_write(WIMStruct *w, const char *path, int write_flags)
1767 {
1768         const char *mode;
1769         DEBUG("Opening `%s' for new WIM", path);
1770
1771         /* checking the integrity requires going back over the file to read it.
1772          * XXX
1773          * (It also would be possible to keep a running sha1sum as the file is
1774          * written-- this would be faster, but a bit more complicated) */
1775         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1776                 mode = "w+b";
1777         else
1778                 mode = "wb";
1779
1780         if (w->out_fp)
1781                 fclose(w->out_fp);
1782
1783         w->out_fp = fopen(path, mode);
1784         if (!w->out_fp) {
1785                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
1786                                  path);
1787                 return WIMLIB_ERR_OPEN;
1788         }
1789
1790         /* Write dummy header. It will be overwritten later. */
1791         return write_header(&w->hdr, w->out_fp);
1792 }
1793
1794 /* Writes a stand-alone WIM to a file.  */
1795 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1796                            int image, int write_flags, unsigned num_threads)
1797 {
1798         int ret;
1799
1800         if (!w || !path)
1801                 return WIMLIB_ERR_INVALID_PARAM;
1802
1803         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
1804
1805         if (image != WIM_ALL_IMAGES &&
1806              (image < 1 || image > w->hdr.image_count))
1807                 return WIMLIB_ERR_INVALID_IMAGE;
1808
1809
1810
1811         if (w->hdr.total_parts != 1) {
1812                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1813                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1814         }
1815
1816         if (image == WIM_ALL_IMAGES)
1817                 DEBUG("Writing all images to `%s'.", path);
1818         else
1819                 DEBUG("Writing image %d to `%s'.", image, path);
1820
1821         ret = begin_write(w, path, write_flags);
1822         if (ret != 0)
1823                 return ret;
1824
1825         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1826
1827         ret = write_wim_streams(w, image, write_flags, num_threads);
1828
1829         if (ret != 0) {
1830                 /*ERROR("Failed to write WIM file resources to `%s'", path);*/
1831                 return ret;
1832         }
1833
1834         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1835                 printf("Writing image metadata...\n");
1836
1837         ret = for_image(w, image, write_metadata_resource);
1838
1839         if (ret != 0) {
1840                 /*ERROR("Failed to write WIM image metadata to `%s'", path);*/
1841                 return ret;
1842         }
1843
1844         ret = finish_write(w, image, write_flags);
1845         if (ret != 0)
1846                 return ret;
1847
1848         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1849                 printf("Successfully wrote `%s'\n", path);
1850         return 0;
1851 }