--disable-multithreaded-compression option
[wimlib] / src / write.c
1 /*
2  * write.c
3  *
4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
5  * compressed file resources, etc.
6  */
7
8 /*
9  * Copyright (C) 2010 Carl Thijssen
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32 #include "xml.h"
33 #include "lzx.h"
34 #include "xpress.h"
35 #include <unistd.h>
36
37 #ifdef ENABLE_MULTITHREADED_COMPRESSION
38 #include <semaphore.h>
39 #include <pthread.h>
40 #include <errno.h>
41 #endif
42
43 #ifdef WITH_NTFS_3G
44 #include <time.h>
45 #include <ntfs-3g/attrib.h>
46 #include <ntfs-3g/inode.h>
47 #include <ntfs-3g/dir.h>
48 #endif
49
50
51 #ifdef HAVE_ALLOCA_H
52 #include <alloca.h>
53 #endif
54
55
56 /* Reopens the FILE* for a WIM read-write. */
57 static int reopen_rw(WIMStruct *w)
58 {
59         FILE *fp;
60
61         if (fclose(w->fp) != 0)
62                 ERROR_WITH_ERRNO("Failed to close the file `%s'", w->filename);
63         w->fp = NULL;
64         fp = fopen(w->filename, "r+b");
65         if (!fp) {
66                 ERROR_WITH_ERRNO("Failed to open `%s' for reading and writing",
67                                  w->filename);
68                 return WIMLIB_ERR_OPEN;
69         }
70         w->fp = fp;
71         return 0;
72 }
73
74
75
76 /*
77  * Writes a WIM file to the original file that it was read from, overwriting it.
78  */
79 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
80                                unsigned num_threads)
81 {
82         const char *wimfile_name;
83         size_t wim_name_len;
84         int ret;
85
86         if (!w)
87                 return WIMLIB_ERR_INVALID_PARAM;
88
89         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
90
91         wimfile_name = w->filename;
92
93         DEBUG("Replacing WIM file `%s'.", wimfile_name);
94
95         if (!wimfile_name)
96                 return WIMLIB_ERR_NO_FILENAME;
97
98         /* Write the WIM to a temporary file. */
99         /* XXX should the temporary file be somewhere else? */
100         wim_name_len = strlen(wimfile_name);
101         char tmpfile[wim_name_len + 10];
102         memcpy(tmpfile, wimfile_name, wim_name_len);
103         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
104         tmpfile[wim_name_len + 9] = '\0';
105
106         ret = wimlib_write(w, tmpfile, WIM_ALL_IMAGES, write_flags,
107                            num_threads);
108         if (ret != 0) {
109                 ERROR("Failed to write the WIM file `%s'", tmpfile);
110                 if (unlink(tmpfile) != 0)
111                         WARNING("Failed to remove `%s'", tmpfile);
112                 return ret;
113         }
114
115         DEBUG("Closing original WIM file.");
116         /* Close the original WIM file that was opened for reading. */
117         if (w->fp) {
118                 if (fclose(w->fp) != 0) {
119                         WARNING("Failed to close the file `%s'", wimfile_name);
120                 }
121                 w->fp = NULL;
122         }
123
124         DEBUG("Renaming `%s' to `%s'", tmpfile, wimfile_name);
125
126         /* Rename the new file to the old file .*/
127         if (rename(tmpfile, wimfile_name) != 0) {
128                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
129                                  tmpfile, wimfile_name);
130                 /* Remove temporary file. */
131                 if (unlink(tmpfile) != 0)
132                         ERROR_WITH_ERRNO("Failed to remove `%s'", tmpfile);
133                 return WIMLIB_ERR_RENAME;
134         }
135
136         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
137                 printf("Successfully renamed `%s' to `%s'\n", tmpfile, wimfile_name);
138
139         return 0;
140 }
141
142 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
143 {
144         u64 xml_data_offset = *(u64*)arg;
145         if (lte->resource_entry.offset > xml_data_offset) {
146                 ERROR("The following resource is *after* the XML data:");
147                 print_lookup_table_entry(lte);
148                 return WIMLIB_ERR_RESOURCE_ORDER;
149         }
150         return 0;
151 }
152
153 WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *w, int write_flags)
154 {
155         int ret;
156         FILE *fp;
157         u8 *integrity_table = NULL;
158         off_t xml_end;
159         off_t xml_size;
160         size_t bytes_written;
161
162         DEBUG("Overwriting XML and header of `%s', write_flags = %#x",
163               w->filename, write_flags);
164
165         if (!w->filename)
166                 return WIMLIB_ERR_NO_FILENAME;
167
168         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
169
170         /* Make sure that the integrity table (if present) is after the XML
171          * data, and that there are no stream resources, metadata resources, or
172          * lookup tables after the XML data.  Otherwise, these data would be
173          * destroyed by this function. */
174         if (w->hdr.integrity.offset != 0 &&
175             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
176                 ERROR("Didn't expect the integrity table to be before the XML data");
177                 return WIMLIB_ERR_RESOURCE_ORDER;
178         }
179
180         if (w->hdr.lookup_table_res_entry.offset >
181             w->hdr.xml_res_entry.offset) {
182                 ERROR("Didn't expect the lookup table to be after the XML data");
183                 return WIMLIB_ERR_RESOURCE_ORDER;
184         }
185
186         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
187                                      &w->hdr.xml_res_entry.offset);
188         if (ret != 0)
189                 return ret;
190
191         ret = reopen_rw(w);
192         if (ret != 0)
193                 return ret;
194
195         fp = w->fp;
196
197         /* The old integrity table is still OK, as the SHA1 message digests in
198          * the integrity table include neither the header nor the XML data.
199          * Save it for later if it exists and an integrity table was required.
200          * */
201         if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
202              && w->hdr.integrity.offset != 0)
203         {
204                 DEBUG("Reading existing integrity table.");
205                 integrity_table = MALLOC(w->hdr.integrity.size);
206                 if (!integrity_table)
207                         return WIMLIB_ERR_NOMEM;
208
209                 ret = read_uncompressed_resource(fp, w->hdr.integrity.offset,
210                                                  w->hdr.integrity.original_size,
211                                                  integrity_table);
212                 if (ret != 0)
213                         goto err;
214                 DEBUG("Done reading existing integrity table.");
215         }
216
217         DEBUG("Overwriting XML data.");
218         /* Overwrite the XML data. */
219         if (fseeko(fp, w->hdr.xml_res_entry.offset, SEEK_SET) != 0) {
220                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
221                                  "for XML data", w->hdr.xml_res_entry.offset);
222                 ret = WIMLIB_ERR_WRITE;
223                 goto err;
224         }
225         ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp, 0);
226         if (ret != 0)
227                 goto err;
228
229         DEBUG("Updating XML resource entry.");
230         /* Update the XML resource entry in the WIM header. */
231         xml_end = ftello(fp);
232         if (xml_end == -1) {
233                 ret = WIMLIB_ERR_WRITE;
234                 goto err;
235         }
236         xml_size = xml_end - w->hdr.xml_res_entry.offset;
237         w->hdr.xml_res_entry.size = xml_size;
238         w->hdr.xml_res_entry.original_size = xml_size;
239         /* XML data offset is unchanged. */
240
241         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
242                 DEBUG("Writing integrity table.");
243                 w->hdr.integrity.offset = xml_end;
244                 if (integrity_table) {
245                         /* The existing integrity table was saved. */
246                         bytes_written = fwrite(integrity_table, 1,
247                                                w->hdr.integrity.size, fp);
248                         if (bytes_written != w->hdr.integrity.size) {
249                                 ERROR_WITH_ERRNO("Failed to write integrity "
250                                                  "table");
251                                 ret = WIMLIB_ERR_WRITE;
252                                 goto err;
253                         }
254                         FREE(integrity_table);
255                 } else {
256                         /* There was no existing integrity table, so a new one
257                          * must be calculated. */
258                         ret = write_integrity_table(fp, WIM_HEADER_DISK_SIZE,
259                                         w->hdr.lookup_table_res_entry.offset +
260                                         w->hdr.lookup_table_res_entry.size,
261                                         write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
262                         if (ret != 0)
263                                 return ret;
264
265                         off_t end_integrity = ftello(fp);
266                         if (end_integrity == -1)
267                                 return WIMLIB_ERR_WRITE;
268
269                         off_t integrity_size           = end_integrity - xml_end;
270                         w->hdr.integrity.size          = integrity_size;
271                         w->hdr.integrity.original_size = integrity_size;
272                         w->hdr.integrity.flags         = 0;
273                 }
274         } else {
275                 DEBUG("Truncating file to end of XML data.");
276                 /* No integrity table to write.  The file should be truncated
277                  * because it's possible that the old file was longer (due to it
278                  * including an integrity table, or due to its XML data being
279                  * longer) */
280                 if (fflush(fp) != 0) {
281                         ERROR_WITH_ERRNO("Failed to flush stream for file `%s'",
282                                          w->filename);
283                         return WIMLIB_ERR_WRITE;
284                 }
285                 if (ftruncate(fileno(fp), xml_end) != 0) {
286                         ERROR_WITH_ERRNO("Failed to truncate `%s' to %"PRIu64" "
287                                          "bytes", w->filename, xml_end);
288                         return WIMLIB_ERR_WRITE;
289                 }
290                 memset(&w->hdr.integrity, 0, sizeof(struct resource_entry));
291         }
292
293         DEBUG("Overwriting header.");
294         /* Overwrite the header. */
295         if (fseeko(fp, 0, SEEK_SET) != 0) {
296                 ERROR_WITH_ERRNO("Failed to seek to beginning of `%s'",
297                                  w->filename);
298                 return WIMLIB_ERR_WRITE;
299         }
300
301         ret = write_header(&w->hdr, fp);
302         if (ret != 0)
303                 return ret;
304
305         DEBUG("Closing `%s'.", w->filename);
306         if (fclose(fp) != 0) {
307                 ERROR_WITH_ERRNO("Failed to close `%s'", w->filename);
308                 return WIMLIB_ERR_WRITE;
309         }
310         w->fp = NULL;
311         DEBUG("Done.");
312         return 0;
313 err:
314         FREE(integrity_table);
315         return ret;
316 }
317
318
319 /* Chunk table that's located at the beginning of each compressed resource in
320  * the WIM.  (This is not the on-disk format; the on-disk format just has an
321  * array of offsets.) */
322 struct chunk_table {
323         off_t file_offset;
324         u64 num_chunks;
325         u64 original_resource_size;
326         u64 bytes_per_chunk_entry;
327         u64 table_disk_size;
328         u64 cur_offset;
329         u64 *cur_offset_p;
330         u64 offsets[0];
331 };
332
333 /*
334  * Allocates and initializes a chunk table, and reserves space for it in the
335  * output file.
336  */
337 static int
338 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
339                              FILE *out_fp,
340                              off_t file_offset,
341                              struct chunk_table **chunk_tab_ret)
342 {
343         u64 size = wim_resource_size(lte);
344         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
345         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
346         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
347         int ret;
348
349         if (!chunk_tab) {
350                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
351                       "resource", size);
352                 ret = WIMLIB_ERR_NOMEM;
353                 goto out;
354         }
355         chunk_tab->file_offset = file_offset;
356         chunk_tab->num_chunks = num_chunks;
357         chunk_tab->original_resource_size = size;
358         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
359         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
360                                      (num_chunks - 1);
361         chunk_tab->cur_offset = 0;
362         chunk_tab->cur_offset_p = chunk_tab->offsets;
363
364         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
365                    chunk_tab->table_disk_size) {
366                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
367                                  "file resource");
368                 ret = WIMLIB_ERR_WRITE;
369                 goto out;
370         }
371
372         ret = 0;
373 out:
374         *chunk_tab_ret = chunk_tab;
375         return ret;
376 }
377
378 typedef int (*compress_func_t)(const void *, unsigned, void *, unsigned *);
379
380 compress_func_t get_compress_func(int out_ctype)
381 {
382         if (out_ctype == WIM_COMPRESSION_TYPE_LZX)
383                 return lzx_compress;
384         else
385                 return xpress_compress;
386 }
387
388
389 /*
390  * Compresses a chunk of a WIM resource.
391  *
392  * @chunk:              Uncompressed data of the chunk.
393  * @chunk_size:         Size of the uncompressed chunk in bytes.
394  * @compressed_chunk:   Pointer to output buffer of size at least
395  *                              (@chunk_size - 1) bytes.
396  * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
397  *                                      of the compressed chunk will be
398  *                                      returned.
399  * @ctype:      Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
400  *              or WIM_COMPRESSION_TYPE_XPRESS.
401  *
402  * Returns zero if compressed succeeded, and nonzero if the chunk could not be
403  * compressed to any smaller than @chunk_size.  This function cannot fail for
404  * any other reasons.
405  */
406 static int compress_chunk(const u8 chunk[], unsigned chunk_size,
407                           u8 compressed_chunk[],
408                           unsigned *compressed_chunk_len_ret,
409                           int ctype)
410 {
411         compress_func_t compress = get_compress_func(ctype);
412         return (*compress)(chunk, chunk_size, compressed_chunk,
413                            compressed_chunk_len_ret);
414 }
415
416 /*
417  * Writes a chunk of a WIM resource to an output file.
418  *
419  * @chunk:        Uncompressed data of the chunk.
420  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
421  * @out_fp:       FILE * to write tho chunk to.
422  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
423  *                      chunk table provided)
424  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
425  *                      offset of the chunk we write.
426  *
427  * Returns 0 on success; nonzero on failure.
428  */
429 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
430                                     FILE *out_fp, int out_ctype,
431                                     struct chunk_table *chunk_tab)
432 {
433         const u8 *out_chunk;
434         unsigned out_chunk_size;
435
436         wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
437
438         if (!chunk_tab) {
439                 out_chunk = chunk;
440                 out_chunk_size = chunk_size;
441         } else {
442                 u8 *compressed_chunk = alloca(chunk_size);
443                 int ret;
444
445                 ret = compress_chunk(chunk, chunk_size, compressed_chunk,
446                                      &out_chunk_size, out_ctype);
447                 if (ret == 0) {
448                         out_chunk = compressed_chunk;
449                 } else {
450                         out_chunk = chunk;
451                         out_chunk_size = chunk_size;
452                 }
453                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
454                 chunk_tab->cur_offset += out_chunk_size;
455         }
456
457         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
458                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
459                 return WIMLIB_ERR_WRITE;
460         }
461         return 0;
462 }
463
464 /*
465  * Finishes a WIM chunk tale and writes it to the output file at the correct
466  * offset.
467  *
468  * The final size of the full compressed resource is returned in the
469  * @compressed_size_p.
470  */
471 static int
472 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
473                               FILE *out_fp, u64 *compressed_size_p)
474 {
475         size_t bytes_written;
476         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
477                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
478                                  "WIM file", chunk_tab->file_offset);
479                 return WIMLIB_ERR_WRITE;
480         }
481
482         if (chunk_tab->bytes_per_chunk_entry == 8) {
483                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
484         } else {
485                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
486                         ((u32*)chunk_tab->offsets)[i] =
487                                 cpu_to_le32(chunk_tab->offsets[i]);
488         }
489         bytes_written = fwrite((u8*)chunk_tab->offsets +
490                                         chunk_tab->bytes_per_chunk_entry,
491                                1, chunk_tab->table_disk_size, out_fp);
492         if (bytes_written != chunk_tab->table_disk_size) {
493                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
494                                  "file resource");
495                 return WIMLIB_ERR_WRITE;
496         }
497         if (fseeko(out_fp, 0, SEEK_END) != 0) {
498                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
499                 return WIMLIB_ERR_WRITE;
500         }
501         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
502         return 0;
503 }
504
505 static int prepare_resource_for_read(struct lookup_table_entry *lte
506
507                                         #ifdef WITH_NTFS_3G
508                                         , ntfs_inode **ni_ret
509                                         #endif
510                 )
511 {
512         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
513              && !lte->file_on_disk_fp)
514         {
515                 wimlib_assert(lte->file_on_disk);
516                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
517                 if (!lte->file_on_disk_fp) {
518                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
519                                          "reading", lte->file_on_disk);
520                         return WIMLIB_ERR_OPEN;
521                 }
522         }
523 #ifdef WITH_NTFS_3G
524         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
525                   && !lte->attr)
526         {
527                 struct ntfs_location *loc = lte->ntfs_loc;
528                 ntfs_inode *ni;
529                 wimlib_assert(loc);
530                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
531                 if (!ni) {
532                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
533                                          "volume", loc->path_utf8);
534                         return WIMLIB_ERR_NTFS_3G;
535                 }
536                 lte->attr = ntfs_attr_open(ni,
537                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
538                                            (ntfschar*)loc->stream_name_utf16,
539                                            loc->stream_name_utf16_num_chars);
540                 if (!lte->attr) {
541                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
542                                          "NTFS volume", loc->path_utf8);
543                         ntfs_inode_close(ni);
544                         return WIMLIB_ERR_NTFS_3G;
545                 }
546                 *ni_ret = ni;
547         }
548 #endif
549         return 0;
550 }
551
552 static void end_wim_resource_read(struct lookup_table_entry *lte
553                                 #ifdef WITH_NTFS_3G
554                                         , ntfs_inode *ni
555                                 #endif
556                                         )
557 {
558         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
559             && lte->file_on_disk_fp) {
560                 fclose(lte->file_on_disk_fp);
561                 lte->file_on_disk_fp = NULL;
562         }
563 #ifdef WITH_NTFS_3G
564         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
565                 if (lte->attr) {
566                         ntfs_attr_close(lte->attr);
567                         lte->attr = NULL;
568                 }
569                 if (ni)
570                         ntfs_inode_close(ni);
571         }
572 #endif
573 }
574
575 /*
576  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
577  * written uncompressed or compressed depending on the @out_ctype parameter.
578  *
579  * If by chance the resource compresses to more than the original size (this may
580  * happen with random data or files than are pre-compressed), the resource is
581  * instead written uncompressed (and this is reflected in the @out_res_entry by
582  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
583  *
584  * @lte:        The lookup table entry for the WIM resource.
585  * @out_fp:     The FILE * to write the resource to.
586  * @out_ctype:  The compression type of the resource to write.  Note: if this is
587  *                      the same as the compression type of the WIM resource we
588  *                      need to read, we simply copy the data (i.e. we do not
589  *                      uncompress it, then compress it again).
590  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
591  *                  offset, original size, compressed size, and compression flag
592  *                  of the output resource.
593  *
594  * Returns 0 on success; nonzero on failure.
595  */
596 int write_wim_resource(struct lookup_table_entry *lte,
597                        FILE *out_fp, int out_ctype,
598                        struct resource_entry *out_res_entry,
599                        int flags)
600 {
601         u64 bytes_remaining;
602         u64 original_size;
603         u64 old_compressed_size;
604         u64 new_compressed_size;
605         u64 offset;
606         int ret;
607         struct chunk_table *chunk_tab = NULL;
608         bool raw;
609         off_t file_offset;
610 #ifdef WITH_NTFS_3G
611         ntfs_inode *ni = NULL;
612 #endif
613
614         wimlib_assert(lte);
615
616         /* Original size of the resource */
617         original_size = wim_resource_size(lte);
618
619         /* Compressed size of the resource (as it exists now) */
620         old_compressed_size = wim_resource_compressed_size(lte);
621
622         /* Current offset in output file */
623         file_offset = ftello(out_fp);
624         if (file_offset == -1) {
625                 ERROR_WITH_ERRNO("Failed to get offset in output "
626                                  "stream");
627                 return WIMLIB_ERR_WRITE;
628         }
629
630         /* Are the compression types the same?  If so, do a raw copy (copy
631          * without decompressing and recompressing the data). */
632         raw = (wim_resource_compression_type(lte) == out_ctype
633                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
634
635         if (raw) {
636                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
637                 bytes_remaining = old_compressed_size;
638         } else {
639                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
640                 bytes_remaining = original_size;
641         }
642
643         /* Empty resource; nothing needs to be done, so just return success. */
644         if (bytes_remaining == 0)
645                 return 0;
646
647         /* Buffer for reading chunks for the resource */
648         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
649
650         /* If we are writing a compressed resource and not doing a raw copy, we
651          * need to initialize the chunk table */
652         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
653                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
654                                                    &chunk_tab);
655                 if (ret != 0)
656                         goto out;
657         }
658
659         /* If the WIM resource is in an external file, open a FILE * to it so we
660          * don't have to open a temporary one in read_wim_resource() for each
661          * chunk. */
662 #ifdef WITH_NTFS_3G
663         ret = prepare_resource_for_read(lte, &ni);
664 #else
665         ret = prepare_resource_for_read(lte);
666 #endif
667         if (ret != 0)
668                 goto out;
669
670         /* If we aren't doing a raw copy, we will compute the SHA1 message
671          * digest of the resource as we read it, and verify it's the same as the
672          * hash given in the lookup table entry once we've finished reading the
673          * resource. */
674         SHA_CTX ctx;
675         if (!raw)
676                 sha1_init(&ctx);
677
678         /* While there are still bytes remaining in the WIM resource, read a
679          * chunk of the resource, update SHA1, then write that chunk using the
680          * desired compression type. */
681         offset = 0;
682         do {
683                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
684                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
685                 if (ret != 0)
686                         goto out_fclose;
687                 if (!raw)
688                         sha1_update(&ctx, buf, to_read);
689                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
690                                                out_ctype, chunk_tab);
691                 if (ret != 0)
692                         goto out_fclose;
693                 bytes_remaining -= to_read;
694                 offset += to_read;
695         } while (bytes_remaining);
696
697         /* Raw copy:  The new compressed size is the same as the old compressed
698          * size
699          *
700          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
701          * original size
702          *
703          * Using a different compression type:  Call
704          * finish_wim_resource_chunk_tab() and it will provide the new
705          * compressed size.
706          */
707         if (raw) {
708                 new_compressed_size = old_compressed_size;
709         } else {
710                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
711                         new_compressed_size = original_size;
712                 else {
713                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
714                                                             &new_compressed_size);
715                         if (ret != 0)
716                                 goto out_fclose;
717                 }
718         }
719
720         /* Verify SHA1 message digest of the resource, unless we are doing a raw
721          * write (in which case we never even saw the uncompressed data).  Or,
722          * if the hash we had before is all 0's, just re-set it to be the new
723          * hash. */
724         if (!raw) {
725                 u8 md[SHA1_HASH_SIZE];
726                 sha1_final(md, &ctx);
727                 if (is_zero_hash(lte->hash)) {
728                         copy_hash(lte->hash, md);
729                 } else if (!hashes_equal(md, lte->hash)) {
730                         ERROR("WIM resource has incorrect hash!");
731                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
732                                 ERROR("We were reading it from `%s'; maybe it changed "
733                                       "while we were reading it.",
734                                       lte->file_on_disk);
735                         }
736                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
737                         goto out_fclose;
738                 }
739         }
740
741         if (!raw && new_compressed_size >= original_size &&
742             out_ctype != WIM_COMPRESSION_TYPE_NONE)
743         {
744                 /* Oops!  We compressed the resource to larger than the original
745                  * size.  Write the resource uncompressed instead. */
746                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
747                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
748                                          "of output WIM file", file_offset);
749                         ret = WIMLIB_ERR_WRITE;
750                         goto out_fclose;
751                 }
752                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
753                                          out_res_entry, flags);
754                 if (ret != 0)
755                         goto out_fclose;
756                 if (fflush(out_fp) != 0) {
757                         ERROR_WITH_ERRNO("Failed to flush output WIM file");
758                         ret = WIMLIB_ERR_WRITE;
759                         goto out_fclose;
760                 }
761                 if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
762                         ERROR_WITH_ERRNO("Failed to truncate output WIM file");
763                         ret = WIMLIB_ERR_WRITE;
764                         goto out_fclose;
765                 }
766         } else {
767                 if (out_res_entry) {
768                         out_res_entry->size          = new_compressed_size;
769                         out_res_entry->original_size = original_size;
770                         out_res_entry->offset        = file_offset;
771                         out_res_entry->flags         = lte->resource_entry.flags
772                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
773                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
774                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
775                 }
776         }
777         ret = 0;
778 out_fclose:
779 #ifdef WITH_NTFS_3G
780         end_wim_resource_read(lte, ni);
781 #else
782         end_wim_resource_read(lte);
783 #endif
784 out:
785         FREE(chunk_tab);
786         return ret;
787 }
788
789
790 #ifdef ENABLE_MULTITHREADED_COMPRESSION
/*
 * Fixed-capacity circular queue of opaque pointers used to pass work between
 * the main writer thread and the compressor threads.
 *
 * The two semaphores count occupied and free slots, so a consumer blocks while
 * the queue is empty and a producer blocks while it is full.  The mutex
 * serializes updates to the indices and to the slot array.
 */
struct shared_queue {
        sem_t filled_slots;     /* number of slots currently holding an object */
        sem_t empty_slots;      /* number of slots currently free */
        pthread_mutex_t lock;   /* held while front/back/array are updated */
        unsigned front;         /* index of the next slot to dequeue from */
        unsigned back;          /* index of the most recently enqueued slot */
        void **array;           /* the slots themselves (@size entries) */
        unsigned size;          /* capacity of @array, in slots */
};
800
801 static int shared_queue_init(struct shared_queue *q, unsigned size)
802 {
803         q->array = CALLOC(sizeof(q->array[0]), size);
804         if (!q->array)
805                 return WIMLIB_ERR_NOMEM;
806
807         sem_init(&q->filled_slots, 0, 0);
808         sem_init(&q->empty_slots, 0, size);
809         pthread_mutex_init(&q->lock, NULL);
810         q->front = 0;
811         q->back = size - 1;
812         q->size = size;
813         return 0;
814 }
815
816 static void shared_queue_destroy(struct shared_queue *q)
817 {
818         sem_destroy(&q->filled_slots);
819         sem_destroy(&q->empty_slots);
820         pthread_mutex_destroy(&q->lock);
821         FREE(q->array);
822 }
823
824 static void shared_queue_put(struct shared_queue *q, void *obj)
825 {
826         sem_wait(&q->empty_slots);
827         pthread_mutex_lock(&q->lock);
828
829         q->back = (q->back + 1) % q->size;
830         q->array[q->back] = obj;
831
832         sem_post(&q->filled_slots);
833         pthread_mutex_unlock(&q->lock);
834 }
835
836 static void *shared_queue_get(struct shared_queue *q)
837 {
838         sem_wait(&q->filled_slots);
839         pthread_mutex_lock(&q->lock);
840
841         void *obj = q->array[q->front];
842         q->array[q->front] = NULL;
843         q->front = (q->front + 1) % q->size;
844
845         sem_post(&q->empty_slots);
846         pthread_mutex_unlock(&q->lock);
847         return obj;
848 }
849
850 static inline int shared_queue_get_filled(struct shared_queue *q)
851 {
852         int sval;
853         sem_getvalue(&q->filled_slots, &sval);
854         return sval;
855 }
856
857 struct compressor_thread_params {
858         struct shared_queue *res_to_compress_queue;
859         struct shared_queue *compressed_res_queue;
860         compress_func_t compress;
861 };
862
863 #define MAX_CHUNKS_PER_MSG 2
864
865 struct message {
866         struct lookup_table_entry *lte;
867         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
868         u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];
869         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
870         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
871         unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
872         unsigned num_chunks;
873         struct list_head list;
874         bool complete;
875         u64 begin_chunk;
876 };
877
878 static void compress_chunks(struct message *msg, compress_func_t compress)
879 {
880         for (unsigned i = 0; i < msg->num_chunks; i++) {
881                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
882                 int ret = compress(msg->uncompressed_chunks[i],
883                                    msg->uncompressed_chunk_sizes[i],
884                                    msg->compressed_chunks[i],
885                                    &msg->compressed_chunk_sizes[i]);
886                 if (ret == 0) {
887                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
888                 } else {
889                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
890                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
891                 }
892         }
893 }
894
895 static void *compressor_thread_proc(void *arg)
896 {
897         struct compressor_thread_params *params = arg;
898         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
899         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
900         compress_func_t compress = params->compress;
901         struct message *msg;
902
903         DEBUG("Compressor thread ready");
904         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
905                 compress_chunks(msg, compress);
906                 shared_queue_put(compressed_res_queue, msg);
907         }
908         DEBUG("Compressor thread terminating");
909 }
910 #endif
911
912 static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
913                                        u64 total_size, u64 one_percent,
914                                        unsigned *cur_percent,
915                                        const struct lookup_table_entry *cur_lte)
916 {
917         if (*cur_size >= *next_size) {
918                 printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
919                        "(uncompressed) written (%u%% done)",
920                        *cur_size >> 20,
921                        total_size >> 20, *cur_percent);
922                 fflush(stdout);
923                 *next_size += one_percent;
924                 (*cur_percent)++;
925         }
926         *cur_size += wim_resource_size(cur_lte);
927 }
928
929 static void finish_stream_write_progress(u64 total_size)
930 {
931         printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
932                "(uncompressed) written (100%% done)\n",
933                total_size >> 20, total_size >> 20);
934         fflush(stdout);
935 }
936
937 static int write_stream_list_serial(struct list_head *stream_list,
938                                     FILE *out_fp, int out_ctype,
939                                     int write_flags, u64 total_size)
940 {
941         struct lookup_table_entry *lte;
942         int ret;
943
944         u64 one_percent = total_size / 100;
945         u64 cur_size = 0;
946         u64 next_size = 0;
947         unsigned cur_percent = 0;
948
949         list_for_each_entry(lte, stream_list, staging_list) {
950                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
951                         show_stream_write_progress(&cur_size, &next_size,
952                                                    total_size, one_percent,
953                                                    &cur_percent, lte);
954                 }
955                 ret = write_wim_resource(lte, out_fp, out_ctype,
956                                          &lte->output_resource_entry, 0);
957                 if (ret != 0)
958                         return ret;
959         }
960         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
961                 finish_stream_write_progress(total_size);
962         return 0;
963 }
964
965 #ifdef ENABLE_MULTITHREADED_COMPRESSION
966 static int write_wim_chunks(struct message *msg, FILE *out_fp,
967                             struct chunk_table *chunk_tab)
968 {
969         for (unsigned i = 0; i < msg->num_chunks; i++) {
970                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
971
972                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
973                       i, msg->num_chunks, chunk_csize);
974
975                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
976                     != chunk_csize)
977                 {
978                         ERROR_WITH_ERRNO("Failed to write WIM");
979                         return WIMLIB_ERR_WRITE;
980                 }
981
982                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
983                 chunk_tab->cur_offset += chunk_csize;
984         }
985         return 0;
986 }
987
988 /*
989  * This function is executed by the main thread when the resources are being
990  * compressed in parallel.  The main thread is in change of all reading of the
991  * uncompressed data and writing of the compressed data.  The compressor threads
992  * *only* do compression from/to in-memory buffers.
993  *
994  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
995  * chunks of compressed data to compress, represented in a `struct message'.
996  * Each message is passed from the main thread to a worker thread through the
997  * res_to_compress_queue, and it is passed back through the
998  * compressed_res_queue.
999  */
1000 static int main_writer_thread_proc(struct list_head *stream_list,
1001                                    FILE *out_fp,
1002                                    int out_ctype,
1003                                    struct shared_queue *res_to_compress_queue,
1004                                    struct shared_queue *compressed_res_queue,
1005                                    size_t queue_size,
1006                                    int write_flags,
1007                                    u64 total_size)
1008 {
1009         int ret;
1010
1011
1012         struct message msgs[queue_size];
1013         ZERO_ARRAY(msgs);
1014
1015         // Initially, all the messages are available to use.
1016         LIST_HEAD(available_msgs);
1017         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
1018                 list_add(&msgs[i].list, &available_msgs);
1019
1020         // outstanding_resources is the list of resources that currently have
1021         // had chunks sent off for compression.
1022         //
1023         // The first stream in outstanding_resources is the stream that is
1024         // currently being written (cur_lte).
1025         //
1026         // The last stream in outstanding_resources is the stream that is
1027         // currently being read and chunks fed to the compressor threads
1028         // (next_lte).
1029         //
1030         // Depending on the number of threads and the sizes of the resource,
1031         // the outstanding streams list may contain streams between cur_lte and
1032         // next_lte that have all their chunks compressed or being compressed,
1033         // but haven't been written yet.
1034         //
1035         LIST_HEAD(outstanding_resources);
1036         struct list_head *next_resource = stream_list->next;
1037         struct lookup_table_entry *next_lte = container_of(next_resource,
1038                                                            struct lookup_table_entry,
1039                                                            staging_list);
1040         next_resource = next_resource->next;
1041         u64 next_chunk = 0;
1042         u64 next_num_chunks = wim_resource_chunks(next_lte);
1043         INIT_LIST_HEAD(&next_lte->msg_list);
1044         list_add_tail(&next_lte->staging_list, &outstanding_resources);
1045
1046         // As in write_wim_resource(), each resource we read is checksummed.
1047         SHA_CTX next_sha_ctx;
1048         sha1_init(&next_sha_ctx);
1049         u8 next_hash[SHA1_HASH_SIZE];
1050
1051         // Resources that don't need any chunks compressed are added to this
1052         // list and written directly by the main thread.
1053         LIST_HEAD(my_resources);
1054
1055         struct lookup_table_entry *cur_lte = next_lte;
1056         struct chunk_table *cur_chunk_tab = NULL;
1057         struct lookup_table_entry *lte;
1058         struct message *msg;
1059
1060         u64 one_percent = total_size / 100;
1061         u64 cur_size = 0;
1062         u64 next_size = 0;
1063         unsigned cur_percent = 0;
1064
1065 #ifdef WITH_NTFS_3G
1066         ntfs_inode *ni = NULL;
1067 #endif
1068
1069 #ifdef WITH_NTFS_3G
1070         ret = prepare_resource_for_read(next_lte, &ni);
1071 #else
1072         ret = prepare_resource_for_read(next_lte);
1073 #endif
1074
1075         DEBUG("Initializing buffers for uncompressed "
1076               "and compressed data (%zu bytes needed)",
1077               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
1078
1079         // Pre-allocate all the buffers that will be needed to do the chunk
1080         // compression.
1081         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1082                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1083                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1084                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1085                         if (msgs[i].compressed_chunks[j] == NULL ||
1086                             msgs[i].uncompressed_chunks[j] == NULL)
1087                         {
1088                                 ERROR("Could not allocate enough memory for "
1089                                       "multi-threaded compression");
1090                                 ret = WIMLIB_ERR_NOMEM;
1091                                 goto out;
1092                         }
1093                 }
1094         }
1095
1096         while (1) {
1097                 // Send chunks to the compressor threads until either (a) there
1098                 // are no more messages available since they were all sent off,
1099                 // or (b) there are no more resources that need to be
1100                 // compressed.
1101                 while (!list_empty(&available_msgs) && next_lte != NULL) {
1102
1103                         // Get a message from the available messages
1104                         // list
1105                         msg = container_of(available_msgs.next,
1106                                            struct message,
1107                                            list);
1108
1109                         // ... and delete it from the available messages
1110                         // list
1111                         list_del(&msg->list);
1112
1113                         // Initialize the message with the chunks to
1114                         // compress.
1115                         msg->num_chunks = min(next_num_chunks - next_chunk,
1116                                               MAX_CHUNKS_PER_MSG);
1117                         msg->lte = next_lte;
1118                         msg->complete = false;
1119                         msg->begin_chunk = next_chunk;
1120
1121                         unsigned size = WIM_CHUNK_SIZE;
1122                         for (unsigned i = 0; i < msg->num_chunks; i++) {
1123
1124                                 // Read chunk @next_chunk of the stream into the
1125                                 // message so that a compressor thread can
1126                                 // compress it.
1127
1128                                 if (next_chunk == next_num_chunks - 1 &&
1129                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
1130                                 {
1131                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
1132                                 }
1133
1134
1135                                 DEBUG2("Read resource (size=%u, offset=%zu)",
1136                                       size, next_chunk * WIM_CHUNK_SIZE);
1137
1138                                 msg->uncompressed_chunk_sizes[i] = size;
1139
1140                                 ret = read_wim_resource(next_lte,
1141                                                         msg->uncompressed_chunks[i],
1142                                                         size,
1143                                                         next_chunk * WIM_CHUNK_SIZE,
1144                                                         0);
1145                                 if (ret != 0)
1146                                         goto out;
1147                                 sha1_update(&next_sha_ctx,
1148                                             msg->uncompressed_chunks[i], size);
1149                                 next_chunk++;
1150                         }
1151
1152                         // Send the compression request
1153                         list_add_tail(&msg->list, &next_lte->msg_list);
1154                         shared_queue_put(res_to_compress_queue, msg);
1155                         DEBUG2("Compression request sent");
1156
1157                         if (next_chunk != next_num_chunks)
1158                                 // More chunks to send for this resource
1159                                 continue;
1160
1161                         // Done sending compression requests for a resource!
1162                         // Check the SHA1 message digest.
1163                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
1164                         sha1_final(next_hash, &next_sha_ctx);
1165                         if (!hashes_equal(next_lte->hash, next_hash)) {
1166                                 ERROR("WIM resource has incorrect hash!");
1167                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
1168                                         ERROR("We were reading it from `%s'; maybe it changed "
1169                                               "while we were reading it.",
1170                                               next_lte->file_on_disk);
1171                                 }
1172                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1173                                 goto out;
1174                         }
1175
1176                         // Advance to the next resource.
1177                         //
1178                         // If the next resource needs no compression, just write
1179                         // it with this thread (not now though--- we could be in
1180                         // the middle of writing another resource.)  Keep doing
1181                         // this until we either get to the end of the resources
1182                         // list, or we get to a resource that needs compression.
1183
1184                         while (1) {
1185                                 if (next_resource == stream_list) {
1186                                         next_lte = NULL;
1187                                         break;
1188                                 }
1189                         #ifdef WITH_NTFS_3G
1190                                 end_wim_resource_read(next_lte, ni);
1191                                 ni = NULL;
1192                         #else
1193                                 end_wim_resource_read(next_lte);
1194                         #endif
1195
1196                                 next_lte = container_of(next_resource,
1197                                                         struct lookup_table_entry,
1198                                                         staging_list);
1199                                 next_resource = next_resource->next;
1200                                 if ((next_lte->resource_location == RESOURCE_IN_WIM
1201                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
1202                                     || wim_resource_size(next_lte) == 0)
1203                                 {
1204                                         list_add_tail(&next_lte->staging_list,
1205                                                       &my_resources);
1206                                 } else {
1207                                         list_add_tail(&next_lte->staging_list,
1208                                                       &outstanding_resources);
1209                                         next_chunk = 0;
1210                                         next_num_chunks = wim_resource_chunks(next_lte);
1211                                         sha1_init(&next_sha_ctx);
1212                                         INIT_LIST_HEAD(&next_lte->msg_list);
1213                                 #ifdef WITH_NTFS_3G
1214                                         ret = prepare_resource_for_read(next_lte, &ni);
1215                                 #else
1216                                         ret = prepare_resource_for_read(next_lte);
1217                                 #endif
1218                                         if (ret != 0)
1219                                                 goto out;
1220                                         DEBUG2("Updated next_lte");
1221                                         break;
1222                                 }
1223                         }
1224                 }
1225
1226                 // If there are no outstanding resources, there are no more
1227                 // resources that need to be written.
1228                 if (list_empty(&outstanding_resources)) {
1229                         DEBUG("No outstanding resources! Done");
1230                         ret = 0;
1231                         goto out;
1232                 }
1233
1234                 // Get the next message from the queue and process it.
1235                 // The message will contain 1 or more data chunks that have been
1236                 // compressed.
1237                 DEBUG2("Waiting for message");
1238                 msg = shared_queue_get(compressed_res_queue);
1239                 msg->complete = true;
1240
1241                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1242
1243                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
1244                         DEBUG2("complete=%d", msg->complete);
1245                 }
1246
1247                 // Is this the next chunk in the current resource?  If it's not
1248                 // (i.e., an earlier chunk in a same or different resource
1249                 // hasn't been compressed yet), do nothing, and keep this
1250                 // message around until all earlier chunks are received.
1251                 //
1252                 // Otherwise, write all the chunks we can.
1253                 while (!list_empty(&cur_lte->msg_list)
1254                         && (msg = container_of(cur_lte->msg_list.next,
1255                                                struct message,
1256                                                list))->complete)
1257                 {
1258                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1259                         if (msg->begin_chunk == 0) {
1260                                 DEBUG2("Begin chunk tab");
1261
1262
1263
1264                                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1265                                         show_stream_write_progress(&cur_size,
1266                                                                    &next_size,
1267                                                                    total_size,
1268                                                                    one_percent,
1269                                                                    &cur_percent,
1270                                                                    cur_lte);
1271                                 }
1272
1273                                 // This is the first set of chunks.  Leave space
1274                                 // for the chunk table in the output file.
1275                                 off_t cur_offset = ftello(out_fp);
1276                                 if (cur_offset == -1) {
1277                                         ret = WIMLIB_ERR_WRITE;
1278                                         goto out;
1279                                 }
1280                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1281                                                                    out_fp,
1282                                                                    cur_offset,
1283                                                                    &cur_chunk_tab);
1284                                 if (ret != 0)
1285                                         goto out;
1286                         }
1287
1288                         // Write the compressed chunks from the message.
1289                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1290                         if (ret != 0)
1291                                 goto out;
1292
1293                         list_del(&msg->list);
1294
1295                         // This message is available to use for different chunks
1296                         // now.
1297                         list_add(&msg->list, &available_msgs);
1298
1299                         // Was this the last chunk of the stream?  If so,
1300                         // finish it.
1301                         if (list_empty(&cur_lte->msg_list) &&
1302                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1303                         {
1304                                 DEBUG2("Finish wim chunk tab");
1305                                 u64 res_csize;
1306                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1307                                                                     out_fp,
1308                                                                     &res_csize);
1309                                 if (ret != 0)
1310                                         goto out;
1311
1312
1313                                 cur_lte->output_resource_entry.size =
1314                                         res_csize;
1315
1316                                 cur_lte->output_resource_entry.original_size =
1317                                         cur_lte->resource_entry.original_size;
1318
1319                                 cur_lte->output_resource_entry.offset =
1320                                         cur_chunk_tab->file_offset;
1321
1322                                 cur_lte->output_resource_entry.flags =
1323                                         cur_lte->resource_entry.flags |
1324                                                 WIM_RESHDR_FLAG_COMPRESSED;
1325
1326                                 FREE(cur_chunk_tab);
1327                                 cur_chunk_tab = NULL;
1328
1329                                 struct list_head *next = cur_lte->staging_list.next;
1330                                 list_del(&cur_lte->staging_list);
1331
1332                                 if (next == &outstanding_resources) {
1333                                         DEBUG("No more outstanding resources");
1334                                         ret = 0;
1335                                         goto out;
1336                                 } else {
1337                                         cur_lte = container_of(cur_lte->staging_list.next,
1338                                                                struct lookup_table_entry,
1339                                                                staging_list);
1340                                 }
1341
1342                                 // Since we just finished writing a stream,
1343                                 // write any streams that have been added to the
1344                                 // my_resources list for direct writing by the
1345                                 // main thread (e.g. resources that don't need
1346                                 // to be compressed because the desired
1347                                 // compression type is the same as the previous
1348                                 // compression type).
1349                                 struct lookup_table_entry *tmp;
1350                                 list_for_each_entry_safe(lte,
1351                                                          tmp,
1352                                                          &my_resources,
1353                                                          staging_list)
1354                                 {
1355                                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1356                                                 show_stream_write_progress(&cur_size,
1357                                                                            &next_size,
1358                                                                            total_size,
1359                                                                            one_percent,
1360                                                                            &cur_percent,
1361                                                                            lte);
1362                                         }
1363
1364                                         ret = write_wim_resource(lte,
1365                                                                  out_fp,
1366                                                                  out_ctype,
1367                                                                  &lte->output_resource_entry,
1368                                                                  0);
1369                                         list_del(&lte->staging_list);
1370                                         if (ret != 0)
1371                                                 goto out;
1372                                 }
1373                         }
1374                 }
1375         }
1376
1377 out:
1378 #ifdef WITH_NTFS_3G
1379         end_wim_resource_read(cur_lte, ni);
1380 #else
1381         end_wim_resource_read(cur_lte);
1382 #endif
1383         if (ret == 0) {
1384                 list_for_each_entry(lte, &my_resources, staging_list) {
1385                         ret = write_wim_resource(lte, out_fp,
1386                                                  out_ctype,
1387                                                  &lte->output_resource_entry,
1388                                                  0);
1389                         if (ret != 0)
1390                                 break;
1391                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1392                                 show_stream_write_progress(&cur_size,
1393                                                            &next_size,
1394                                                            total_size,
1395                                                            one_percent,
1396                                                            &cur_percent,
1397                                                            lte);
1398                         }
1399                 }
1400                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1401                         finish_stream_write_progress(total_size);
1402         } else {
1403                 size_t num_available_msgs = 0;
1404                 struct list_head *cur;
1405
1406                 list_for_each(cur, &available_msgs) {
1407                         num_available_msgs++;
1408                 }
1409
1410                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1411                         shared_queue_get(compressed_res_queue);
1412                         num_available_msgs++;
1413                 }
1414         }
1415
1416         DEBUG("Freeing messages");
1417
1418         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1419                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1420                         FREE(msgs[i].compressed_chunks[j]);
1421                         FREE(msgs[i].uncompressed_chunks[j]);
1422                 }
1423         }
1424
1425         if (cur_chunk_tab != NULL)
1426                 FREE(cur_chunk_tab);
1427         return ret;
1428 }
1429
1430 static int write_stream_list_parallel(struct list_head *stream_list,
1431                                       FILE *out_fp, int out_ctype,
1432                                       int write_flags, u64 total_size,
1433                                       unsigned num_threads)
1434 {
1435         int ret;
1436         struct shared_queue res_to_compress_queue;
1437         struct shared_queue compressed_res_queue;
1438         pthread_t *compressor_threads = NULL;
1439
1440         if (num_threads == 0) {
1441                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1442                 if (nthreads < 1) {
1443                         WARNING("Could not determine number of processors! Assuming 1");
1444                         goto out_serial;
1445                 } else {
1446                         num_threads = nthreads;
1447                 }
1448         }
1449
1450         wimlib_assert(stream_list->next != stream_list);
1451
1452
1453         static const double MESSAGES_PER_THREAD = 2.0;
1454         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1455
1456         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1457
1458         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1459         if (ret != 0)
1460                 goto out_serial;
1461
1462         ret = shared_queue_init(&compressed_res_queue, queue_size);
1463         if (ret != 0)
1464                 goto out_destroy_res_to_compress_queue;
1465
1466         struct compressor_thread_params params;
1467         params.res_to_compress_queue = &res_to_compress_queue;
1468         params.compressed_res_queue = &compressed_res_queue;
1469         params.compress = get_compress_func(out_ctype);
1470
1471         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1472
1473         for (unsigned i = 0; i < num_threads; i++) {
1474                 DEBUG("pthread_create thread %u", i);
1475                 ret = pthread_create(&compressor_threads[i], NULL,
1476                                      compressor_thread_proc, &params);
1477                 if (ret != 0) {
1478                         ret = -1;
1479                         ERROR_WITH_ERRNO("Failed to create compressor "
1480                                          "thread %u", i);
1481                         num_threads = i;
1482                         goto out_join;
1483                 }
1484         }
1485
1486         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1487                 printf("Writing compressed data using %u threads...\n",
1488                        num_threads);
1489         }
1490
1491         ret = main_writer_thread_proc(stream_list,
1492                                       out_fp,
1493                                       out_ctype,
1494                                       &res_to_compress_queue,
1495                                       &compressed_res_queue,
1496                                       queue_size,
1497                                       write_flags,
1498                                       total_size);
1499
1500 out_join:
1501         for (unsigned i = 0; i < num_threads; i++)
1502                 shared_queue_put(&res_to_compress_queue, NULL);
1503
1504         for (unsigned i = 0; i < num_threads; i++) {
1505                 if (pthread_join(compressor_threads[i], NULL)) {
1506                         WARNING("Failed to join compressor thread %u: %s",
1507                                 i, strerror(errno));
1508                 }
1509         }
1510         FREE(compressor_threads);
1511         shared_queue_destroy(&compressed_res_queue);
1512 out_destroy_res_to_compress_queue:
1513         shared_queue_destroy(&res_to_compress_queue);
1514         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1515                 return ret;
1516 out_serial:
1517         WARNING("Falling back to single-threaded compression");
1518         return write_stream_list_serial(stream_list, out_fp,
1519                                         out_ctype, write_flags, total_size);
1520 }
1521 #endif
1522
1523 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1524                              int out_ctype, int write_flags,
1525                              unsigned num_threads)
1526 {
1527         struct lookup_table_entry *lte;
1528         size_t num_streams = 0;
1529         u64 total_size = 0;
1530         bool compression_needed = false;
1531
1532         list_for_each_entry(lte, stream_list, staging_list) {
1533                 num_streams++;
1534                 total_size += wim_resource_size(lte);
1535                 if (!compression_needed
1536                     && out_ctype != WIM_COMPRESSION_TYPE_NONE
1537                     && (lte->resource_location != RESOURCE_IN_WIM
1538                         || wimlib_get_compression_type(lte->wim) != out_ctype)
1539                     && wim_resource_size(lte) != 0)
1540                         compression_needed = true;
1541         }
1542
1543         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1544                 printf("Preparing to write %zu streams "
1545                        "(%"PRIu64" total bytes uncompressed)\n",
1546                        num_streams, total_size);
1547                 printf("Using compression type %s\n",
1548                        wimlib_get_compression_type_string(out_ctype));
1549         }
1550
1551 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1552         if (compression_needed && total_size >= 1000000 && num_threads != 1) {
1553                 return write_stream_list_parallel(stream_list, out_fp,
1554                                                   out_ctype, write_flags,
1555                                                   total_size, num_threads);
1556         }
1557         else
1558 #endif
1559         {
1560                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1561                         const char *reason = "";
1562                         if (!compression_needed)
1563                                 reason = " (no compression needed)";
1564                         printf("Writing data using 1 thread%s\n", reason);
1565                 }
1566
1567                 return write_stream_list_serial(stream_list, out_fp,
1568                                                 out_ctype, write_flags,
1569                                                 total_size);
1570         }
1571 }
1572
1573
1574 static int dentry_find_streams_to_write(struct dentry *dentry,
1575                                         void *wim)
1576 {
1577         WIMStruct *w = wim;
1578         struct list_head *stream_list = w->private;
1579         struct lookup_table_entry *lte;
1580         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1581                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1582                 if (lte && ++lte->out_refcnt == 1)
1583                         list_add(&lte->staging_list, stream_list);
1584         }
1585         return 0;
1586 }
1587
1588 static int find_streams_to_write(WIMStruct *w)
1589 {
1590         return for_dentry_in_tree(wim_root_dentry(w),
1591                                   dentry_find_streams_to_write, w);
1592 }
1593
1594 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1595                              unsigned num_threads)
1596 {
1597
1598         LIST_HEAD(stream_list);
1599
1600         w->private = &stream_list;
1601         for_image(w, image, find_streams_to_write);
1602         return write_stream_list(&stream_list, w->out_fp,
1603                                  wimlib_get_compression_type(w), write_flags,
1604                                  num_threads);
1605 }
1606
1607 /*
1608  * Write the lookup table, xml data, and integrity table, then overwrite the WIM
1609  * header.
1610  */
1611 int finish_write(WIMStruct *w, int image, int write_flags)
1612 {
1613         off_t lookup_table_offset;
1614         off_t xml_data_offset;
1615         off_t lookup_table_size;
1616         off_t integrity_offset;
1617         off_t xml_data_size;
1618         off_t end_offset;
1619         off_t integrity_size;
1620         int ret;
1621         struct wim_header hdr;
1622         FILE *out = w->out_fp;
1623
1624         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1625                 /* Write the lookup table. */
1626                 lookup_table_offset = ftello(out);
1627                 if (lookup_table_offset == -1)
1628                         return WIMLIB_ERR_WRITE;
1629
1630                 DEBUG("Writing lookup table (offset %"PRIu64")",
1631                       lookup_table_offset);
1632                 ret = write_lookup_table(w->lookup_table, out);
1633                 if (ret != 0)
1634                         return ret;
1635         }
1636
1637         xml_data_offset = ftello(out);
1638         if (xml_data_offset == -1)
1639                 return WIMLIB_ERR_WRITE;
1640
1641         /* @hdr will be the header for the new WIM.  First copy all the data
1642          * from the header in the WIMStruct; then set all the fields that may
1643          * have changed, including the resource entries, boot index, and image
1644          * count.  */
1645         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1646         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1647                 lookup_table_size = xml_data_offset - lookup_table_offset;
1648                 hdr.lookup_table_res_entry.offset = lookup_table_offset;
1649                 hdr.lookup_table_res_entry.size = lookup_table_size;
1650         }
1651         hdr.lookup_table_res_entry.original_size = hdr.lookup_table_res_entry.size;
1652         hdr.lookup_table_res_entry.flags = WIM_RESHDR_FLAG_METADATA;
1653
1654         DEBUG("Writing XML data (offset %"PRIu64")", xml_data_offset);
1655         ret = write_xml_data(w->wim_info, image, out,
1656                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1657                                 wim_info_get_total_bytes(w->wim_info) : 0);
1658         if (ret != 0)
1659                 return ret;
1660
1661         integrity_offset = ftello(out);
1662         if (integrity_offset == -1)
1663                 return WIMLIB_ERR_WRITE;
1664         xml_data_size = integrity_offset - xml_data_offset;
1665
1666         hdr.xml_res_entry.offset                 = xml_data_offset;
1667         hdr.xml_res_entry.size                   = xml_data_size;
1668         hdr.xml_res_entry.original_size          = xml_data_size;
1669         hdr.xml_res_entry.flags                  = 0;
1670
1671         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1672                 ret = write_integrity_table(out, WIM_HEADER_DISK_SIZE,
1673                                             xml_data_offset,
1674                                             write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
1675                 if (ret != 0)
1676                         return ret;
1677                 end_offset = ftello(out);
1678                 if (end_offset == -1)
1679                         return WIMLIB_ERR_WRITE;
1680                 integrity_size              = end_offset - integrity_offset;
1681                 hdr.integrity.offset        = integrity_offset;
1682                 hdr.integrity.size          = integrity_size;
1683                 hdr.integrity.original_size = integrity_size;
1684         } else {
1685                 hdr.integrity.offset        = 0;
1686                 hdr.integrity.size          = 0;
1687                 hdr.integrity.original_size = 0;
1688         }
1689         hdr.integrity.flags = 0;
1690
1691         DEBUG("Updating WIM header.");
1692
1693         /*
1694          * In the WIM header, there is room for the resource entry for a
1695          * metadata resource labeled as the "boot metadata".  This entry should
1696          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1697          * it should be a copy of the resource entry for the image that is
1698          * marked as bootable.  This is not well documented...
1699          */
1700         if (hdr.boot_idx == 0 || !w->image_metadata
1701                         || (image != WIM_ALL_IMAGES && image != hdr.boot_idx)) {
1702                 memset(&hdr.boot_metadata_res_entry, 0,
1703                        sizeof(struct resource_entry));
1704         } else {
1705                 memcpy(&hdr.boot_metadata_res_entry,
1706                        &w->image_metadata[
1707                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1708                        sizeof(struct resource_entry));
1709         }
1710
1711         /* Set image count and boot index correctly for single image writes */
1712         if (image != WIM_ALL_IMAGES) {
1713                 hdr.image_count = 1;
1714                 if (hdr.boot_idx == image)
1715                         hdr.boot_idx = 1;
1716                 else
1717                         hdr.boot_idx = 0;
1718         }
1719
1720
1721         if (fseeko(out, 0, SEEK_SET) != 0)
1722                 return WIMLIB_ERR_WRITE;
1723
1724         ret = write_header(&hdr, out);
1725         if (ret != 0)
1726                 return ret;
1727
1728         DEBUG("Closing output file.");
1729         wimlib_assert(w->out_fp != NULL);
1730         if (fclose(w->out_fp) != 0) {
1731                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1732                 ret = WIMLIB_ERR_WRITE;
1733         }
1734         w->out_fp = NULL;
1735         return ret;
1736 }
1737
1738 /* Open file stream and write dummy header for WIM. */
1739 int begin_write(WIMStruct *w, const char *path, int write_flags)
1740 {
1741         const char *mode;
1742         DEBUG("Opening `%s' for new WIM", path);
1743
1744         /* checking the integrity requires going back over the file to read it.
1745          * XXX
1746          * (It also would be possible to keep a running sha1sum as the file is
1747          * written-- this would be faster, but a bit more complicated) */
1748         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1749                 mode = "w+b";
1750         else
1751                 mode = "wb";
1752
1753         if (w->out_fp)
1754                 fclose(w->out_fp);
1755
1756         w->out_fp = fopen(path, mode);
1757         if (!w->out_fp) {
1758                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
1759                                  path);
1760                 return WIMLIB_ERR_OPEN;
1761         }
1762
1763         /* Write dummy header. It will be overwritten later. */
1764         return write_header(&w->hdr, w->out_fp);
1765 }
1766
1767 /* Writes a stand-alone WIM to a file.  */
1768 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1769                            int image, int write_flags, unsigned num_threads)
1770 {
1771         int ret;
1772
1773         if (!w || !path)
1774                 return WIMLIB_ERR_INVALID_PARAM;
1775
1776         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
1777
1778         if (image != WIM_ALL_IMAGES &&
1779              (image < 1 || image > w->hdr.image_count))
1780                 return WIMLIB_ERR_INVALID_IMAGE;
1781
1782
1783         if (w->hdr.total_parts != 1) {
1784                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1785                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1786         }
1787
1788         if (image == WIM_ALL_IMAGES)
1789                 DEBUG("Writing all images to `%s'.", path);
1790         else
1791                 DEBUG("Writing image %d to `%s'.", image, path);
1792
1793         ret = begin_write(w, path, write_flags);
1794         if (ret != 0)
1795                 return ret;
1796
1797         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1798
1799         ret = write_wim_streams(w, image, write_flags, num_threads);
1800
1801         if (ret != 0) {
1802                 /*ERROR("Failed to write WIM file resources to `%s'", path);*/
1803                 return ret;
1804         }
1805
1806         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1807                 printf("Writing image metadata...\n");
1808
1809         ret = for_image(w, image, write_metadata_resource);
1810
1811         if (ret != 0) {
1812                 /*ERROR("Failed to write WIM image metadata to `%s'", path);*/
1813                 return ret;
1814         }
1815
1816         ret = finish_write(w, image, write_flags);
1817         if (ret != 0)
1818                 return ret;
1819
1820         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1821                 printf("Successfully wrote `%s'\n", path);
1822         return 0;
1823 }