]> wimlib.net Git - wimlib/blob - src/write.c
--threads option and stream writing progress
[wimlib] / src / write.c
1 /*
2  * write.c
3  *
4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
5  * compressed file resources, etc.
6  */
7
8 /*
9  * Copyright (C) 2010 Carl Thijssen
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32 #include "xml.h"
33 #include "lzx.h"
34 #include "xpress.h"
35 #include <unistd.h>
36 #include <semaphore.h>
37 #include <pthread.h>
38 #include <errno.h>
39
40 #ifdef WITH_NTFS_3G
41 #include <time.h>
42 #include <ntfs-3g/attrib.h>
43 #include <ntfs-3g/inode.h>
44 #include <ntfs-3g/dir.h>
45 #endif
46
47
48 #ifdef HAVE_ALLOCA_H
49 #include <alloca.h>
50 #endif
51
52
53 /* Reopens the FILE* for a WIM read-write. */
54 static int reopen_rw(WIMStruct *w)
55 {
56         FILE *fp;
57
58         if (fclose(w->fp) != 0)
59                 ERROR_WITH_ERRNO("Failed to close the file `%s'", w->filename);
60         w->fp = NULL;
61         fp = fopen(w->filename, "r+b");
62         if (!fp) {
63                 ERROR_WITH_ERRNO("Failed to open `%s' for reading and writing",
64                                  w->filename);
65                 return WIMLIB_ERR_OPEN;
66         }
67         w->fp = fp;
68         return 0;
69 }
70
71
72
73 /*
74  * Writes a WIM file to the original file that it was read from, overwriting it.
75  */
76 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
77                                unsigned num_threads)
78 {
79         const char *wimfile_name;
80         size_t wim_name_len;
81         int ret;
82
83         if (!w)
84                 return WIMLIB_ERR_INVALID_PARAM;
85
86         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
87
88         wimfile_name = w->filename;
89
90         DEBUG("Replacing WIM file `%s'.", wimfile_name);
91
92         if (!wimfile_name)
93                 return WIMLIB_ERR_NO_FILENAME;
94
95         /* Write the WIM to a temporary file. */
96         /* XXX should the temporary file be somewhere else? */
97         wim_name_len = strlen(wimfile_name);
98         char tmpfile[wim_name_len + 10];
99         memcpy(tmpfile, wimfile_name, wim_name_len);
100         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
101         tmpfile[wim_name_len + 9] = '\0';
102
103         ret = wimlib_write(w, tmpfile, WIM_ALL_IMAGES, write_flags,
104                            num_threads);
105         if (ret != 0) {
106                 ERROR("Failed to write the WIM file `%s'", tmpfile);
107                 if (unlink(tmpfile) != 0)
108                         WARNING("Failed to remove `%s'", tmpfile);
109                 return ret;
110         }
111
112         DEBUG("Closing original WIM file.");
113         /* Close the original WIM file that was opened for reading. */
114         if (w->fp) {
115                 if (fclose(w->fp) != 0) {
116                         WARNING("Failed to close the file `%s'", wimfile_name);
117                 }
118                 w->fp = NULL;
119         }
120
121         DEBUG("Renaming `%s' to `%s'", tmpfile, wimfile_name);
122
123         /* Rename the new file to the old file .*/
124         if (rename(tmpfile, wimfile_name) != 0) {
125                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
126                                  tmpfile, wimfile_name);
127                 /* Remove temporary file. */
128                 if (unlink(tmpfile) != 0)
129                         ERROR_WITH_ERRNO("Failed to remove `%s'", tmpfile);
130                 return WIMLIB_ERR_RENAME;
131         }
132
133         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE)
134                 printf("Successfully renamed `%s' to `%s'\n", tmpfile, wimfile_name);
135
136         return 0;
137 }
138
139 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
140 {
141         u64 xml_data_offset = *(u64*)arg;
142         if (lte->resource_entry.offset > xml_data_offset) {
143                 ERROR("The following resource is *after* the XML data:");
144                 print_lookup_table_entry(lte);
145                 return WIMLIB_ERR_RESOURCE_ORDER;
146         }
147         return 0;
148 }
149
150 WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *w, int write_flags)
151 {
152         int ret;
153         FILE *fp;
154         u8 *integrity_table = NULL;
155         off_t xml_end;
156         off_t xml_size;
157         size_t bytes_written;
158
159         DEBUG("Overwriting XML and header of `%s', write_flags = %#x",
160               w->filename, write_flags);
161
162         if (!w->filename)
163                 return WIMLIB_ERR_NO_FILENAME;
164
165         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
166
167         /* Make sure that the integrity table (if present) is after the XML
168          * data, and that there are no stream resources, metadata resources, or
169          * lookup tables after the XML data.  Otherwise, these data would be
170          * destroyed by this function. */
171         if (w->hdr.integrity.offset != 0 &&
172             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
173                 ERROR("Didn't expect the integrity table to be before the XML data");
174                 return WIMLIB_ERR_RESOURCE_ORDER;
175         }
176
177         if (w->hdr.lookup_table_res_entry.offset >
178             w->hdr.xml_res_entry.offset) {
179                 ERROR("Didn't expect the lookup table to be after the XML data");
180                 return WIMLIB_ERR_RESOURCE_ORDER;
181         }
182
183         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
184                                      &w->hdr.xml_res_entry.offset);
185         if (ret != 0)
186                 return ret;
187
188         ret = reopen_rw(w);
189         if (ret != 0)
190                 return ret;
191
192         fp = w->fp;
193
194         /* The old integrity table is still OK, as the SHA1 message digests in
195          * the integrity table include neither the header nor the XML data.
196          * Save it for later if it exists and an integrity table was required.
197          * */
198         if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
199              && w->hdr.integrity.offset != 0)
200         {
201                 DEBUG("Reading existing integrity table.");
202                 integrity_table = MALLOC(w->hdr.integrity.size);
203                 if (!integrity_table)
204                         return WIMLIB_ERR_NOMEM;
205
206                 ret = read_uncompressed_resource(fp, w->hdr.integrity.offset,
207                                                  w->hdr.integrity.original_size,
208                                                  integrity_table);
209                 if (ret != 0)
210                         goto err;
211                 DEBUG("Done reading existing integrity table.");
212         }
213
214         DEBUG("Overwriting XML data.");
215         /* Overwrite the XML data. */
216         if (fseeko(fp, w->hdr.xml_res_entry.offset, SEEK_SET) != 0) {
217                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
218                                  "for XML data", w->hdr.xml_res_entry.offset);
219                 ret = WIMLIB_ERR_WRITE;
220                 goto err;
221         }
222         ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp, 0);
223         if (ret != 0)
224                 goto err;
225
226         DEBUG("Updating XML resource entry.");
227         /* Update the XML resource entry in the WIM header. */
228         xml_end = ftello(fp);
229         if (xml_end == -1) {
230                 ret = WIMLIB_ERR_WRITE;
231                 goto err;
232         }
233         xml_size = xml_end - w->hdr.xml_res_entry.offset;
234         w->hdr.xml_res_entry.size = xml_size;
235         w->hdr.xml_res_entry.original_size = xml_size;
236         /* XML data offset is unchanged. */
237
238         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
239                 DEBUG("Writing integrity table.");
240                 w->hdr.integrity.offset = xml_end;
241                 if (integrity_table) {
242                         /* The existing integrity table was saved. */
243                         bytes_written = fwrite(integrity_table, 1,
244                                                w->hdr.integrity.size, fp);
245                         if (bytes_written != w->hdr.integrity.size) {
246                                 ERROR_WITH_ERRNO("Failed to write integrity "
247                                                  "table");
248                                 ret = WIMLIB_ERR_WRITE;
249                                 goto err;
250                         }
251                         FREE(integrity_table);
252                 } else {
253                         /* There was no existing integrity table, so a new one
254                          * must be calculated. */
255                         ret = write_integrity_table(fp, WIM_HEADER_DISK_SIZE,
256                                         w->hdr.lookup_table_res_entry.offset +
257                                         w->hdr.lookup_table_res_entry.size,
258                                         write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
259                         if (ret != 0)
260                                 return ret;
261
262                         off_t end_integrity = ftello(fp);
263                         if (end_integrity == -1)
264                                 return WIMLIB_ERR_WRITE;
265
266                         off_t integrity_size           = end_integrity - xml_end;
267                         w->hdr.integrity.size          = integrity_size;
268                         w->hdr.integrity.original_size = integrity_size;
269                         w->hdr.integrity.flags         = 0;
270                 }
271         } else {
272                 DEBUG("Truncating file to end of XML data.");
273                 /* No integrity table to write.  The file should be truncated
274                  * because it's possible that the old file was longer (due to it
275                  * including an integrity table, or due to its XML data being
276                  * longer) */
277                 if (fflush(fp) != 0) {
278                         ERROR_WITH_ERRNO("Failed to flush stream for file `%s'",
279                                          w->filename);
280                         return WIMLIB_ERR_WRITE;
281                 }
282                 if (ftruncate(fileno(fp), xml_end) != 0) {
283                         ERROR_WITH_ERRNO("Failed to truncate `%s' to %"PRIu64" "
284                                          "bytes", w->filename, xml_end);
285                         return WIMLIB_ERR_WRITE;
286                 }
287                 memset(&w->hdr.integrity, 0, sizeof(struct resource_entry));
288         }
289
290         DEBUG("Overwriting header.");
291         /* Overwrite the header. */
292         if (fseeko(fp, 0, SEEK_SET) != 0) {
293                 ERROR_WITH_ERRNO("Failed to seek to beginning of `%s'",
294                                  w->filename);
295                 return WIMLIB_ERR_WRITE;
296         }
297
298         ret = write_header(&w->hdr, fp);
299         if (ret != 0)
300                 return ret;
301
302         DEBUG("Closing `%s'.", w->filename);
303         if (fclose(fp) != 0) {
304                 ERROR_WITH_ERRNO("Failed to close `%s'", w->filename);
305                 return WIMLIB_ERR_WRITE;
306         }
307         w->fp = NULL;
308         DEBUG("Done.");
309         return 0;
310 err:
311         FREE(integrity_table);
312         return ret;
313 }
314
315
316 /* Chunk table that's located at the beginning of each compressed resource in
317  * the WIM.  (This is not the on-disk format; the on-disk format just has an
318  * array of offsets.) */
319 struct chunk_table {
320         off_t file_offset;
321         u64 num_chunks;
322         u64 original_resource_size;
323         u64 bytes_per_chunk_entry;
324         u64 table_disk_size;
325         u64 cur_offset;
326         u64 *cur_offset_p;
327         u64 offsets[0];
328 };
329
330 /*
331  * Allocates and initializes a chunk table, and reserves space for it in the
332  * output file.
333  */
334 static int
335 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
336                              FILE *out_fp,
337                              off_t file_offset,
338                              struct chunk_table **chunk_tab_ret)
339 {
340         u64 size = wim_resource_size(lte);
341         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
342         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
343         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
344         int ret;
345
346         if (!chunk_tab) {
347                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
348                       "resource", size);
349                 ret = WIMLIB_ERR_NOMEM;
350                 goto out;
351         }
352         chunk_tab->file_offset = file_offset;
353         chunk_tab->num_chunks = num_chunks;
354         chunk_tab->original_resource_size = size;
355         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
356         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
357                                      (num_chunks - 1);
358         chunk_tab->cur_offset = 0;
359         chunk_tab->cur_offset_p = chunk_tab->offsets;
360
361         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
362                    chunk_tab->table_disk_size) {
363                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
364                                  "file resource");
365                 ret = WIMLIB_ERR_WRITE;
366                 goto out;
367         }
368
369         ret = 0;
370 out:
371         *chunk_tab_ret = chunk_tab;
372         return ret;
373 }
374
375 typedef int (*compress_func_t)(const void *, unsigned, void *, unsigned *);
376
377 compress_func_t get_compress_func(int out_ctype)
378 {
379         if (out_ctype == WIM_COMPRESSION_TYPE_LZX)
380                 return lzx_compress;
381         else
382                 return xpress_compress;
383 }
384
385
386 /*
387  * Compresses a chunk of a WIM resource.
388  *
389  * @chunk:              Uncompressed data of the chunk.
390  * @chunk_size:         Size of the uncompressed chunk in bytes.
391  * @compressed_chunk:   Pointer to output buffer of size at least
392  *                              (@chunk_size - 1) bytes.
393  * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
394  *                                      of the compressed chunk will be
395  *                                      returned.
396  * @ctype:      Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
397  *              or WIM_COMPRESSION_TYPE_XPRESS.
398  *
399  * Returns zero if compressed succeeded, and nonzero if the chunk could not be
400  * compressed to any smaller than @chunk_size.  This function cannot fail for
401  * any other reasons.
402  */
403 static int compress_chunk(const u8 chunk[], unsigned chunk_size,
404                           u8 compressed_chunk[],
405                           unsigned *compressed_chunk_len_ret,
406                           int ctype)
407 {
408         compress_func_t compress = get_compress_func(ctype);
409         return (*compress)(chunk, chunk_size, compressed_chunk,
410                            compressed_chunk_len_ret);
411 }
412
413 /*
414  * Writes a chunk of a WIM resource to an output file.
415  *
416  * @chunk:        Uncompressed data of the chunk.
417  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
418  * @out_fp:       FILE * to write tho chunk to.
419  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
420  *                      chunk table provided)
421  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
422  *                      offset of the chunk we write.
423  *
424  * Returns 0 on success; nonzero on failure.
425  */
426 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
427                                     FILE *out_fp, int out_ctype,
428                                     struct chunk_table *chunk_tab)
429 {
430         const u8 *out_chunk;
431         unsigned out_chunk_size;
432
433         wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
434
435         if (!chunk_tab) {
436                 out_chunk = chunk;
437                 out_chunk_size = chunk_size;
438         } else {
439                 u8 *compressed_chunk = alloca(chunk_size);
440                 int ret;
441
442                 ret = compress_chunk(chunk, chunk_size, compressed_chunk,
443                                      &out_chunk_size, out_ctype);
444                 if (ret == 0) {
445                         out_chunk = compressed_chunk;
446                 } else {
447                         out_chunk = chunk;
448                         out_chunk_size = chunk_size;
449                 }
450                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
451                 chunk_tab->cur_offset += out_chunk_size;
452         }
453
454         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
455                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
456                 return WIMLIB_ERR_WRITE;
457         }
458         return 0;
459 }
460
461 /*
462  * Finishes a WIM chunk tale and writes it to the output file at the correct
463  * offset.
464  *
465  * The final size of the full compressed resource is returned in the
466  * @compressed_size_p.
467  */
468 static int
469 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
470                               FILE *out_fp, u64 *compressed_size_p)
471 {
472         size_t bytes_written;
473         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
474                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
475                                  "WIM file", chunk_tab->file_offset);
476                 return WIMLIB_ERR_WRITE;
477         }
478
479         if (chunk_tab->bytes_per_chunk_entry == 8) {
480                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
481         } else {
482                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
483                         ((u32*)chunk_tab->offsets)[i] =
484                                 cpu_to_le32(chunk_tab->offsets[i]);
485         }
486         bytes_written = fwrite((u8*)chunk_tab->offsets +
487                                         chunk_tab->bytes_per_chunk_entry,
488                                1, chunk_tab->table_disk_size, out_fp);
489         if (bytes_written != chunk_tab->table_disk_size) {
490                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
491                                  "file resource");
492                 return WIMLIB_ERR_WRITE;
493         }
494         if (fseeko(out_fp, 0, SEEK_END) != 0) {
495                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
496                 return WIMLIB_ERR_WRITE;
497         }
498         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
499         return 0;
500 }
501
502 static int prepare_resource_for_read(struct lookup_table_entry *lte
503
504                                         #ifdef WITH_NTFS_3G
505                                         , ntfs_inode **ni_ret
506                                         #endif
507                 )
508 {
509         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
510              && !lte->file_on_disk_fp)
511         {
512                 wimlib_assert(lte->file_on_disk);
513                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
514                 if (!lte->file_on_disk_fp) {
515                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
516                                          "reading", lte->file_on_disk);
517                         return WIMLIB_ERR_OPEN;
518                 }
519         }
520 #ifdef WITH_NTFS_3G
521         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
522                   && !lte->attr)
523         {
524                 struct ntfs_location *loc = lte->ntfs_loc;
525                 ntfs_inode *ni;
526                 wimlib_assert(loc);
527                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
528                 if (!ni) {
529                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
530                                          "volume", loc->path_utf8);
531                         return WIMLIB_ERR_NTFS_3G;
532                 }
533                 lte->attr = ntfs_attr_open(ni,
534                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
535                                            (ntfschar*)loc->stream_name_utf16,
536                                            loc->stream_name_utf16_num_chars);
537                 if (!lte->attr) {
538                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
539                                          "NTFS volume", loc->path_utf8);
540                         ntfs_inode_close(ni);
541                         return WIMLIB_ERR_NTFS_3G;
542                 }
543                 *ni_ret = ni;
544         }
545 #endif
546         return 0;
547 }
548
549 static void end_wim_resource_read(struct lookup_table_entry *lte
550                                 #ifdef WITH_NTFS_3G
551                                         , ntfs_inode *ni
552                                 #endif
553                                         )
554 {
555         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
556             && lte->file_on_disk_fp) {
557                 fclose(lte->file_on_disk_fp);
558                 lte->file_on_disk_fp = NULL;
559         }
560 #ifdef WITH_NTFS_3G
561         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
562                 if (lte->attr) {
563                         ntfs_attr_close(lte->attr);
564                         lte->attr = NULL;
565                 }
566                 if (ni)
567                         ntfs_inode_close(ni);
568         }
569 #endif
570 }
571
572 /*
573  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
574  * written uncompressed or compressed depending on the @out_ctype parameter.
575  *
576  * If by chance the resource compresses to more than the original size (this may
577  * happen with random data or files than are pre-compressed), the resource is
578  * instead written uncompressed (and this is reflected in the @out_res_entry by
579  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
580  *
581  * @lte:        The lookup table entry for the WIM resource.
582  * @out_fp:     The FILE * to write the resource to.
583  * @out_ctype:  The compression type of the resource to write.  Note: if this is
584  *                      the same as the compression type of the WIM resource we
585  *                      need to read, we simply copy the data (i.e. we do not
586  *                      uncompress it, then compress it again).
587  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
588  *                  offset, original size, compressed size, and compression flag
589  *                  of the output resource.
590  *
591  * Returns 0 on success; nonzero on failure.
592  */
593 int write_wim_resource(struct lookup_table_entry *lte,
594                        FILE *out_fp, int out_ctype,
595                        struct resource_entry *out_res_entry,
596                        int flags)
597 {
598         u64 bytes_remaining;
599         u64 original_size;
600         u64 old_compressed_size;
601         u64 new_compressed_size;
602         u64 offset;
603         int ret;
604         struct chunk_table *chunk_tab = NULL;
605         bool raw;
606         off_t file_offset;
607 #ifdef WITH_NTFS_3G
608         ntfs_inode *ni = NULL;
609 #endif
610
611         wimlib_assert(lte);
612
613         /* Original size of the resource */
614         original_size = wim_resource_size(lte);
615
616         /* Compressed size of the resource (as it exists now) */
617         old_compressed_size = wim_resource_compressed_size(lte);
618
619         /* Current offset in output file */
620         file_offset = ftello(out_fp);
621         if (file_offset == -1) {
622                 ERROR_WITH_ERRNO("Failed to get offset in output "
623                                  "stream");
624                 return WIMLIB_ERR_WRITE;
625         }
626
627         /* Are the compression types the same?  If so, do a raw copy (copy
628          * without decompressing and recompressing the data). */
629         raw = (wim_resource_compression_type(lte) == out_ctype
630                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
631
632         if (raw) {
633                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
634                 bytes_remaining = old_compressed_size;
635         } else {
636                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
637                 bytes_remaining = original_size;
638         }
639
640         /* Empty resource; nothing needs to be done, so just return success. */
641         if (bytes_remaining == 0)
642                 return 0;
643
644         /* Buffer for reading chunks for the resource */
645         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
646
647         /* If we are writing a compressed resource and not doing a raw copy, we
648          * need to initialize the chunk table */
649         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
650                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
651                                                    &chunk_tab);
652                 if (ret != 0)
653                         goto out;
654         }
655
656         /* If the WIM resource is in an external file, open a FILE * to it so we
657          * don't have to open a temporary one in read_wim_resource() for each
658          * chunk. */
659 #ifdef WITH_NTFS_3G
660         ret = prepare_resource_for_read(lte, &ni);
661 #else
662         ret = prepare_resource_for_read(lte);
663 #endif
664         if (ret != 0)
665                 goto out;
666
667         /* If we aren't doing a raw copy, we will compute the SHA1 message
668          * digest of the resource as we read it, and verify it's the same as the
669          * hash given in the lookup table entry once we've finished reading the
670          * resource. */
671         SHA_CTX ctx;
672         if (!raw)
673                 sha1_init(&ctx);
674
675         /* While there are still bytes remaining in the WIM resource, read a
676          * chunk of the resource, update SHA1, then write that chunk using the
677          * desired compression type. */
678         offset = 0;
679         do {
680                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
681                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
682                 if (ret != 0)
683                         goto out_fclose;
684                 if (!raw)
685                         sha1_update(&ctx, buf, to_read);
686                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
687                                                out_ctype, chunk_tab);
688                 if (ret != 0)
689                         goto out_fclose;
690                 bytes_remaining -= to_read;
691                 offset += to_read;
692         } while (bytes_remaining);
693
694         /* Raw copy:  The new compressed size is the same as the old compressed
695          * size
696          *
697          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
698          * original size
699          *
700          * Using a different compression type:  Call
701          * finish_wim_resource_chunk_tab() and it will provide the new
702          * compressed size.
703          */
704         if (raw) {
705                 new_compressed_size = old_compressed_size;
706         } else {
707                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
708                         new_compressed_size = original_size;
709                 else {
710                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
711                                                             &new_compressed_size);
712                         if (ret != 0)
713                                 goto out_fclose;
714                 }
715         }
716
717         /* Verify SHA1 message digest of the resource, unless we are doing a raw
718          * write (in which case we never even saw the uncompressed data).  Or,
719          * if the hash we had before is all 0's, just re-set it to be the new
720          * hash. */
721         if (!raw) {
722                 u8 md[SHA1_HASH_SIZE];
723                 sha1_final(md, &ctx);
724                 if (is_zero_hash(lte->hash)) {
725                         copy_hash(lte->hash, md);
726                 } else if (!hashes_equal(md, lte->hash)) {
727                         ERROR("WIM resource has incorrect hash!");
728                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
729                                 ERROR("We were reading it from `%s'; maybe it changed "
730                                       "while we were reading it.",
731                                       lte->file_on_disk);
732                         }
733                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
734                         goto out_fclose;
735                 }
736         }
737
738         if (!raw && new_compressed_size >= original_size &&
739             out_ctype != WIM_COMPRESSION_TYPE_NONE)
740         {
741                 /* Oops!  We compressed the resource to larger than the original
742                  * size.  Write the resource uncompressed instead. */
743                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
744                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
745                                          "of output WIM file", file_offset);
746                         ret = WIMLIB_ERR_WRITE;
747                         goto out_fclose;
748                 }
749                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
750                                          out_res_entry, flags);
751                 if (ret != 0)
752                         goto out_fclose;
753                 if (fflush(out_fp) != 0) {
754                         ERROR_WITH_ERRNO("Failed to flush output WIM file");
755                         ret = WIMLIB_ERR_WRITE;
756                         goto out_fclose;
757                 }
758                 if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
759                         ERROR_WITH_ERRNO("Failed to truncate output WIM file");
760                         ret = WIMLIB_ERR_WRITE;
761                         goto out_fclose;
762                 }
763         } else {
764                 if (out_res_entry) {
765                         out_res_entry->size          = new_compressed_size;
766                         out_res_entry->original_size = original_size;
767                         out_res_entry->offset        = file_offset;
768                         out_res_entry->flags         = lte->resource_entry.flags
769                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
770                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
771                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
772                 }
773         }
774         ret = 0;
775 out_fclose:
776 #ifdef WITH_NTFS_3G
777         end_wim_resource_read(lte, ni);
778 #else
779         end_wim_resource_read(lte);
780 #endif
781 out:
782         FREE(chunk_tab);
783         return ret;
784 }
785
786
/*
 * Fixed-capacity circular queue of object pointers.  A mutex protects the
 * indices and the slot array, and two counting semaphores make
 * shared_queue_put() block while the queue is full and shared_queue_get()
 * block while it is empty.
 */
787 struct shared_queue {
            /* Number of slots that currently hold an object: posted by
             * shared_queue_put(), waited on by shared_queue_get(). */
788         sem_t filled_slots;
            /* Number of free slots: posted by shared_queue_get(), waited on
             * by shared_queue_put().  Initialized to @size. */
789         sem_t empty_slots;
            /* Held while @front, @back or @array is being modified. */
790         pthread_mutex_t lock;
            /* Index of the slot the next shared_queue_get() reads. */
791         unsigned front;
            /* Index of the slot most recently written by shared_queue_put();
             * starts at @size - 1 so that the first put lands in slot 0. */
792         unsigned back;
            /* Storage for @size object pointers. */
793         void **array;
            /* Number of slots in @array. */
794         unsigned size;
795 };
796
797 static int shared_queue_init(struct shared_queue *q, unsigned size)
798 {
799         q->array = CALLOC(sizeof(q->array[0]), size);
800         if (!q->array)
801                 return WIMLIB_ERR_NOMEM;
802
803         sem_init(&q->filled_slots, 0, 0);
804         sem_init(&q->empty_slots, 0, size);
805         pthread_mutex_init(&q->lock, NULL);
806         q->front = 0;
807         q->back = size - 1;
808         q->size = size;
809         return 0;
810 }
811
812 static void shared_queue_destroy(struct shared_queue *q)
813 {
814         sem_destroy(&q->filled_slots);
815         sem_destroy(&q->empty_slots);
816         pthread_mutex_destroy(&q->lock);
817         FREE(q->array);
818 }
819
820 static void shared_queue_put(struct shared_queue *q, void *obj)
821 {
822         sem_wait(&q->empty_slots);
823         pthread_mutex_lock(&q->lock);
824
825         q->back = (q->back + 1) % q->size;
826         q->array[q->back] = obj;
827
828         sem_post(&q->filled_slots);
829         pthread_mutex_unlock(&q->lock);
830 }
831
832 static void *shared_queue_get(struct shared_queue *q)
833 {
834         sem_wait(&q->filled_slots);
835         pthread_mutex_lock(&q->lock);
836
837         void *obj = q->array[q->front];
838         q->array[q->front] = NULL;
839         q->front = (q->front + 1) % q->size;
840
841         sem_post(&q->empty_slots);
842         pthread_mutex_unlock(&q->lock);
843         return obj;
844 }
845
846 static inline int shared_queue_get_filled(struct shared_queue *q)
847 {
848         int sval;
849         sem_getvalue(&q->filled_slots, &sval);
850         return sval;
851 }
852
/*
 * Argument block handed to compressor_thread_proc(), which copies the three
 * fields into locals when the thread starts.
 */
853 struct compressor_thread_params {
            /* Queue the thread takes uncompressed messages from; a NULL
             * message makes the thread exit its loop. */
854         struct shared_queue *res_to_compress_queue;
            /* Queue the thread puts each message on after compressing it. */
855         struct shared_queue *compressed_res_queue;
            /* Function used to compress one chunk; see compress_chunks(). */
856         compress_func_t compress;
857 };
858
/* Maximum number of chunks carried by a single `struct message'. */
859 #define MAX_CHUNKS_PER_MSG 2
860
/*
 * One unit of work exchanged between main_writer_thread_proc() and the
 * compressor threads: up to MAX_CHUNKS_PER_MSG consecutive chunks of one
 * stream, together with the buffers needed to compress them.
 */
861 struct message {
            /* Stream the chunks in this message belong to. */
862         struct lookup_table_entry *lte;
            /* Buffers the main thread reads the uncompressed chunk data
             * into. */
863         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
            /* For each chunk, the buffer that should actually be written:
             * compress_chunks() points it at compressed_chunks[i] when the
             * compress function returns 0 and at uncompressed_chunks[i]
             * otherwise. */
864         u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];
            /* Buffers the compress function writes its output into. */
865         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
            /* Number of valid bytes in each uncompressed_chunks[i]. */
866         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
            /* Number of bytes to write from each out_compressed_chunks[i]. */
867         unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
            /* How many of the MAX_CHUNKS_PER_MSG entries are in use. */
868         unsigned num_chunks;
            /* Links the message into either the main thread's list of
             * available messages or its stream's msg_list. */
869         struct list_head list;
            /* Set to false when the message is sent for compression and to
             * true once the main thread receives it back. */
870         bool complete;
            /* Index, within the stream, of the first chunk in this message. */
871         u64 begin_chunk;
872 };
873
874 static void compress_chunks(struct message *msg, compress_func_t compress)
875 {
876         for (unsigned i = 0; i < msg->num_chunks; i++) {
877                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
878                 int ret = compress(msg->uncompressed_chunks[i],
879                                    msg->uncompressed_chunk_sizes[i],
880                                    msg->compressed_chunks[i],
881                                    &msg->compressed_chunk_sizes[i]);
882                 if (ret == 0) {
883                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
884                 } else {
885                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
886                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
887                 }
888         }
889 }
890
891 static void *compressor_thread_proc(void *arg)
892 {
893         struct compressor_thread_params *params = arg;
894         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
895         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
896         compress_func_t compress = params->compress;
897         struct message *msg;
898
899         DEBUG("Compressor thread ready");
900         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
901                 compress_chunks(msg, compress);
902                 shared_queue_put(compressed_res_queue, msg);
903         }
904         DEBUG("Compressor thread terminating");
905 }
906
907 static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
908                                        u64 total_size, u64 one_percent,
909                                        unsigned *cur_percent,
910                                        const struct lookup_table_entry *cur_lte)
911 {
912         if (*cur_size >= *next_size) {
913                 printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
914                        "(uncompressed) written (%u%% done)",
915                        *cur_size >> 20,
916                        total_size >> 20, *cur_percent);
917                 fflush(stdout);
918                 *next_size += one_percent;
919                 (*cur_percent)++;
920         }
921         *cur_size += wim_resource_size(cur_lte);
922 }
923
924 static void finish_stream_write_progress(u64 total_size)
925 {
926         printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
927                "(uncompressed) written (100%% done)\n",
928                total_size >> 20, total_size >> 20);
929         fflush(stdout);
930 }
931
932 static int write_stream_list_serial(struct list_head *stream_list,
933                                     FILE *out_fp, int out_ctype,
934                                     int write_flags, u64 total_size)
935 {
936         struct lookup_table_entry *lte;
937         int ret;
938
939         u64 one_percent = total_size / 100;
940         u64 cur_size = 0;
941         u64 next_size = 0;
942         unsigned cur_percent = 0;
943
944         list_for_each_entry(lte, stream_list, staging_list) {
945                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
946                         show_stream_write_progress(&cur_size, &next_size,
947                                                    total_size, one_percent,
948                                                    &cur_percent, lte);
949                 }
950                 ret = write_wim_resource(lte, out_fp, out_ctype,
951                                          &lte->output_resource_entry, 0);
952                 if (ret != 0)
953                         return ret;
954         }
955         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
956                 finish_stream_write_progress(total_size);
957         return 0;
958 }
959
960 static int write_wim_chunks(struct message *msg, FILE *out_fp,
961                             struct chunk_table *chunk_tab)
962 {
963         for (unsigned i = 0; i < msg->num_chunks; i++) {
964                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
965
966                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
967                       i, msg->num_chunks, chunk_csize);
968
969                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
970                     != chunk_csize)
971                 {
972                         ERROR_WITH_ERRNO("Failed to write WIM");
973                         return WIMLIB_ERR_WRITE;
974                 }
975
976                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
977                 chunk_tab->cur_offset += chunk_csize;
978         }
979         return 0;
980 }
981
982
983 /*
984  * This function is executed by the main thread when the resources are being
985  * compressed in parallel.  The main thread is in change of all reading of the
986  * uncompressed data and writing of the compressed data.  The compressor threads
987  * *only* do compression from/to in-memory buffers.
988  *
989  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
990  * chunks of compressed data to compress, represented in a `struct message'.
991  * Each message is passed from the main thread to a worker thread through the
992  * res_to_compress_queue, and it is passed back through the
993  * compressed_res_queue.
994  */
995 static int main_writer_thread_proc(struct list_head *stream_list,
996                                    FILE *out_fp,
997                                    int out_ctype,
998                                    struct shared_queue *res_to_compress_queue,
999                                    struct shared_queue *compressed_res_queue,
1000                                    size_t queue_size,
1001                                    int write_flags,
1002                                    u64 total_size)
1003 {
1004         int ret;
1005
1006
1007         struct message msgs[queue_size];
1008         ZERO_ARRAY(msgs);
1009
1010         // Initially, all the messages are available to use.
1011         LIST_HEAD(available_msgs);
1012         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
1013                 list_add(&msgs[i].list, &available_msgs);
1014
1015         // outstanding_resources is the list of resources that currently have
1016         // had chunks sent off for compression.
1017         //
1018         // The first stream in outstanding_resources is the stream that is
1019         // currently being written (cur_lte).
1020         //
1021         // The last stream in outstanding_resources is the stream that is
1022         // currently being read and chunks fed to the compressor threads
1023         // (next_lte).
1024         //
1025         // Depending on the number of threads and the sizes of the resource,
1026         // the outstanding streams list may contain streams between cur_lte and
1027         // next_lte that have all their chunks compressed or being compressed,
1028         // but haven't been written yet.
1029         //
1030         LIST_HEAD(outstanding_resources);
1031         struct list_head *next_resource = stream_list->next;
1032         struct lookup_table_entry *next_lte = container_of(next_resource,
1033                                                            struct lookup_table_entry,
1034                                                            staging_list);
1035         next_resource = next_resource->next;
1036         u64 next_chunk = 0;
1037         u64 next_num_chunks = wim_resource_chunks(next_lte);
1038         INIT_LIST_HEAD(&next_lte->msg_list);
1039         list_add_tail(&next_lte->staging_list, &outstanding_resources);
1040
1041         // As in write_wim_resource(), each resource we read is checksummed.
1042         SHA_CTX next_sha_ctx;
1043         sha1_init(&next_sha_ctx);
1044         u8 next_hash[SHA1_HASH_SIZE];
1045
1046         // Resources that don't need any chunks compressed are added to this
1047         // list and written directly by the main thread.
1048         LIST_HEAD(my_resources);
1049
1050         struct lookup_table_entry *cur_lte = next_lte;
1051         struct chunk_table *cur_chunk_tab = NULL;
1052         struct lookup_table_entry *lte;
1053         struct message *msg;
1054
1055         u64 one_percent = total_size / 100;
1056         u64 cur_size = 0;
1057         u64 next_size = 0;
1058         unsigned cur_percent = 0;
1059
1060 #ifdef WITH_NTFS_3G
1061         ntfs_inode *ni = NULL;
1062 #endif
1063
1064 #ifdef WITH_NTFS_3G
1065         ret = prepare_resource_for_read(next_lte, &ni);
1066 #else
1067         ret = prepare_resource_for_read(next_lte);
1068 #endif
1069
1070         DEBUG("Initializing buffers for uncompressed "
1071               "and compressed data (%zu bytes needed)",
1072               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
1073
1074         // Pre-allocate all the buffers that will be needed to do the chunk
1075         // compression.
1076         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1077                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1078                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1079                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1080                         if (msgs[i].compressed_chunks[j] == NULL ||
1081                             msgs[i].uncompressed_chunks[j] == NULL)
1082                         {
1083                                 ERROR("Could not allocate enough memory for "
1084                                       "multi-threaded compression");
1085                                 ret = WIMLIB_ERR_NOMEM;
1086                                 goto out;
1087                         }
1088                 }
1089         }
1090
1091         while (1) {
1092                 // Send chunks to the compressor threads until either (a) there
1093                 // are no more messages available since they were all sent off,
1094                 // or (b) there are no more resources that need to be
1095                 // compressed.
1096                 while (!list_empty(&available_msgs) && next_lte != NULL) {
1097
1098                         // Get a message from the available messages
1099                         // list
1100                         msg = container_of(available_msgs.next,
1101                                            struct message,
1102                                            list);
1103
1104                         // ... and delete it from the available messages
1105                         // list
1106                         list_del(&msg->list);
1107
1108                         // Initialize the message with the chunks to
1109                         // compress.
1110                         msg->num_chunks = min(next_num_chunks - next_chunk,
1111                                               MAX_CHUNKS_PER_MSG);
1112                         msg->lte = next_lte;
1113                         msg->complete = false;
1114                         msg->begin_chunk = next_chunk;
1115
1116                         unsigned size = WIM_CHUNK_SIZE;
1117                         for (unsigned i = 0; i < msg->num_chunks; i++) {
1118
1119                                 // Read chunk @next_chunk of the stream into the
1120                                 // message so that a compressor thread can
1121                                 // compress it.
1122
1123                                 if (next_chunk == next_num_chunks - 1 &&
1124                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
1125                                 {
1126                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
1127                                 }
1128
1129
1130                                 DEBUG2("Read resource (size=%u, offset=%zu)",
1131                                       size, next_chunk * WIM_CHUNK_SIZE);
1132
1133                                 msg->uncompressed_chunk_sizes[i] = size;
1134
1135                                 ret = read_wim_resource(next_lte,
1136                                                         msg->uncompressed_chunks[i],
1137                                                         size,
1138                                                         next_chunk * WIM_CHUNK_SIZE,
1139                                                         0);
1140                                 if (ret != 0)
1141                                         goto out;
1142                                 sha1_update(&next_sha_ctx,
1143                                             msg->uncompressed_chunks[i], size);
1144                                 next_chunk++;
1145                         }
1146
1147                         // Send the compression request
1148                         list_add_tail(&msg->list, &next_lte->msg_list);
1149                         shared_queue_put(res_to_compress_queue, msg);
1150                         DEBUG2("Compression request sent");
1151
1152                         if (next_chunk != next_num_chunks)
1153                                 // More chunks to send for this resource
1154                                 continue;
1155
1156                         // Done sending compression requests for a resource!
1157                         // Check the SHA1 message digest.
1158                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
1159                         sha1_final(next_hash, &next_sha_ctx);
1160                         if (!hashes_equal(next_lte->hash, next_hash)) {
1161                                 ERROR("WIM resource has incorrect hash!");
1162                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
1163                                         ERROR("We were reading it from `%s'; maybe it changed "
1164                                               "while we were reading it.",
1165                                               next_lte->file_on_disk);
1166                                 }
1167                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1168                                 goto out;
1169                         }
1170
1171                         // Advance to the next resource.
1172                         //
1173                         // If the next resource needs no compression, just write
1174                         // it with this thread (not now though--- we could be in
1175                         // the middle of writing another resource.)  Keep doing
1176                         // this until we either get to the end of the resources
1177                         // list, or we get to a resource that needs compression.
1178
1179                         while (1) {
1180                                 if (next_resource == stream_list) {
1181                                         next_lte = NULL;
1182                                         break;
1183                                 }
1184                         #ifdef WITH_NTFS_3G
1185                                 end_wim_resource_read(next_lte, ni);
1186                                 ni = NULL;
1187                         #else
1188                                 end_wim_resource_read(next_lte);
1189                         #endif
1190
1191                                 next_lte = container_of(next_resource,
1192                                                         struct lookup_table_entry,
1193                                                         staging_list);
1194                                 next_resource = next_resource->next;
1195                                 if ((next_lte->resource_location == RESOURCE_IN_WIM
1196                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
1197                                     || wim_resource_size(next_lte) == 0)
1198                                 {
1199                                         list_add_tail(&next_lte->staging_list,
1200                                                       &my_resources);
1201                                 } else {
1202                                         list_add_tail(&next_lte->staging_list,
1203                                                       &outstanding_resources);
1204                                         next_chunk = 0;
1205                                         next_num_chunks = wim_resource_chunks(next_lte);
1206                                         sha1_init(&next_sha_ctx);
1207                                         INIT_LIST_HEAD(&next_lte->msg_list);
1208                                 #ifdef WITH_NTFS_3G
1209                                         ret = prepare_resource_for_read(next_lte, &ni);
1210                                 #else
1211                                         ret = prepare_resource_for_read(next_lte);
1212                                 #endif
1213                                         if (ret != 0)
1214                                                 goto out;
1215                                         DEBUG2("Updated next_lte");
1216                                         break;
1217                                 }
1218                         }
1219                 }
1220
1221                 // If there are no outstanding resources, there are no more
1222                 // resources that need to be written.
1223                 if (list_empty(&outstanding_resources)) {
1224                         DEBUG("No outstanding resources! Done");
1225                         ret = 0;
1226                         goto out;
1227                 }
1228
1229                 // Get the next message from the queue and process it.
1230                 // The message will contain 1 or more data chunks that have been
1231                 // compressed.
1232                 DEBUG2("Waiting for message");
1233                 msg = shared_queue_get(compressed_res_queue);
1234                 msg->complete = true;
1235
1236                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1237
1238                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
1239                         DEBUG2("complete=%d", msg->complete);
1240                 }
1241
1242                 // Is this the next chunk in the current resource?  If it's not
1243                 // (i.e., an earlier chunk in a same or different resource
1244                 // hasn't been compressed yet), do nothing, and keep this
1245                 // message around until all earlier chunks are received.
1246                 //
1247                 // Otherwise, write all the chunks we can.
1248                 while (!list_empty(&cur_lte->msg_list)
1249                         && (msg = container_of(cur_lte->msg_list.next,
1250                                                struct message,
1251                                                list))->complete)
1252                 {
1253                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1254                         if (msg->begin_chunk == 0) {
1255                                 DEBUG2("Begin chunk tab");
1256
1257
1258
1259                                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1260                                         show_stream_write_progress(&cur_size,
1261                                                                    &next_size,
1262                                                                    total_size,
1263                                                                    one_percent,
1264                                                                    &cur_percent,
1265                                                                    cur_lte);
1266                                 }
1267
1268                                 // This is the first set of chunks.  Leave space
1269                                 // for the chunk table in the output file.
1270                                 off_t cur_offset = ftello(out_fp);
1271                                 if (cur_offset == -1) {
1272                                         ret = WIMLIB_ERR_WRITE;
1273                                         goto out;
1274                                 }
1275                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1276                                                                    out_fp,
1277                                                                    cur_offset,
1278                                                                    &cur_chunk_tab);
1279                                 if (ret != 0)
1280                                         goto out;
1281                         }
1282
1283                         // Write the compressed chunks from the message.
1284                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1285                         if (ret != 0)
1286                                 goto out;
1287
1288                         list_del(&msg->list);
1289
1290                         // This message is available to use for different chunks
1291                         // now.
1292                         list_add(&msg->list, &available_msgs);
1293
1294                         // Was this the last chunk of the stream?  If so,
1295                         // finish it.
1296                         if (list_empty(&cur_lte->msg_list) &&
1297                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1298                         {
1299                                 DEBUG2("Finish wim chunk tab");
1300                                 u64 res_csize;
1301                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1302                                                                     out_fp,
1303                                                                     &res_csize);
1304                                 if (ret != 0)
1305                                         goto out;
1306
1307
1308                                 cur_lte->output_resource_entry.size =
1309                                         res_csize;
1310
1311                                 cur_lte->output_resource_entry.original_size =
1312                                         cur_lte->resource_entry.original_size;
1313
1314                                 cur_lte->output_resource_entry.offset =
1315                                         cur_chunk_tab->file_offset;
1316
1317                                 cur_lte->output_resource_entry.flags =
1318                                         cur_lte->resource_entry.flags |
1319                                                 WIM_RESHDR_FLAG_COMPRESSED;
1320
1321                                 FREE(cur_chunk_tab);
1322                                 cur_chunk_tab = NULL;
1323
1324                                 struct list_head *next = cur_lte->staging_list.next;
1325                                 list_del(&cur_lte->staging_list);
1326
1327                                 if (next == &outstanding_resources) {
1328                                         DEBUG("No more outstanding resources");
1329                                         ret = 0;
1330                                         goto out;
1331                                 } else {
1332                                         cur_lte = container_of(cur_lte->staging_list.next,
1333                                                                struct lookup_table_entry,
1334                                                                staging_list);
1335                                 }
1336
1337                                 // Since we just finished writing a stream,
1338                                 // write any streams that have been added to the
1339                                 // my_resources list for direct writing by the
1340                                 // main thread (e.g. resources that don't need
1341                                 // to be compressed because the desired
1342                                 // compression type is the same as the previous
1343                                 // compression type).
1344                                 struct lookup_table_entry *tmp;
1345                                 list_for_each_entry_safe(lte,
1346                                                          tmp,
1347                                                          &my_resources,
1348                                                          staging_list)
1349                                 {
1350                                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1351                                                 show_stream_write_progress(&cur_size,
1352                                                                            &next_size,
1353                                                                            total_size,
1354                                                                            one_percent,
1355                                                                            &cur_percent,
1356                                                                            lte);
1357                                         }
1358
1359                                         ret = write_wim_resource(lte,
1360                                                                  out_fp,
1361                                                                  out_ctype,
1362                                                                  &lte->output_resource_entry,
1363                                                                  0);
1364                                         list_del(&lte->staging_list);
1365                                         if (ret != 0)
1366                                                 goto out;
1367                                 }
1368                         }
1369                 }
1370         }
1371
1372 out:
1373 #ifdef WITH_NTFS_3G
1374         end_wim_resource_read(cur_lte, ni);
1375 #else
1376         end_wim_resource_read(cur_lte);
1377 #endif
1378         if (ret == 0) {
1379                 list_for_each_entry(lte, &my_resources, staging_list) {
1380                         ret = write_wim_resource(lte, out_fp,
1381                                                  out_ctype,
1382                                                  &lte->output_resource_entry,
1383                                                  0);
1384                         if (ret != 0)
1385                                 break;
1386                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1387                                 show_stream_write_progress(&cur_size,
1388                                                            &next_size,
1389                                                            total_size,
1390                                                            one_percent,
1391                                                            &cur_percent,
1392                                                            lte);
1393                         }
1394                 }
1395                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1396                         finish_stream_write_progress(total_size);
1397         } else {
1398                 size_t num_available_msgs = 0;
1399                 struct list_head *cur;
1400
1401                 list_for_each(cur, &available_msgs) {
1402                         num_available_msgs++;
1403                 }
1404
1405                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1406                         shared_queue_get(compressed_res_queue);
1407                         num_available_msgs++;
1408                 }
1409         }
1410
1411         DEBUG("Freeing messages");
1412
1413         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1414                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1415                         FREE(msgs[i].compressed_chunks[j]);
1416                         FREE(msgs[i].uncompressed_chunks[j]);
1417                 }
1418         }
1419
1420         if (cur_chunk_tab != NULL)
1421                 FREE(cur_chunk_tab);
1422         return ret;
1423 }
1424
1425 static int write_stream_list_parallel(struct list_head *stream_list,
1426                                       FILE *out_fp, int out_ctype,
1427                                       int write_flags, u64 total_size,
1428                                       unsigned num_threads)
1429 {
1430         int ret;
1431         struct shared_queue res_to_compress_queue;
1432         struct shared_queue compressed_res_queue;
1433
1434         if (num_threads == 0) {
1435                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1436                 if (nthreads < 1) {
1437                         WARNING("Could not determine number of processors! Assuming 1");
1438                         goto out_serial;
1439                 } else {
1440                         num_threads = nthreads;
1441                 }
1442         }
1443
1444         wimlib_assert(stream_list->next != stream_list);
1445
1446         {
1447                 pthread_t compressor_threads[num_threads];
1448
1449                 static const double MESSAGES_PER_THREAD = 2.0;
1450                 size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1451
1452                 DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1453
1454                 ret = shared_queue_init(&res_to_compress_queue, queue_size);
1455                 if (ret != 0)
1456                         goto out_serial;
1457
1458                 ret = shared_queue_init(&compressed_res_queue, queue_size);
1459                 if (ret != 0)
1460                         goto out_destroy_res_to_compress_queue;
1461
1462                 struct compressor_thread_params params;
1463                 params.res_to_compress_queue = &res_to_compress_queue;
1464                 params.compressed_res_queue = &compressed_res_queue;
1465                 params.compress = get_compress_func(out_ctype);
1466
1467                 for (unsigned i = 0; i < num_threads; i++) {
1468                         DEBUG("pthread_create thread %u", i);
1469                         ret = pthread_create(&compressor_threads[i], NULL,
1470                                              compressor_thread_proc, &params);
1471                         if (ret != 0) {
1472                                 ERROR_WITH_ERRNO("Failed to create compressor "
1473                                                  "thread %u", i);
1474                                 num_threads = i;
1475                                 goto out_join;
1476                         }
1477                 }
1478
1479                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1480                         printf("Writing compressed data using %u threads...\n",
1481                                num_threads);
1482                 }
1483
1484                 ret = main_writer_thread_proc(stream_list,
1485                                               out_fp,
1486                                               out_ctype,
1487                                               &res_to_compress_queue,
1488                                               &compressed_res_queue,
1489                                               queue_size,
1490                                               write_flags,
1491                                               total_size);
1492
1493         out_join:
1494                 for (unsigned i = 0; i < num_threads; i++)
1495                         shared_queue_put(&res_to_compress_queue, NULL);
1496
1497                 for (unsigned i = 0; i < num_threads; i++) {
1498                         if (pthread_join(compressor_threads[i], NULL)) {
1499                                 WARNING("Failed to join compressor thread %u: %s",
1500                                         i, strerror(errno));
1501                         }
1502                 }
1503         }
1504         shared_queue_destroy(&compressed_res_queue);
1505 out_destroy_res_to_compress_queue:
1506         shared_queue_destroy(&res_to_compress_queue);
1507         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1508                 return ret;
1509 out_serial:
1510         WARNING("Falling back to single-threaded compression");
1511         return write_stream_list_serial(stream_list, out_fp,
1512                                         out_ctype, write_flags, total_size);
1513 }
1514
1515 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1516                              int out_ctype, int write_flags,
1517                              unsigned num_threads)
1518 {
1519         struct lookup_table_entry *lte;
1520         size_t num_streams = 0;
1521         u64 total_size = 0;
1522         bool compression_needed = false;
1523
1524         list_for_each_entry(lte, stream_list, staging_list) {
1525                 num_streams++;
1526                 total_size += wim_resource_size(lte);
1527                 if (!compression_needed
1528                     && out_ctype != WIM_COMPRESSION_TYPE_NONE
1529                     && (lte->resource_location != RESOURCE_IN_WIM
1530                         || wimlib_get_compression_type(lte->wim) != out_ctype)
1531                     && wim_resource_size(lte) != 0)
1532                         compression_needed = true;
1533         }
1534
1535         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1536                 printf("Preparing to write %zu streams "
1537                        "(%"PRIu64" total bytes uncompressed)\n",
1538                        num_streams, total_size);
1539                 printf("Using compression type %s\n",
1540                        wimlib_get_compression_type_string(out_ctype));
1541         }
1542
1543         if (compression_needed && total_size >= 1000000 && num_threads != 1) {
1544                 return write_stream_list_parallel(stream_list, out_fp,
1545                                                   out_ctype, write_flags,
1546                                                   total_size, num_threads);
1547         } else {
1548                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1549                         const char *reason = "";
1550                         if (num_threads != 1)
1551                                 reason = " (no compression needed)";
1552                         printf("Writing data using 1 thread%s\n", reason);
1553                 }
1554
1555                 return write_stream_list_serial(stream_list, out_fp,
1556                                                 out_ctype, write_flags,
1557                                                 total_size);
1558         }
1559 }
1560
1561
1562 static int dentry_find_streams_to_write(struct dentry *dentry,
1563                                         void *wim)
1564 {
1565         WIMStruct *w = wim;
1566         struct list_head *stream_list = w->private;
1567         struct lookup_table_entry *lte;
1568         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1569                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1570                 if (lte && ++lte->out_refcnt == 1)
1571                         list_add(&lte->staging_list, stream_list);
1572         }
1573         return 0;
1574 }
1575
1576 static int find_streams_to_write(WIMStruct *w)
1577 {
1578         return for_dentry_in_tree(wim_root_dentry(w),
1579                                   dentry_find_streams_to_write, w);
1580 }
1581
1582 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1583                              unsigned num_threads)
1584 {
1585
1586         LIST_HEAD(stream_list);
1587
1588         w->private = &stream_list;
1589         for_image(w, image, find_streams_to_write);
1590         return write_stream_list(&stream_list, w->out_fp,
1591                                  wimlib_get_compression_type(w), write_flags,
1592                                  num_threads);
1593 }
1594
1595 /*
1596  * Write the lookup table, xml data, and integrity table, then overwrite the WIM
1597  * header.
1598  */
1599 int finish_write(WIMStruct *w, int image, int write_flags)
1600 {
1601         off_t lookup_table_offset;
1602         off_t xml_data_offset;
1603         off_t lookup_table_size;
1604         off_t integrity_offset;
1605         off_t xml_data_size;
1606         off_t end_offset;
1607         off_t integrity_size;
1608         int ret;
1609         struct wim_header hdr;
1610         FILE *out = w->out_fp;
1611
1612         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1613                 /* Write the lookup table. */
1614                 lookup_table_offset = ftello(out);
1615                 if (lookup_table_offset == -1)
1616                         return WIMLIB_ERR_WRITE;
1617
1618                 DEBUG("Writing lookup table (offset %"PRIu64")",
1619                       lookup_table_offset);
1620                 ret = write_lookup_table(w->lookup_table, out);
1621                 if (ret != 0)
1622                         return ret;
1623         }
1624
1625         xml_data_offset = ftello(out);
1626         if (xml_data_offset == -1)
1627                 return WIMLIB_ERR_WRITE;
1628
1629         /* @hdr will be the header for the new WIM.  First copy all the data
1630          * from the header in the WIMStruct; then set all the fields that may
1631          * have changed, including the resource entries, boot index, and image
1632          * count.  */
1633         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1634         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1635                 lookup_table_size = xml_data_offset - lookup_table_offset;
1636                 hdr.lookup_table_res_entry.offset = lookup_table_offset;
1637                 hdr.lookup_table_res_entry.size = lookup_table_size;
1638         }
1639         hdr.lookup_table_res_entry.original_size = hdr.lookup_table_res_entry.size;
1640         hdr.lookup_table_res_entry.flags = WIM_RESHDR_FLAG_METADATA;
1641
1642         DEBUG("Writing XML data (offset %"PRIu64")", xml_data_offset);
1643         ret = write_xml_data(w->wim_info, image, out,
1644                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1645                                 wim_info_get_total_bytes(w->wim_info) : 0);
1646         if (ret != 0)
1647                 return ret;
1648
1649         integrity_offset = ftello(out);
1650         if (integrity_offset == -1)
1651                 return WIMLIB_ERR_WRITE;
1652         xml_data_size = integrity_offset - xml_data_offset;
1653
1654         hdr.xml_res_entry.offset                 = xml_data_offset;
1655         hdr.xml_res_entry.size                   = xml_data_size;
1656         hdr.xml_res_entry.original_size          = xml_data_size;
1657         hdr.xml_res_entry.flags                  = 0;
1658
1659         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1660                 ret = write_integrity_table(out, WIM_HEADER_DISK_SIZE,
1661                                             xml_data_offset,
1662                                             write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
1663                 if (ret != 0)
1664                         return ret;
1665                 end_offset = ftello(out);
1666                 if (end_offset == -1)
1667                         return WIMLIB_ERR_WRITE;
1668                 integrity_size              = end_offset - integrity_offset;
1669                 hdr.integrity.offset        = integrity_offset;
1670                 hdr.integrity.size          = integrity_size;
1671                 hdr.integrity.original_size = integrity_size;
1672         } else {
1673                 hdr.integrity.offset        = 0;
1674                 hdr.integrity.size          = 0;
1675                 hdr.integrity.original_size = 0;
1676         }
1677         hdr.integrity.flags = 0;
1678
1679         DEBUG("Updating WIM header.");
1680
1681         /*
1682          * In the WIM header, there is room for the resource entry for a
1683          * metadata resource labeled as the "boot metadata".  This entry should
1684          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1685          * it should be a copy of the resource entry for the image that is
1686          * marked as bootable.  This is not well documented...
1687          */
1688         if (hdr.boot_idx == 0 || !w->image_metadata
1689                         || (image != WIM_ALL_IMAGES && image != hdr.boot_idx)) {
1690                 memset(&hdr.boot_metadata_res_entry, 0,
1691                        sizeof(struct resource_entry));
1692         } else {
1693                 memcpy(&hdr.boot_metadata_res_entry,
1694                        &w->image_metadata[
1695                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1696                        sizeof(struct resource_entry));
1697         }
1698
1699         /* Set image count and boot index correctly for single image writes */
1700         if (image != WIM_ALL_IMAGES) {
1701                 hdr.image_count = 1;
1702                 if (hdr.boot_idx == image)
1703                         hdr.boot_idx = 1;
1704                 else
1705                         hdr.boot_idx = 0;
1706         }
1707
1708
1709         if (fseeko(out, 0, SEEK_SET) != 0)
1710                 return WIMLIB_ERR_WRITE;
1711
1712         ret = write_header(&hdr, out);
1713         if (ret != 0)
1714                 return ret;
1715
1716         DEBUG("Closing output file.");
1717         wimlib_assert(w->out_fp != NULL);
1718         if (fclose(w->out_fp) != 0) {
1719                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1720                 ret = WIMLIB_ERR_WRITE;
1721         }
1722         w->out_fp = NULL;
1723         return ret;
1724 }
1725
1726 /* Open file stream and write dummy header for WIM. */
1727 int begin_write(WIMStruct *w, const char *path, int write_flags)
1728 {
1729         const char *mode;
1730         DEBUG("Opening `%s' for new WIM", path);
1731
1732         /* checking the integrity requires going back over the file to read it.
1733          * XXX
1734          * (It also would be possible to keep a running sha1sum as the file is
1735          * written-- this would be faster, but a bit more complicated) */
1736         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1737                 mode = "w+b";
1738         else
1739                 mode = "wb";
1740
1741         if (w->out_fp)
1742                 fclose(w->out_fp);
1743
1744         w->out_fp = fopen(path, mode);
1745         if (!w->out_fp) {
1746                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
1747                                  path);
1748                 return WIMLIB_ERR_OPEN;
1749         }
1750
1751         /* Write dummy header. It will be overwritten later. */
1752         return write_header(&w->hdr, w->out_fp);
1753 }
1754
1755 /* Writes a stand-alone WIM to a file.  */
1756 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1757                            int image, int write_flags, unsigned num_threads)
1758 {
1759         int ret;
1760
1761         if (!w || !path)
1762                 return WIMLIB_ERR_INVALID_PARAM;
1763
1764         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
1765
1766         if (image != WIM_ALL_IMAGES &&
1767              (image < 1 || image > w->hdr.image_count))
1768                 return WIMLIB_ERR_INVALID_IMAGE;
1769
1770
1771         if (w->hdr.total_parts != 1) {
1772                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1773                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1774         }
1775
1776         if (image == WIM_ALL_IMAGES)
1777                 DEBUG("Writing all images to `%s'.", path);
1778         else
1779                 DEBUG("Writing image %d to `%s'.", image, path);
1780
1781         ret = begin_write(w, path, write_flags);
1782         if (ret != 0)
1783                 return ret;
1784
1785         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1786
1787         ret = write_wim_streams(w, image, write_flags, num_threads);
1788
1789         if (ret != 0) {
1790                 /*ERROR("Failed to write WIM file resources to `%s'", path);*/
1791                 return ret;
1792         }
1793
1794         ret = for_image(w, image, write_metadata_resource);
1795
1796         if (ret != 0) {
1797                 /*ERROR("Failed to write WIM image metadata to `%s'", path);*/
1798                 return ret;
1799         }
1800
1801         ret = finish_write(w, image, write_flags);
1802         if (ret != 0)
1803                 return ret;
1804
1805         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE)
1806                 printf("Successfully wrote `%s'\n", path);
1807         return 0;
1808 }