wimlib.net Git - wimlib/blob - src/write.c

   1 /*
   2  * write.c
   3  *
   4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
   5  * compressed file resources, etc.
   6  */
   7
   8 /*
   9  * Copyright (C) 2010 Carl Thijssen
  10  * Copyright (C) 2012 Eric Biggers
  11  *
  12  * This file is part of wimlib, a library for working with WIM files.
  13  *
  14  * wimlib is free software; you can redistribute it and/or modify it under the
  15  * terms of the GNU General Public License as published by the Free
  16  * Software Foundation; either version 3 of the License, or (at your option)
  17  * any later version.
  18  *
  19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
  22  * details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
  26  */
  27
  28 #include "wimlib_internal.h"
  29 #include "io.h"
  30 #include "dentry.h"
  31 #include "lookup_table.h"
  32 #include "xml.h"
  33 #include "lzx.h"
  34 #include "xpress.h"
  35 #include <unistd.h>
  36
  37 #ifdef ENABLE_MULTITHREADED_COMPRESSION
  38 #include <semaphore.h>
  39 #include <pthread.h>
  40 #include <errno.h>
  41 #endif
  42
  43 #ifdef WITH_NTFS_3G
  44 #include <time.h>
  45 #include <ntfs-3g/attrib.h>
  46 #include <ntfs-3g/inode.h>
  47 #include <ntfs-3g/dir.h>
  48 #endif
  49
  50
  51 #ifdef HAVE_ALLOCA_H
  52 #include <alloca.h>
  53 #else
  54 #include <stdlib.h>
  55 #endif
  56
  57 /* Reopens the FILE* for a WIM read-write. */
  58 static int reopen_rw(WIMStruct *w)
  59 {
  60         FILE *fp;
  61
  62         if (fclose(w->fp) != 0)
  63                 ERROR_WITH_ERRNO("Failed to close the file `%s'", w->filename);
  64         w->fp = NULL;
  65         fp = fopen(w->filename, "r+b");
  66         if (!fp) {
  67                 ERROR_WITH_ERRNO("Failed to open `%s' for reading and writing",
  68                                  w->filename);
  69                 return WIMLIB_ERR_OPEN;
  70         }
  71         w->fp = fp;
  72         return 0;
  73 }
  74
  75
  76
  77 /*
  78  * Writes a WIM file to the original file that it was read from, overwriting it.
  79  */
  80 WIMLIBAPI int wimlib_overwrite(WIMStruct *w, int write_flags,
  81                                unsigned num_threads)
  82 {
  83         const char *wimfile_name;
  84         size_t wim_name_len;
  85         int ret;
  86
  87         if (!w)
  88                 return WIMLIB_ERR_INVALID_PARAM;
  89
  90         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
  91
  92         wimfile_name = w->filename;
  93
  94         DEBUG("Replacing WIM file `%s'.", wimfile_name);
  95
  96         if (!wimfile_name)
  97                 return WIMLIB_ERR_NO_FILENAME;
  98
  99         /* Write the WIM to a temporary file. */
 100         /* XXX should the temporary file be somewhere else? */
 101         wim_name_len = strlen(wimfile_name);
 102         char tmpfile[wim_name_len + 10];
 103         memcpy(tmpfile, wimfile_name, wim_name_len);
 104         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
 105         tmpfile[wim_name_len + 9] = '\0';
 106
 107         ret = wimlib_write(w, tmpfile, WIM_ALL_IMAGES, write_flags,
 108                            num_threads);
 109         if (ret != 0) {
 110                 ERROR("Failed to write the WIM file `%s'", tmpfile);
 111                 if (unlink(tmpfile) != 0)
 112                         WARNING("Failed to remove `%s'", tmpfile);
 113                 return ret;
 114         }
 115
 116         DEBUG("Closing original WIM file.");
 117         /* Close the original WIM file that was opened for reading. */
 118         if (w->fp) {
 119                 if (fclose(w->fp) != 0) {
 120                         WARNING("Failed to close the file `%s'", wimfile_name);
 121                 }
 122                 w->fp = NULL;
 123         }
 124
 125         DEBUG("Renaming `%s' to `%s'", tmpfile, wimfile_name);
 126
 127         /* Rename the new file to the old file .*/
 128         if (rename(tmpfile, wimfile_name) != 0) {
 129                 ERROR_WITH_ERRNO("Failed to rename `%s' to `%s'",
 130                                  tmpfile, wimfile_name);
 131                 /* Remove temporary file. */
 132                 if (unlink(tmpfile) != 0)
 133                         ERROR_WITH_ERRNO("Failed to remove `%s'", tmpfile);
 134                 return WIMLIB_ERR_RENAME;
 135         }
 136
 137         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
 138                 printf("Successfully renamed `%s' to `%s'\n", tmpfile, wimfile_name);
 139
 140         return 0;
 141 }
 142
 143 static int check_resource_offset(struct lookup_table_entry *lte, void *arg)
 144 {
 145         u64 xml_data_offset = *(u64*)arg;
 146         if (lte->resource_entry.offset > xml_data_offset) {
 147                 ERROR("The following resource is *after* the XML data:");
 148                 print_lookup_table_entry(lte);
 149                 return WIMLIB_ERR_RESOURCE_ORDER;
 150         }
 151         return 0;
 152 }
 153
 154 WIMLIBAPI int wimlib_overwrite_xml_and_header(WIMStruct *w, int write_flags)
 155 {
 156         int ret;
 157         FILE *fp;
 158         u8 *integrity_table = NULL;
 159         off_t xml_end;
 160         off_t xml_size;
 161         size_t bytes_written;
 162
 163         DEBUG("Overwriting XML and header of `%s', write_flags = %#x",
 164               w->filename, write_flags);
 165
 166         if (!w->filename)
 167                 return WIMLIB_ERR_NO_FILENAME;
 168
 169         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
 170
 171         /* Make sure that the integrity table (if present) is after the XML
 172          * data, and that there are no stream resources, metadata resources, or
 173          * lookup tables after the XML data.  Otherwise, these data would be
 174          * destroyed by this function. */
 175         if (w->hdr.integrity.offset != 0 &&
 176             w->hdr.integrity.offset < w->hdr.xml_res_entry.offset) {
 177                 ERROR("Didn't expect the integrity table to be before the XML data");
 178                 return WIMLIB_ERR_RESOURCE_ORDER;
 179         }
 180
 181         if (w->hdr.lookup_table_res_entry.offset >
 182             w->hdr.xml_res_entry.offset) {
 183                 ERROR("Didn't expect the lookup table to be after the XML data");
 184                 return WIMLIB_ERR_RESOURCE_ORDER;
 185         }
 186
 187         ret = for_lookup_table_entry(w->lookup_table, check_resource_offset,
 188                                      &w->hdr.xml_res_entry.offset);
 189         if (ret != 0)
 190                 return ret;
 191
 192         ret = reopen_rw(w);
 193         if (ret != 0)
 194                 return ret;
 195
 196         fp = w->fp;
 197
 198         /* The old integrity table is still OK, as the SHA1 message digests in
 199          * the integrity table include neither the header nor the XML data.
 200          * Save it for later if it exists and an integrity table was required.
 201          * */
 202         if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
 203              && w->hdr.integrity.offset != 0)
 204         {
 205                 DEBUG("Reading existing integrity table.");
 206                 integrity_table = MALLOC(w->hdr.integrity.size);
 207                 if (!integrity_table)
 208                         return WIMLIB_ERR_NOMEM;
 209
 210                 ret = read_uncompressed_resource(fp, w->hdr.integrity.offset,
 211                                                  w->hdr.integrity.original_size,
 212                                                  integrity_table);
 213                 if (ret != 0)
 214                         goto err;
 215                 DEBUG("Done reading existing integrity table.");
 216         }
 217
 218         DEBUG("Overwriting XML data.");
 219         /* Overwrite the XML data. */
 220         if (fseeko(fp, w->hdr.xml_res_entry.offset, SEEK_SET) != 0) {
 221                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
 222                                  "for XML data", w->hdr.xml_res_entry.offset);
 223                 ret = WIMLIB_ERR_WRITE;
 224                 goto err;
 225         }
 226         ret = write_xml_data(w->wim_info, WIM_ALL_IMAGES, fp, 0);
 227         if (ret != 0)
 228                 goto err;
 229
 230         DEBUG("Updating XML resource entry.");
 231         /* Update the XML resource entry in the WIM header. */
 232         xml_end = ftello(fp);
 233         if (xml_end == -1) {
 234                 ret = WIMLIB_ERR_WRITE;
 235                 goto err;
 236         }
 237         xml_size = xml_end - w->hdr.xml_res_entry.offset;
 238         w->hdr.xml_res_entry.size = xml_size;
 239         w->hdr.xml_res_entry.original_size = xml_size;
 240         /* XML data offset is unchanged. */
 241
 242         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
 243                 DEBUG("Writing integrity table.");
 244                 w->hdr.integrity.offset = xml_end;
 245                 if (integrity_table) {
 246                         /* The existing integrity table was saved. */
 247                         bytes_written = fwrite(integrity_table, 1,
 248                                                w->hdr.integrity.size, fp);
 249                         if (bytes_written != w->hdr.integrity.size) {
 250                                 ERROR_WITH_ERRNO("Failed to write integrity "
 251                                                  "table");
 252                                 ret = WIMLIB_ERR_WRITE;
 253                                 goto err;
 254                         }
 255                         FREE(integrity_table);
 256                 } else {
 257                         /* There was no existing integrity table, so a new one
 258                          * must be calculated. */
 259                         ret = write_integrity_table(fp, WIM_HEADER_DISK_SIZE,
 260                                         w->hdr.lookup_table_res_entry.offset +
 261                                         w->hdr.lookup_table_res_entry.size,
 262                                         write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
 263                         if (ret != 0)
 264                                 return ret;
 265
 266                         off_t end_integrity = ftello(fp);
 267                         if (end_integrity == -1)
 268                                 return WIMLIB_ERR_WRITE;
 269
 270                         off_t integrity_size           = end_integrity - xml_end;
 271                         w->hdr.integrity.size          = integrity_size;
 272                         w->hdr.integrity.original_size = integrity_size;
 273                         w->hdr.integrity.flags         = 0;
 274                 }
 275         } else {
 276                 DEBUG("Truncating file to end of XML data.");
 277                 /* No integrity table to write.  The file should be truncated
 278                  * because it's possible that the old file was longer (due to it
 279                  * including an integrity table, or due to its XML data being
 280                  * longer) */
 281                 if (fflush(fp) != 0) {
 282                         ERROR_WITH_ERRNO("Failed to flush stream for file `%s'",
 283                                          w->filename);
 284                         return WIMLIB_ERR_WRITE;
 285                 }
 286                 if (ftruncate(fileno(fp), xml_end) != 0) {
 287                         ERROR_WITH_ERRNO("Failed to truncate `%s' to %"PRIu64" "
 288                                          "bytes", w->filename, xml_end);
 289                         return WIMLIB_ERR_WRITE;
 290                 }
 291                 memset(&w->hdr.integrity, 0, sizeof(struct resource_entry));
 292         }
 293
 294         DEBUG("Overwriting header.");
 295         /* Overwrite the header. */
 296         if (fseeko(fp, 0, SEEK_SET) != 0) {
 297                 ERROR_WITH_ERRNO("Failed to seek to beginning of `%s'",
 298                                  w->filename);
 299                 return WIMLIB_ERR_WRITE;
 300         }
 301
 302         ret = write_header(&w->hdr, fp);
 303         if (ret != 0)
 304                 return ret;
 305
 306         DEBUG("Closing `%s'.", w->filename);
 307         if (fclose(fp) != 0) {
 308                 ERROR_WITH_ERRNO("Failed to close `%s'", w->filename);
 309                 return WIMLIB_ERR_WRITE;
 310         }
 311         w->fp = NULL;
 312         DEBUG("Done.");
 313         return 0;
 314 err:
 315         FREE(integrity_table);
 316         return ret;
 317 }
 318
 319
 320 /* Chunk table that's located at the beginning of each compressed resource in
 321  * the WIM.  (This is not the on-disk format; the on-disk format just has an
 322  * array of offsets.) */
 323 struct chunk_table {
 324         off_t file_offset;
 325         u64 num_chunks;
 326         u64 original_resource_size;
 327         u64 bytes_per_chunk_entry;
 328         u64 table_disk_size;
 329         u64 cur_offset;
 330         u64 *cur_offset_p;
 331         u64 offsets[0];
 332 };
 333
 334 /*
 335  * Allocates and initializes a chunk table, and reserves space for it in the
 336  * output file.
 337  */
 338 static int
 339 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
 340                              FILE *out_fp,
 341                              off_t file_offset,
 342                              struct chunk_table **chunk_tab_ret)
 343 {
 344         u64 size = wim_resource_size(lte);
 345         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
 346         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
 347         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
 348         int ret;
 349
 350         if (!chunk_tab) {
 351                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
 352                       "resource", size);
 353                 ret = WIMLIB_ERR_NOMEM;
 354                 goto out;
 355         }
 356         chunk_tab->file_offset = file_offset;
 357         chunk_tab->num_chunks = num_chunks;
 358         chunk_tab->original_resource_size = size;
 359         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
 360         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
 361                                      (num_chunks - 1);
 362         chunk_tab->cur_offset = 0;
 363         chunk_tab->cur_offset_p = chunk_tab->offsets;
 364
 365         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
 366                    chunk_tab->table_disk_size) {
 367                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 368                                  "file resource");
 369                 ret = WIMLIB_ERR_WRITE;
 370                 goto out;
 371         }
 372
 373         ret = 0;
 374 out:
 375         *chunk_tab_ret = chunk_tab;
 376         return ret;
 377 }
 378
 379 typedef int (*compress_func_t)(const void *, unsigned, void *, unsigned *);
 380
 381 compress_func_t get_compress_func(int out_ctype)
 382 {
 383         if (out_ctype == WIM_COMPRESSION_TYPE_LZX)
 384                 return lzx_compress;
 385         else
 386                 return xpress_compress;
 387 }
 388
 389
 390 /*
 391  * Compresses a chunk of a WIM resource.
 392  *
 393  * @chunk:              Uncompressed data of the chunk.
 394  * @chunk_size:         Size of the uncompressed chunk in bytes.
 395  * @compressed_chunk:   Pointer to output buffer of size at least
 396  *                              (@chunk_size - 1) bytes.
 397  * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
 398  *                                      of the compressed chunk will be
 399  *                                      returned.
 400  * @ctype:      Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
 401  *              or WIM_COMPRESSION_TYPE_XPRESS.
 402  *
 403  * Returns zero if compressed succeeded, and nonzero if the chunk could not be
 404  * compressed to any smaller than @chunk_size.  This function cannot fail for
 405  * any other reasons.
 406  */
 407 static int compress_chunk(const u8 chunk[], unsigned chunk_size,
 408                           u8 compressed_chunk[],
 409                           unsigned *compressed_chunk_len_ret,
 410                           int ctype)
 411 {
 412         compress_func_t compress = get_compress_func(ctype);
 413         return (*compress)(chunk, chunk_size, compressed_chunk,
 414                            compressed_chunk_len_ret);
 415 }
 416
 417 /*
 418  * Writes a chunk of a WIM resource to an output file.
 419  *
 420  * @chunk:        Uncompressed data of the chunk.
 421  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
 422  * @out_fp:       FILE * to write tho chunk to.
 423  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
 424  *                      chunk table provided)
 425  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
 426  *                      offset of the chunk we write.
 427  *
 428  * Returns 0 on success; nonzero on failure.
 429  */
 430 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
 431                                     FILE *out_fp, int out_ctype,
 432                                     struct chunk_table *chunk_tab)
 433 {
 434         const u8 *out_chunk;
 435         unsigned out_chunk_size;
 436
 437         wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
 438
 439         if (!chunk_tab) {
 440                 out_chunk = chunk;
 441                 out_chunk_size = chunk_size;
 442         } else {
 443                 u8 *compressed_chunk = alloca(chunk_size);
 444                 int ret;
 445
 446                 ret = compress_chunk(chunk, chunk_size, compressed_chunk,
 447                                      &out_chunk_size, out_ctype);
 448                 if (ret == 0) {
 449                         out_chunk = compressed_chunk;
 450                 } else {
 451                         out_chunk = chunk;
 452                         out_chunk_size = chunk_size;
 453                 }
 454                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
 455                 chunk_tab->cur_offset += out_chunk_size;
 456         }
 457
 458         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
 459                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
 460                 return WIMLIB_ERR_WRITE;
 461         }
 462         return 0;
 463 }
 464
 465 /*
 466  * Finishes a WIM chunk tale and writes it to the output file at the correct
 467  * offset.
 468  *
 469  * The final size of the full compressed resource is returned in the
 470  * @compressed_size_p.
 471  */
 472 static int
 473 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
 474                               FILE *out_fp, u64 *compressed_size_p)
 475 {
 476         size_t bytes_written;
 477         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
 478                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
 479                                  "WIM file", chunk_tab->file_offset);
 480                 return WIMLIB_ERR_WRITE;
 481         }
 482
 483         if (chunk_tab->bytes_per_chunk_entry == 8) {
 484                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
 485         } else {
 486                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
 487                         ((u32*)chunk_tab->offsets)[i] =
 488                                 cpu_to_le32(chunk_tab->offsets[i]);
 489         }
 490         bytes_written = fwrite((u8*)chunk_tab->offsets +
 491                                         chunk_tab->bytes_per_chunk_entry,
 492                                1, chunk_tab->table_disk_size, out_fp);
 493         if (bytes_written != chunk_tab->table_disk_size) {
 494                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 495                                  "file resource");
 496                 return WIMLIB_ERR_WRITE;
 497         }
 498         if (fseeko(out_fp, 0, SEEK_END) != 0) {
 499                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
 500                 return WIMLIB_ERR_WRITE;
 501         }
 502         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
 503         return 0;
 504 }
 505
 506 static int prepare_resource_for_read(struct lookup_table_entry *lte
 507
 508                                         #ifdef WITH_NTFS_3G
 509                                         , ntfs_inode **ni_ret
 510                                         #endif
 511                 )
 512 {
 513         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
 514              && !lte->file_on_disk_fp)
 515         {
 516                 wimlib_assert(lte->file_on_disk);
 517                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
 518                 if (!lte->file_on_disk_fp) {
 519                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
 520                                          "reading", lte->file_on_disk);
 521                         return WIMLIB_ERR_OPEN;
 522                 }
 523         }
 524 #ifdef WITH_NTFS_3G
 525         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
 526                   && !lte->attr)
 527         {
 528                 struct ntfs_location *loc = lte->ntfs_loc;
 529                 ntfs_inode *ni;
 530                 wimlib_assert(loc);
 531                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
 532                 if (!ni) {
 533                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
 534                                          "volume", loc->path_utf8);
 535                         return WIMLIB_ERR_NTFS_3G;
 536                 }
 537                 lte->attr = ntfs_attr_open(ni,
 538                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
 539                                            (ntfschar*)loc->stream_name_utf16,
 540                                            loc->stream_name_utf16_num_chars);
 541                 if (!lte->attr) {
 542                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
 543                                          "NTFS volume", loc->path_utf8);
 544                         ntfs_inode_close(ni);
 545                         return WIMLIB_ERR_NTFS_3G;
 546                 }
 547                 *ni_ret = ni;
 548         }
 549 #endif
 550         return 0;
 551 }
 552
 553 static void end_wim_resource_read(struct lookup_table_entry *lte
 554                                 #ifdef WITH_NTFS_3G
 555                                         , ntfs_inode *ni
 556                                 #endif
 557                                         )
 558 {
 559         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
 560             && lte->file_on_disk_fp) {
 561                 fclose(lte->file_on_disk_fp);
 562                 lte->file_on_disk_fp = NULL;
 563         }
 564 #ifdef WITH_NTFS_3G
 565         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
 566                 if (lte->attr) {
 567                         ntfs_attr_close(lte->attr);
 568                         lte->attr = NULL;
 569                 }
 570                 if (ni)
 571                         ntfs_inode_close(ni);
 572         }
 573 #endif
 574 }
 575
 576 /*
 577  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
 578  * written uncompressed or compressed depending on the @out_ctype parameter.
 579  *
 580  * If by chance the resource compresses to more than the original size (this may
 581  * happen with random data or files than are pre-compressed), the resource is
 582  * instead written uncompressed (and this is reflected in the @out_res_entry by
 583  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
 584  *
 585  * @lte:        The lookup table entry for the WIM resource.
 586  * @out_fp:     The FILE * to write the resource to.
 587  * @out_ctype:  The compression type of the resource to write.  Note: if this is
 588  *                      the same as the compression type of the WIM resource we
 589  *                      need to read, we simply copy the data (i.e. we do not
 590  *                      uncompress it, then compress it again).
 591  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
 592  *                  offset, original size, compressed size, and compression flag
 593  *                  of the output resource.
 594  *
 595  * Returns 0 on success; nonzero on failure.
 596  */
 597 int write_wim_resource(struct lookup_table_entry *lte,
 598                        FILE *out_fp, int out_ctype,
 599                        struct resource_entry *out_res_entry,
 600                        int flags)
 601 {
 602         u64 bytes_remaining;
 603         u64 original_size;
 604         u64 old_compressed_size;
 605         u64 new_compressed_size;
 606         u64 offset;
 607         int ret;
 608         struct chunk_table *chunk_tab = NULL;
 609         bool raw;
 610         off_t file_offset;
 611 #ifdef WITH_NTFS_3G
 612         ntfs_inode *ni = NULL;
 613 #endif
 614
 615         wimlib_assert(lte);
 616
 617         /* Original size of the resource */
 618         original_size = wim_resource_size(lte);
 619
 620         /* Compressed size of the resource (as it exists now) */
 621         old_compressed_size = wim_resource_compressed_size(lte);
 622
 623         /* Current offset in output file */
 624         file_offset = ftello(out_fp);
 625         if (file_offset == -1) {
 626                 ERROR_WITH_ERRNO("Failed to get offset in output "
 627                                  "stream");
 628                 return WIMLIB_ERR_WRITE;
 629         }
 630
 631         /* Are the compression types the same?  If so, do a raw copy (copy
 632          * without decompressing and recompressing the data). */
 633         raw = (wim_resource_compression_type(lte) == out_ctype
 634                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
 635
 636         if (raw) {
 637                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
 638                 bytes_remaining = old_compressed_size;
 639         } else {
 640                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
 641                 bytes_remaining = original_size;
 642         }
 643
 644         /* Empty resource; nothing needs to be done, so just return success. */
 645         if (bytes_remaining == 0)
 646                 return 0;
 647
 648         /* Buffer for reading chunks for the resource */
 649         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
 650
 651         /* If we are writing a compressed resource and not doing a raw copy, we
 652          * need to initialize the chunk table */
 653         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
 654                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
 655                                                    &chunk_tab);
 656                 if (ret != 0)
 657                         goto out;
 658         }
 659
 660         /* If the WIM resource is in an external file, open a FILE * to it so we
 661          * don't have to open a temporary one in read_wim_resource() for each
 662          * chunk. */
 663 #ifdef WITH_NTFS_3G
 664         ret = prepare_resource_for_read(lte, &ni);
 665 #else
 666         ret = prepare_resource_for_read(lte);
 667 #endif
 668         if (ret != 0)
 669                 goto out;
 670
 671         /* If we aren't doing a raw copy, we will compute the SHA1 message
 672          * digest of the resource as we read it, and verify it's the same as the
 673          * hash given in the lookup table entry once we've finished reading the
 674          * resource. */
 675         SHA_CTX ctx;
 676         if (!raw)
 677                 sha1_init(&ctx);
 678
 679         /* While there are still bytes remaining in the WIM resource, read a
 680          * chunk of the resource, update SHA1, then write that chunk using the
 681          * desired compression type. */
 682         offset = 0;
 683         do {
 684                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
 685                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
 686                 if (ret != 0)
 687                         goto out_fclose;
 688                 if (!raw)
 689                         sha1_update(&ctx, buf, to_read);
 690                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
 691                                                out_ctype, chunk_tab);
 692                 if (ret != 0)
 693                         goto out_fclose;
 694                 bytes_remaining -= to_read;
 695                 offset += to_read;
 696         } while (bytes_remaining);
 697
 698         /* Raw copy:  The new compressed size is the same as the old compressed
 699          * size
 700          *
 701          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
 702          * original size
 703          *
 704          * Using a different compression type:  Call
 705          * finish_wim_resource_chunk_tab() and it will provide the new
 706          * compressed size.
 707          */
 708         if (raw) {
 709                 new_compressed_size = old_compressed_size;
 710         } else {
 711                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
 712                         new_compressed_size = original_size;
 713                 else {
 714                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
 715                                                             &new_compressed_size);
 716                         if (ret != 0)
 717                                 goto out_fclose;
 718                 }
 719         }
 720
 721         /* Verify SHA1 message digest of the resource, unless we are doing a raw
 722          * write (in which case we never even saw the uncompressed data).  Or,
 723          * if the hash we had before is all 0's, just re-set it to be the new
 724          * hash. */
 725         if (!raw) {
 726                 u8 md[SHA1_HASH_SIZE];
 727                 sha1_final(md, &ctx);
 728                 if (is_zero_hash(lte->hash)) {
 729                         copy_hash(lte->hash, md);
 730                 } else if (!hashes_equal(md, lte->hash)) {
 731                         ERROR("WIM resource has incorrect hash!");
 732                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
 733                                 ERROR("We were reading it from `%s'; maybe it changed "
 734                                       "while we were reading it.",
 735                                       lte->file_on_disk);
 736                         }
 737                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
 738                         goto out_fclose;
 739                 }
 740         }
 741
 742         if (!raw && new_compressed_size >= original_size &&
 743             out_ctype != WIM_COMPRESSION_TYPE_NONE)
 744         {
 745                 /* Oops!  We compressed the resource to larger than the original
 746                  * size.  Write the resource uncompressed instead. */
 747                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
 748                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
 749                                          "of output WIM file", file_offset);
 750                         ret = WIMLIB_ERR_WRITE;
 751                         goto out_fclose;
 752                 }
 753                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
 754                                          out_res_entry, flags);
 755                 if (ret != 0)
 756                         goto out_fclose;
 757                 if (fflush(out_fp) != 0) {
 758                         ERROR_WITH_ERRNO("Failed to flush output WIM file");
 759                         ret = WIMLIB_ERR_WRITE;
 760                         goto out_fclose;
 761                 }
 762                 if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
 763                         ERROR_WITH_ERRNO("Failed to truncate output WIM file");
 764                         ret = WIMLIB_ERR_WRITE;
 765                         goto out_fclose;
 766                 }
 767         } else {
 768                 if (out_res_entry) {
 769                         out_res_entry->size          = new_compressed_size;
 770                         out_res_entry->original_size = original_size;
 771                         out_res_entry->offset        = file_offset;
 772                         out_res_entry->flags         = lte->resource_entry.flags
 773                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
 774                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
 775                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
 776                 }
 777         }
 778         ret = 0;
 779 out_fclose:
 780 #ifdef WITH_NTFS_3G
 781         end_wim_resource_read(lte, ni);
 782 #else
 783         end_wim_resource_read(lte);
 784 #endif
 785 out:
 786         FREE(chunk_tab);
 787         return ret;
 788 }
 789
 790
 791 #ifdef ENABLE_MULTITHREADED_COMPRESSION
/*
 * Fixed-capacity circular queue of opaque pointers, used to pass work between
 * the main thread and the compressor threads.  The two semaphores count the
 * filled and empty slots; the mutex protects the indices and the array.
 */
struct shared_queue {
        sem_t filled_slots;     /* Number of slots holding an object (starts at 0) */
        sem_t empty_slots;      /* Number of free slots (starts at @size) */
        pthread_mutex_t lock;   /* Held while @front, @back or @array is modified */
        unsigned front;         /* Index of the next slot to be read by a get */
        unsigned back;          /* Index of the slot most recently written by a put */
        void **array;           /* Storage for the @size queued pointers */
        unsigned size;          /* Capacity of the queue, in slots */
};
 801
 802 static int shared_queue_init(struct shared_queue *q, unsigned size)
 803 {
 804         q->array = CALLOC(sizeof(q->array[0]), size);
 805         if (!q->array)
 806                 return WIMLIB_ERR_NOMEM;
 807
 808         sem_init(&q->filled_slots, 0, 0);
 809         sem_init(&q->empty_slots, 0, size);
 810         pthread_mutex_init(&q->lock, NULL);
 811         q->front = 0;
 812         q->back = size - 1;
 813         q->size = size;
 814         return 0;
 815 }
 816
 817 static void shared_queue_destroy(struct shared_queue *q)
 818 {
 819         sem_destroy(&q->filled_slots);
 820         sem_destroy(&q->empty_slots);
 821         pthread_mutex_destroy(&q->lock);
 822         FREE(q->array);
 823 }
 824
 825 static void shared_queue_put(struct shared_queue *q, void *obj)
 826 {
 827         sem_wait(&q->empty_slots);
 828         pthread_mutex_lock(&q->lock);
 829
 830         q->back = (q->back + 1) % q->size;
 831         q->array[q->back] = obj;
 832
 833         sem_post(&q->filled_slots);
 834         pthread_mutex_unlock(&q->lock);
 835 }
 836
 837 static void *shared_queue_get(struct shared_queue *q)
 838 {
 839         sem_wait(&q->filled_slots);
 840         pthread_mutex_lock(&q->lock);
 841
 842         void *obj = q->array[q->front];
 843         q->array[q->front] = NULL;
 844         q->front = (q->front + 1) % q->size;
 845
 846         sem_post(&q->empty_slots);
 847         pthread_mutex_unlock(&q->lock);
 848         return obj;
 849 }
 850
 851 static inline int shared_queue_get_filled(struct shared_queue *q)
 852 {
 853         int sval;
 854         sem_getvalue(&q->filled_slots, &sval);
 855         return sval;
 856 }
 857
/* Arguments handed to each compressor thread (see compressor_thread_proc()). */
struct compressor_thread_params {
        /* Queue the thread takes messages to compress from. */
        struct shared_queue *res_to_compress_queue;
        /* Queue the thread puts each message on once it has been compressed. */
        struct shared_queue *compressed_res_queue;
        /* Function used to compress each chunk in a message. */
        compress_func_t compress;
};
 863
/* Maximum number of chunks carried by a single `struct message'. */
#define MAX_CHUNKS_PER_MSG 2

/*
 * One unit of work exchanged between the main writer thread and a compressor
 * thread: up to MAX_CHUNKS_PER_MSG consecutive chunks of a single stream.
 */
struct message {
        /* Stream the chunks in this message belong to. */
        struct lookup_table_entry *lte;
        /* Buffers holding the uncompressed chunk data read by the main thread. */
        u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
        /* For each chunk, the buffer that should actually be written out:
         * compressed_chunks[i] if compression succeeded, otherwise
         * uncompressed_chunks[i]. */
        u8 *out_compressed_chunks[MAX_CHUNKS_PER_MSG];
        /* Buffers the compressor writes its output into. */
        u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
        /* Number of valid bytes in each uncompressed_chunks[i]. */
        unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        /* Number of bytes to write from each out_compressed_chunks[i]. */
        unsigned compressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
        /* Number of chunks in this message that are in use. */
        unsigned num_chunks;
        /* Links the message into either the list of available messages or the
         * msg_list of the stream it was sent for. */
        struct list_head list;
        /* Set by the main thread once the message has come back from a
         * compressor thread. */
        bool complete;
        /* Index, within the stream, of the first chunk in this message. */
        u64 begin_chunk;
};
 878
 879 static void compress_chunks(struct message *msg, compress_func_t compress)
 880 {
 881         for (unsigned i = 0; i < msg->num_chunks; i++) {
 882                 DEBUG2("compress chunk %u of %u", i, msg->num_chunks);
 883                 int ret = compress(msg->uncompressed_chunks[i],
 884                                    msg->uncompressed_chunk_sizes[i],
 885                                    msg->compressed_chunks[i],
 886                                    &msg->compressed_chunk_sizes[i]);
 887                 if (ret == 0) {
 888                         msg->out_compressed_chunks[i] = msg->compressed_chunks[i];
 889                 } else {
 890                         msg->out_compressed_chunks[i] = msg->uncompressed_chunks[i];
 891                         msg->compressed_chunk_sizes[i] = msg->uncompressed_chunk_sizes[i];
 892                 }
 893         }
 894 }
 895
 896 static void *compressor_thread_proc(void *arg)
 897 {
 898         struct compressor_thread_params *params = arg;
 899         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
 900         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
 901         compress_func_t compress = params->compress;
 902         struct message *msg;
 903
 904         DEBUG("Compressor thread ready");
 905         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
 906                 compress_chunks(msg, compress);
 907                 shared_queue_put(compressed_res_queue, msg);
 908         }
 909         DEBUG("Compressor thread terminating");
 910 }
 911 #endif
 912
 913 static void show_stream_write_progress(u64 *cur_size, u64 *next_size,
 914                                        u64 total_size, u64 one_percent,
 915                                        unsigned *cur_percent,
 916                                        const struct lookup_table_entry *cur_lte)
 917 {
 918         if (*cur_size >= *next_size) {
 919                 printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
 920                        "(uncompressed) written (%u%% done)",
 921                        *cur_size >> 20,
 922                        total_size >> 20, *cur_percent);
 923                 fflush(stdout);
 924                 *next_size += one_percent;
 925                 (*cur_percent)++;
 926         }
 927         *cur_size += wim_resource_size(cur_lte);
 928 }
 929
 930 static void finish_stream_write_progress(u64 total_size)
 931 {
 932         printf("\r%"PRIu64" MiB of %"PRIu64" MiB "
 933                "(uncompressed) written (100%% done)\n",
 934                total_size >> 20, total_size >> 20);
 935         fflush(stdout);
 936 }
 937
 938 static int write_stream_list_serial(struct list_head *stream_list,
 939                                     FILE *out_fp, int out_ctype,
 940                                     int write_flags, u64 total_size)
 941 {
 942         struct lookup_table_entry *lte;
 943         int ret;
 944
 945         u64 one_percent = total_size / 100;
 946         u64 cur_size = 0;
 947         u64 next_size = 0;
 948         unsigned cur_percent = 0;
 949
 950         list_for_each_entry(lte, stream_list, staging_list) {
 951                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
 952                         show_stream_write_progress(&cur_size, &next_size,
 953                                                    total_size, one_percent,
 954                                                    &cur_percent, lte);
 955                 }
 956                 ret = write_wim_resource(lte, out_fp, out_ctype,
 957                                          &lte->output_resource_entry, 0);
 958                 if (ret != 0)
 959                         return ret;
 960         }
 961         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
 962                 finish_stream_write_progress(total_size);
 963         return 0;
 964 }
 965
 966 #ifdef ENABLE_MULTITHREADED_COMPRESSION
 967 static int write_wim_chunks(struct message *msg, FILE *out_fp,
 968                             struct chunk_table *chunk_tab)
 969 {
 970         for (unsigned i = 0; i < msg->num_chunks; i++) {
 971                 unsigned chunk_csize = msg->compressed_chunk_sizes[i];
 972
 973                 DEBUG2("Write wim chunk %u of %u (csize = %u)",
 974                       i, msg->num_chunks, chunk_csize);
 975
 976                 if (fwrite(msg->out_compressed_chunks[i], 1, chunk_csize, out_fp)
 977                     != chunk_csize)
 978                 {
 979                         ERROR_WITH_ERRNO("Failed to write WIM");
 980                         return WIMLIB_ERR_WRITE;
 981                 }
 982
 983                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
 984                 chunk_tab->cur_offset += chunk_csize;
 985         }
 986         return 0;
 987 }
 988
 989 /*
 990  * This function is executed by the main thread when the resources are being
 991  * compressed in parallel.  The main thread is in change of all reading of the
 992  * uncompressed data and writing of the compressed data.  The compressor threads
 993  * *only* do compression from/to in-memory buffers.
 994  *
 995  * Each unit of work given to a compressor thread is up to MAX_CHUNKS_PER_MSG
 996  * chunks of compressed data to compress, represented in a `struct message'.
 997  * Each message is passed from the main thread to a worker thread through the
 998  * res_to_compress_queue, and it is passed back through the
 999  * compressed_res_queue.
1000  */
1001 static int main_writer_thread_proc(struct list_head *stream_list,
1002                                    FILE *out_fp,
1003                                    int out_ctype,
1004                                    struct shared_queue *res_to_compress_queue,
1005                                    struct shared_queue *compressed_res_queue,
1006                                    size_t queue_size,
1007                                    int write_flags,
1008                                    u64 total_size)
1009 {
1010         int ret;
1011
1012
1013         struct message msgs[queue_size];
1014         ZERO_ARRAY(msgs);
1015
1016         // Initially, all the messages are available to use.
1017         LIST_HEAD(available_msgs);
1018         for (size_t i = 0; i < ARRAY_LEN(msgs); i++)
1019                 list_add(&msgs[i].list, &available_msgs);
1020
1021         // outstanding_resources is the list of resources that currently have
1022         // had chunks sent off for compression.
1023         //
1024         // The first stream in outstanding_resources is the stream that is
1025         // currently being written (cur_lte).
1026         //
1027         // The last stream in outstanding_resources is the stream that is
1028         // currently being read and chunks fed to the compressor threads
1029         // (next_lte).
1030         //
1031         // Depending on the number of threads and the sizes of the resource,
1032         // the outstanding streams list may contain streams between cur_lte and
1033         // next_lte that have all their chunks compressed or being compressed,
1034         // but haven't been written yet.
1035         //
1036         LIST_HEAD(outstanding_resources);
1037         struct list_head *next_resource = stream_list->next;
1038         struct lookup_table_entry *next_lte = container_of(next_resource,
1039                                                            struct lookup_table_entry,
1040                                                            staging_list);
1041         next_resource = next_resource->next;
1042         u64 next_chunk = 0;
1043         u64 next_num_chunks = wim_resource_chunks(next_lte);
1044         INIT_LIST_HEAD(&next_lte->msg_list);
1045         list_add_tail(&next_lte->staging_list, &outstanding_resources);
1046
1047         // As in write_wim_resource(), each resource we read is checksummed.
1048         SHA_CTX next_sha_ctx;
1049         sha1_init(&next_sha_ctx);
1050         u8 next_hash[SHA1_HASH_SIZE];
1051
1052         // Resources that don't need any chunks compressed are added to this
1053         // list and written directly by the main thread.
1054         LIST_HEAD(my_resources);
1055
1056         struct lookup_table_entry *cur_lte = next_lte;
1057         struct chunk_table *cur_chunk_tab = NULL;
1058         struct lookup_table_entry *lte;
1059         struct message *msg;
1060
1061         u64 one_percent = total_size / 100;
1062         u64 cur_size = 0;
1063         u64 next_size = 0;
1064         unsigned cur_percent = 0;
1065
1066 #ifdef WITH_NTFS_3G
1067         ntfs_inode *ni = NULL;
1068 #endif
1069
1070 #ifdef WITH_NTFS_3G
1071         ret = prepare_resource_for_read(next_lte, &ni);
1072 #else
1073         ret = prepare_resource_for_read(next_lte);
1074 #endif
1075
1076         DEBUG("Initializing buffers for uncompressed "
1077               "and compressed data (%zu bytes needed)",
1078               queue_size * MAX_CHUNKS_PER_MSG * WIM_CHUNK_SIZE * 2);
1079
1080         // Pre-allocate all the buffers that will be needed to do the chunk
1081         // compression.
1082         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1083                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1084                         msgs[i].compressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1085                         msgs[i].uncompressed_chunks[j] = MALLOC(WIM_CHUNK_SIZE);
1086                         if (msgs[i].compressed_chunks[j] == NULL ||
1087                             msgs[i].uncompressed_chunks[j] == NULL)
1088                         {
1089                                 ERROR("Could not allocate enough memory for "
1090                                       "multi-threaded compression");
1091                                 ret = WIMLIB_ERR_NOMEM;
1092                                 goto out;
1093                         }
1094                 }
1095         }
1096
1097         while (1) {
1098                 // Send chunks to the compressor threads until either (a) there
1099                 // are no more messages available since they were all sent off,
1100                 // or (b) there are no more resources that need to be
1101                 // compressed.
1102                 while (!list_empty(&available_msgs) && next_lte != NULL) {
1103
1104                         // Get a message from the available messages
1105                         // list
1106                         msg = container_of(available_msgs.next,
1107                                            struct message,
1108                                            list);
1109
1110                         // ... and delete it from the available messages
1111                         // list
1112                         list_del(&msg->list);
1113
1114                         // Initialize the message with the chunks to
1115                         // compress.
1116                         msg->num_chunks = min(next_num_chunks - next_chunk,
1117                                               MAX_CHUNKS_PER_MSG);
1118                         msg->lte = next_lte;
1119                         msg->complete = false;
1120                         msg->begin_chunk = next_chunk;
1121
1122                         unsigned size = WIM_CHUNK_SIZE;
1123                         for (unsigned i = 0; i < msg->num_chunks; i++) {
1124
1125                                 // Read chunk @next_chunk of the stream into the
1126                                 // message so that a compressor thread can
1127                                 // compress it.
1128
1129                                 if (next_chunk == next_num_chunks - 1 &&
1130                                      wim_resource_size(next_lte) % WIM_CHUNK_SIZE != 0)
1131                                 {
1132                                         size = wim_resource_size(next_lte) % WIM_CHUNK_SIZE;
1133                                 }
1134
1135
1136                                 DEBUG2("Read resource (size=%u, offset=%zu)",
1137                                       size, next_chunk * WIM_CHUNK_SIZE);
1138
1139                                 msg->uncompressed_chunk_sizes[i] = size;
1140
1141                                 ret = read_wim_resource(next_lte,
1142                                                         msg->uncompressed_chunks[i],
1143                                                         size,
1144                                                         next_chunk * WIM_CHUNK_SIZE,
1145                                                         0);
1146                                 if (ret != 0)
1147                                         goto out;
1148                                 sha1_update(&next_sha_ctx,
1149                                             msg->uncompressed_chunks[i], size);
1150                                 next_chunk++;
1151                         }
1152
1153                         // Send the compression request
1154                         list_add_tail(&msg->list, &next_lte->msg_list);
1155                         shared_queue_put(res_to_compress_queue, msg);
1156                         DEBUG2("Compression request sent");
1157
1158                         if (next_chunk != next_num_chunks)
1159                                 // More chunks to send for this resource
1160                                 continue;
1161
1162                         // Done sending compression requests for a resource!
1163                         // Check the SHA1 message digest.
1164                         DEBUG2("Finalize SHA1 md (next_num_chunks=%zu)", next_num_chunks);
1165                         sha1_final(next_hash, &next_sha_ctx);
1166                         if (!hashes_equal(next_lte->hash, next_hash)) {
1167                                 ERROR("WIM resource has incorrect hash!");
1168                                 if (next_lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
1169                                         ERROR("We were reading it from `%s'; maybe it changed "
1170                                               "while we were reading it.",
1171                                               next_lte->file_on_disk);
1172                                 }
1173                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1174                                 goto out;
1175                         }
1176
1177                         // Advance to the next resource.
1178                         //
1179                         // If the next resource needs no compression, just write
1180                         // it with this thread (not now though--- we could be in
1181                         // the middle of writing another resource.)  Keep doing
1182                         // this until we either get to the end of the resources
1183                         // list, or we get to a resource that needs compression.
1184
1185                         while (1) {
1186                                 if (next_resource == stream_list) {
1187                                         next_lte = NULL;
1188                                         break;
1189                                 }
1190                         #ifdef WITH_NTFS_3G
1191                                 end_wim_resource_read(next_lte, ni);
1192                                 ni = NULL;
1193                         #else
1194                                 end_wim_resource_read(next_lte);
1195                         #endif
1196
1197                                 next_lte = container_of(next_resource,
1198                                                         struct lookup_table_entry,
1199                                                         staging_list);
1200                                 next_resource = next_resource->next;
1201                                 if ((next_lte->resource_location == RESOURCE_IN_WIM
1202                                     && wimlib_get_compression_type(next_lte->wim) == out_ctype)
1203                                     || wim_resource_size(next_lte) == 0)
1204                                 {
1205                                         list_add_tail(&next_lte->staging_list,
1206                                                       &my_resources);
1207                                 } else {
1208                                         list_add_tail(&next_lte->staging_list,
1209                                                       &outstanding_resources);
1210                                         next_chunk = 0;
1211                                         next_num_chunks = wim_resource_chunks(next_lte);
1212                                         sha1_init(&next_sha_ctx);
1213                                         INIT_LIST_HEAD(&next_lte->msg_list);
1214                                 #ifdef WITH_NTFS_3G
1215                                         ret = prepare_resource_for_read(next_lte, &ni);
1216                                 #else
1217                                         ret = prepare_resource_for_read(next_lte);
1218                                 #endif
1219                                         if (ret != 0)
1220                                                 goto out;
1221                                         DEBUG2("Updated next_lte");
1222                                         break;
1223                                 }
1224                         }
1225                 }
1226
1227                 // If there are no outstanding resources, there are no more
1228                 // resources that need to be written.
1229                 if (list_empty(&outstanding_resources)) {
1230                         DEBUG("No outstanding resources! Done");
1231                         ret = 0;
1232                         goto out;
1233                 }
1234
1235                 // Get the next message from the queue and process it.
1236                 // The message will contain 1 or more data chunks that have been
1237                 // compressed.
1238                 DEBUG2("Waiting for message");
1239                 msg = shared_queue_get(compressed_res_queue);
1240                 msg->complete = true;
1241
1242                 DEBUG2("Received msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1243
1244                 list_for_each_entry(msg, &cur_lte->msg_list, list) {
1245                         DEBUG2("complete=%d", msg->complete);
1246                 }
1247
1248                 // Is this the next chunk in the current resource?  If it's not
1249                 // (i.e., an earlier chunk in a same or different resource
1250                 // hasn't been compressed yet), do nothing, and keep this
1251                 // message around until all earlier chunks are received.
1252                 //
1253                 // Otherwise, write all the chunks we can.
1254                 while (!list_empty(&cur_lte->msg_list)
1255                         && (msg = container_of(cur_lte->msg_list.next,
1256                                                struct message,
1257                                                list))->complete)
1258                 {
1259                         DEBUG2("Complete msg (begin_chunk=%"PRIu64")", msg->begin_chunk);
1260                         if (msg->begin_chunk == 0) {
1261                                 DEBUG2("Begin chunk tab");
1262
1263
1264
1265                                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1266                                         show_stream_write_progress(&cur_size,
1267                                                                    &next_size,
1268                                                                    total_size,
1269                                                                    one_percent,
1270                                                                    &cur_percent,
1271                                                                    cur_lte);
1272                                 }
1273
1274                                 // This is the first set of chunks.  Leave space
1275                                 // for the chunk table in the output file.
1276                                 off_t cur_offset = ftello(out_fp);
1277                                 if (cur_offset == -1) {
1278                                         ret = WIMLIB_ERR_WRITE;
1279                                         goto out;
1280                                 }
1281                                 ret = begin_wim_resource_chunk_tab(cur_lte,
1282                                                                    out_fp,
1283                                                                    cur_offset,
1284                                                                    &cur_chunk_tab);
1285                                 if (ret != 0)
1286                                         goto out;
1287                         }
1288
1289                         // Write the compressed chunks from the message.
1290                         ret = write_wim_chunks(msg, out_fp, cur_chunk_tab);
1291                         if (ret != 0)
1292                                 goto out;
1293
1294                         list_del(&msg->list);
1295
1296                         // This message is available to use for different chunks
1297                         // now.
1298                         list_add(&msg->list, &available_msgs);
1299
1300                         // Was this the last chunk of the stream?  If so,
1301                         // finish it.
1302                         if (list_empty(&cur_lte->msg_list) &&
1303                             msg->begin_chunk + msg->num_chunks == cur_chunk_tab->num_chunks)
1304                         {
1305                                 DEBUG2("Finish wim chunk tab");
1306                                 u64 res_csize;
1307                                 ret = finish_wim_resource_chunk_tab(cur_chunk_tab,
1308                                                                     out_fp,
1309                                                                     &res_csize);
1310                                 if (ret != 0)
1311                                         goto out;
1312
1313
1314                                 cur_lte->output_resource_entry.size =
1315                                         res_csize;
1316
1317                                 cur_lte->output_resource_entry.original_size =
1318                                         cur_lte->resource_entry.original_size;
1319
1320                                 cur_lte->output_resource_entry.offset =
1321                                         cur_chunk_tab->file_offset;
1322
1323                                 cur_lte->output_resource_entry.flags =
1324                                         cur_lte->resource_entry.flags |
1325                                                 WIM_RESHDR_FLAG_COMPRESSED;
1326
1327                                 FREE(cur_chunk_tab);
1328                                 cur_chunk_tab = NULL;
1329
1330                                 struct list_head *next = cur_lte->staging_list.next;
1331                                 list_del(&cur_lte->staging_list);
1332
1333                                 if (next == &outstanding_resources) {
1334                                         DEBUG("No more outstanding resources");
1335                                         ret = 0;
1336                                         goto out;
1337                                 } else {
1338                                         cur_lte = container_of(cur_lte->staging_list.next,
1339                                                                struct lookup_table_entry,
1340                                                                staging_list);
1341                                 }
1342
1343                                 // Since we just finished writing a stream,
1344                                 // write any streams that have been added to the
1345                                 // my_resources list for direct writing by the
1346                                 // main thread (e.g. resources that don't need
1347                                 // to be compressed because the desired
1348                                 // compression type is the same as the previous
1349                                 // compression type).
1350                                 struct lookup_table_entry *tmp;
1351                                 list_for_each_entry_safe(lte,
1352                                                          tmp,
1353                                                          &my_resources,
1354                                                          staging_list)
1355                                 {
1356                                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1357                                                 show_stream_write_progress(&cur_size,
1358                                                                            &next_size,
1359                                                                            total_size,
1360                                                                            one_percent,
1361                                                                            &cur_percent,
1362                                                                            lte);
1363                                         }
1364
1365                                         ret = write_wim_resource(lte,
1366                                                                  out_fp,
1367                                                                  out_ctype,
1368                                                                  &lte->output_resource_entry,
1369                                                                  0);
1370                                         list_del(&lte->staging_list);
1371                                         if (ret != 0)
1372                                                 goto out;
1373                                 }
1374                         }
1375                 }
1376         }
1377
1378 out:
1379 #ifdef WITH_NTFS_3G
1380         end_wim_resource_read(cur_lte, ni);
1381 #else
1382         end_wim_resource_read(cur_lte);
1383 #endif
1384         if (ret == 0) {
1385                 list_for_each_entry(lte, &my_resources, staging_list) {
1386                         ret = write_wim_resource(lte, out_fp,
1387                                                  out_ctype,
1388                                                  &lte->output_resource_entry,
1389                                                  0);
1390                         if (ret != 0)
1391                                 break;
1392                         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1393                                 show_stream_write_progress(&cur_size,
1394                                                            &next_size,
1395                                                            total_size,
1396                                                            one_percent,
1397                                                            &cur_percent,
1398                                                            lte);
1399                         }
1400                 }
1401                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1402                         finish_stream_write_progress(total_size);
1403         } else {
1404                 size_t num_available_msgs = 0;
1405                 struct list_head *cur;
1406
1407                 list_for_each(cur, &available_msgs) {
1408                         num_available_msgs++;
1409                 }
1410
1411                 while (num_available_msgs < ARRAY_LEN(msgs)) {
1412                         shared_queue_get(compressed_res_queue);
1413                         num_available_msgs++;
1414                 }
1415         }
1416
1417         DEBUG("Freeing messages");
1418
1419         for (size_t i = 0; i < ARRAY_LEN(msgs); i++) {
1420                 for (size_t j = 0; j < MAX_CHUNKS_PER_MSG; j++) {
1421                         FREE(msgs[i].compressed_chunks[j]);
1422                         FREE(msgs[i].uncompressed_chunks[j]);
1423                 }
1424         }
1425
1426         if (cur_chunk_tab != NULL)
1427                 FREE(cur_chunk_tab);
1428         return ret;
1429 }
1430
1431
1432 static const char *get_data_type(int ctype)
1433 {
1434         switch (ctype) {
1435         case WIM_COMPRESSION_TYPE_NONE:
1436                 return "uncompressed";
1437         case WIM_COMPRESSION_TYPE_LZX:
1438                 return "LZX-compressed";
1439         case WIM_COMPRESSION_TYPE_XPRESS:
1440                 return "XPRESS-compressed";
1441         }
1442 }
1443
1444 static int write_stream_list_parallel(struct list_head *stream_list,
1445                                       FILE *out_fp, int out_ctype,
1446                                       int write_flags, u64 total_size,
1447                                       unsigned num_threads)
1448 {
1449         int ret;
1450         struct shared_queue res_to_compress_queue;
1451         struct shared_queue compressed_res_queue;
1452         pthread_t *compressor_threads = NULL;
1453
1454         if (num_threads == 0) {
1455                 long nthreads = sysconf(_SC_NPROCESSORS_ONLN);
1456                 if (nthreads < 1) {
1457                         WARNING("Could not determine number of processors! Assuming 1");
1458                         goto out_serial;
1459                 } else {
1460                         num_threads = nthreads;
1461                 }
1462         }
1463
1464         wimlib_assert(stream_list->next != stream_list);
1465
1466
1467         static const double MESSAGES_PER_THREAD = 2.0;
1468         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1469
1470         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1471
1472         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1473         if (ret != 0)
1474                 goto out_serial;
1475
1476         ret = shared_queue_init(&compressed_res_queue, queue_size);
1477         if (ret != 0)
1478                 goto out_destroy_res_to_compress_queue;
1479
1480         struct compressor_thread_params params;
1481         params.res_to_compress_queue = &res_to_compress_queue;
1482         params.compressed_res_queue = &compressed_res_queue;
1483         params.compress = get_compress_func(out_ctype);
1484
1485         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1486
1487         for (unsigned i = 0; i < num_threads; i++) {
1488                 DEBUG("pthread_create thread %u", i);
1489                 ret = pthread_create(&compressor_threads[i], NULL,
1490                                      compressor_thread_proc, &params);
1491                 if (ret != 0) {
1492                         ret = -1;
1493                         ERROR_WITH_ERRNO("Failed to create compressor "
1494                                          "thread %u", i);
1495                         num_threads = i;
1496                         goto out_join;
1497                 }
1498         }
1499
1500         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1501                 printf("Writing %s compressed data using %u threads...\n",
1502                        get_data_type(out_ctype), num_threads);
1503         }
1504
1505         ret = main_writer_thread_proc(stream_list,
1506                                       out_fp,
1507                                       out_ctype,
1508                                       &res_to_compress_queue,
1509                                       &compressed_res_queue,
1510                                       queue_size,
1511                                       write_flags,
1512                                       total_size);
1513
1514 out_join:
1515         for (unsigned i = 0; i < num_threads; i++)
1516                 shared_queue_put(&res_to_compress_queue, NULL);
1517
1518         for (unsigned i = 0; i < num_threads; i++) {
1519                 if (pthread_join(compressor_threads[i], NULL)) {
1520                         WARNING("Failed to join compressor thread %u: %s",
1521                                 i, strerror(errno));
1522                 }
1523         }
1524         FREE(compressor_threads);
1525         shared_queue_destroy(&compressed_res_queue);
1526 out_destroy_res_to_compress_queue:
1527         shared_queue_destroy(&res_to_compress_queue);
1528         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1529                 return ret;
1530 out_serial:
1531         WARNING("Falling back to single-threaded compression");
1532         return write_stream_list_serial(stream_list, out_fp,
1533                                         out_ctype, write_flags, total_size);
1534 }
1535 #endif
1536
1537 static int write_stream_list(struct list_head *stream_list, FILE *out_fp,
1538                              int out_ctype, int write_flags,
1539                              unsigned num_threads)
1540 {
1541         struct lookup_table_entry *lte;
1542         size_t num_streams = 0;
1543         u64 total_size = 0;
1544         bool compression_needed = false;
1545
1546         list_for_each_entry(lte, stream_list, staging_list) {
1547                 num_streams++;
1548                 total_size += wim_resource_size(lte);
1549                 if (!compression_needed
1550                     && out_ctype != WIM_COMPRESSION_TYPE_NONE
1551                     && (lte->resource_location != RESOURCE_IN_WIM
1552                         || wimlib_get_compression_type(lte->wim) != out_ctype)
1553                     && wim_resource_size(lte) != 0)
1554                         compression_needed = true;
1555         }
1556
1557         if (write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1558                 printf("Preparing to write %zu streams "
1559                        "(%"PRIu64" total bytes uncompressed)\n",
1560                        num_streams, total_size);
1561                 printf("Using compression type %s\n",
1562                        wimlib_get_compression_type_string(out_ctype));
1563         }
1564
1565 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1566         if (compression_needed && total_size >= 1000000 && num_threads != 1) {
1567                 return write_stream_list_parallel(stream_list, out_fp,
1568                                                   out_ctype, write_flags,
1569                                                   total_size, num_threads);
1570         }
1571         else
1572 #endif
1573         {
1574                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
1575                         const char *reason = "";
1576                         if (!compression_needed)
1577                                 reason = " (no compression needed)";
1578                         printf("Writing %s data using 1 thread%s\n",
1579                                get_data_type(out_ctype), reason);
1580                 }
1581
1582                 return write_stream_list_serial(stream_list, out_fp,
1583                                                 out_ctype, write_flags,
1584                                                 total_size);
1585         }
1586 }
1587
1588
1589 static int dentry_find_streams_to_write(struct dentry *dentry,
1590                                         void *wim)
1591 {
1592         WIMStruct *w = wim;
1593         struct list_head *stream_list = w->private;
1594         struct lookup_table_entry *lte;
1595         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1596                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1597                 if (lte && ++lte->out_refcnt == 1)
1598                         list_add(&lte->staging_list, stream_list);
1599         }
1600         return 0;
1601 }
1602
1603 static int find_streams_to_write(WIMStruct *w)
1604 {
1605         return for_dentry_in_tree(wim_root_dentry(w),
1606                                   dentry_find_streams_to_write, w);
1607 }
1608
1609 static int write_wim_streams(WIMStruct *w, int image, int write_flags,
1610                              unsigned num_threads)
1611 {
1612
1613         LIST_HEAD(stream_list);
1614
1615         w->private = &stream_list;
1616         for_image(w, image, find_streams_to_write);
1617         return write_stream_list(&stream_list, w->out_fp,
1618                                  wimlib_get_compression_type(w), write_flags,
1619                                  num_threads);
1620 }
1621
1622 /*
1623  * Write the lookup table, xml data, and integrity table, then overwrite the WIM
1624  * header.
1625  */
1626 int finish_write(WIMStruct *w, int image, int write_flags)
1627 {
1628         off_t lookup_table_offset;
1629         off_t xml_data_offset;
1630         off_t lookup_table_size;
1631         off_t integrity_offset;
1632         off_t xml_data_size;
1633         off_t end_offset;
1634         off_t integrity_size;
1635         int ret;
1636         struct wim_header hdr;
1637         FILE *out = w->out_fp;
1638
1639         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1640                 /* Write the lookup table. */
1641                 lookup_table_offset = ftello(out);
1642                 if (lookup_table_offset == -1)
1643                         return WIMLIB_ERR_WRITE;
1644
1645                 DEBUG("Writing lookup table (offset %"PRIu64")",
1646                       lookup_table_offset);
1647                 ret = write_lookup_table(w->lookup_table, out);
1648                 if (ret != 0)
1649                         return ret;
1650         }
1651
1652         xml_data_offset = ftello(out);
1653         if (xml_data_offset == -1)
1654                 return WIMLIB_ERR_WRITE;
1655
1656         /* @hdr will be the header for the new WIM.  First copy all the data
1657          * from the header in the WIMStruct; then set all the fields that may
1658          * have changed, including the resource entries, boot index, and image
1659          * count.  */
1660         memcpy(&hdr, &w->hdr, sizeof(struct wim_header));
1661         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
1662                 lookup_table_size = xml_data_offset - lookup_table_offset;
1663                 hdr.lookup_table_res_entry.offset = lookup_table_offset;
1664                 hdr.lookup_table_res_entry.size = lookup_table_size;
1665         }
1666         hdr.lookup_table_res_entry.original_size = hdr.lookup_table_res_entry.size;
1667         hdr.lookup_table_res_entry.flags = WIM_RESHDR_FLAG_METADATA;
1668
1669         DEBUG("Writing XML data (offset %"PRIu64")", xml_data_offset);
1670         ret = write_xml_data(w->wim_info, image, out,
1671                              (write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE) ?
1672                                 wim_info_get_total_bytes(w->wim_info) : 0);
1673         if (ret != 0)
1674                 return ret;
1675
1676         integrity_offset = ftello(out);
1677         if (integrity_offset == -1)
1678                 return WIMLIB_ERR_WRITE;
1679         xml_data_size = integrity_offset - xml_data_offset;
1680
1681         hdr.xml_res_entry.offset                 = xml_data_offset;
1682         hdr.xml_res_entry.size                   = xml_data_size;
1683         hdr.xml_res_entry.original_size          = xml_data_size;
1684         hdr.xml_res_entry.flags                  = 0;
1685
1686         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
1687                 ret = write_integrity_table(out, WIM_HEADER_DISK_SIZE,
1688                                             xml_data_offset,
1689                                             write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS);
1690                 if (ret != 0)
1691                         return ret;
1692                 end_offset = ftello(out);
1693                 if (end_offset == -1)
1694                         return WIMLIB_ERR_WRITE;
1695                 integrity_size              = end_offset - integrity_offset;
1696                 hdr.integrity.offset        = integrity_offset;
1697                 hdr.integrity.size          = integrity_size;
1698                 hdr.integrity.original_size = integrity_size;
1699         } else {
1700                 hdr.integrity.offset        = 0;
1701                 hdr.integrity.size          = 0;
1702                 hdr.integrity.original_size = 0;
1703         }
1704         hdr.integrity.flags = 0;
1705
1706         DEBUG("Updating WIM header.");
1707
1708         /*
1709          * In the WIM header, there is room for the resource entry for a
1710          * metadata resource labeled as the "boot metadata".  This entry should
1711          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
1712          * it should be a copy of the resource entry for the image that is
1713          * marked as bootable.  This is not well documented...
1714          */
1715         if (hdr.boot_idx == 0 || !w->image_metadata
1716                         || (image != WIM_ALL_IMAGES && image != hdr.boot_idx)) {
1717                 memset(&hdr.boot_metadata_res_entry, 0,
1718                        sizeof(struct resource_entry));
1719         } else {
1720                 memcpy(&hdr.boot_metadata_res_entry,
1721                        &w->image_metadata[
1722                           hdr.boot_idx - 1].metadata_lte->output_resource_entry,
1723                        sizeof(struct resource_entry));
1724         }
1725
1726         /* Set image count and boot index correctly for single image writes */
1727         if (image != WIM_ALL_IMAGES) {
1728                 hdr.image_count = 1;
1729                 if (hdr.boot_idx == image)
1730                         hdr.boot_idx = 1;
1731                 else
1732                         hdr.boot_idx = 0;
1733         }
1734
1735
1736         if (fseeko(out, 0, SEEK_SET) != 0)
1737                 return WIMLIB_ERR_WRITE;
1738
1739         ret = write_header(&hdr, out);
1740         if (ret != 0)
1741                 return ret;
1742
1743         DEBUG("Closing output file.");
1744         wimlib_assert(w->out_fp != NULL);
1745         if (fclose(w->out_fp) != 0) {
1746                 ERROR_WITH_ERRNO("Failed to close the WIM file");
1747                 ret = WIMLIB_ERR_WRITE;
1748         }
1749         w->out_fp = NULL;
1750         return ret;
1751 }
1752
1753 /* Open file stream and write dummy header for WIM. */
1754 int begin_write(WIMStruct *w, const char *path, int write_flags)
1755 {
1756         const char *mode;
1757         DEBUG("Opening `%s' for new WIM", path);
1758
1759         /* checking the integrity requires going back over the file to read it.
1760          * XXX
1761          * (It also would be possible to keep a running sha1sum as the file is
1762          * written-- this would be faster, but a bit more complicated) */
1763         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
1764                 mode = "w+b";
1765         else
1766                 mode = "wb";
1767
1768         if (w->out_fp)
1769                 fclose(w->out_fp);
1770
1771         w->out_fp = fopen(path, mode);
1772         if (!w->out_fp) {
1773                 ERROR_WITH_ERRNO("Failed to open the file `%s' for writing",
1774                                  path);
1775                 return WIMLIB_ERR_OPEN;
1776         }
1777
1778         /* Write dummy header. It will be overwritten later. */
1779         return write_header(&w->hdr, w->out_fp);
1780 }
1781
1782 /* Writes a stand-alone WIM to a file.  */
1783 WIMLIBAPI int wimlib_write(WIMStruct *w, const char *path,
1784                            int image, int write_flags, unsigned num_threads)
1785 {
1786         int ret;
1787
1788         if (!w || !path)
1789                 return WIMLIB_ERR_INVALID_PARAM;
1790
1791         write_flags &= ~WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE;
1792
1793         if (image != WIM_ALL_IMAGES &&
1794              (image < 1 || image > w->hdr.image_count))
1795                 return WIMLIB_ERR_INVALID_IMAGE;
1796
1797
1798         if (w->hdr.total_parts != 1) {
1799                 ERROR("Cannot call wimlib_write() on part of a split WIM");
1800                 return WIMLIB_ERR_SPLIT_UNSUPPORTED;
1801         }
1802
1803         if (image == WIM_ALL_IMAGES)
1804                 DEBUG("Writing all images to `%s'.", path);
1805         else
1806                 DEBUG("Writing image %d to `%s'.", image, path);
1807
1808         ret = begin_write(w, path, write_flags);
1809         if (ret != 0)
1810                 return ret;
1811
1812         for_lookup_table_entry(w->lookup_table, lte_zero_out_refcnt, NULL);
1813
1814         ret = write_wim_streams(w, image, write_flags, num_threads);
1815
1816         if (ret != 0) {
1817                 /*ERROR("Failed to write WIM file resources to `%s'", path);*/
1818                 return ret;
1819         }
1820
1821         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1822                 printf("Writing image metadata...\n");
1823
1824         ret = for_image(w, image, write_metadata_resource);
1825
1826         if (ret != 0) {
1827                 /*ERROR("Failed to write WIM image metadata to `%s'", path);*/
1828                 return ret;
1829         }
1830
1831         ret = finish_write(w, image, write_flags);
1832         if (ret != 0)
1833                 return ret;
1834
1835         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
1836                 printf("Successfully wrote `%s'\n", path);
1837         return 0;
1838 }