wimlib.net Git - wimlib/blob - src/write.c

   1 /*
   2  * write.c
   3  *
   4  * Support for writing WIM files; write a WIM file, overwrite a WIM file, write
   5  * compressed file resources, etc.
   6  */
   7
   8 /*
   9  * Copyright (C) 2012, 2013 Eric Biggers
  10  *
  11  * This file is part of wimlib, a library for working with WIM files.
  12  *
  13  * wimlib is free software; you can redistribute it and/or modify it under the
  14  * terms of the GNU General Public License as published by the Free
  15  * Software Foundation; either version 3 of the License, or (at your option)
  16  * any later version.
  17  *
  18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
  21  * details.
  22  *
  23  * You should have received a copy of the GNU General Public License
  24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
  25  */
  26
  27 #ifdef HAVE_CONFIG_H
  28 #  include "config.h"
  29 #endif
  30
  31 #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
  32 /* On BSD, this should be included before "wimlib/list.h" so that "wimlib/list.h" can
  33  * overwrite the LIST_HEAD macro. */
  34 #  include <sys/file.h>
  35 #endif
  36
  37 #include "wimlib/endianness.h"
  38 #include "wimlib/error.h"
  39 #include "wimlib/file_io.h"
  40 #include "wimlib/header.h"
  41 #include "wimlib/integrity.h"
  42 #include "wimlib/lookup_table.h"
  43 #include "wimlib/metadata.h"
  44 #include "wimlib/resource.h"
  45 #include "wimlib/write.h"
  46 #include "wimlib/xml.h"
  47
  48 #ifdef __WIN32__
  49 #  include "wimlib/win32.h" /* win32_get_number_of_processors() */
  50 #endif
  51
  52 #ifdef ENABLE_MULTITHREADED_COMPRESSION
  53 #  include <pthread.h>
  54 #endif
  55
  56 #include <errno.h>
  57 #include <fcntl.h>
  58 #include <limits.h>
  59 #include <stdlib.h>
  60 #include <unistd.h>
  61
  62 #ifdef HAVE_ALLOCA_H
  63 #  include <alloca.h>
  64 #endif
  65
  66
  67 #ifndef __WIN32__
  68 #  include <sys/uio.h> /* for `struct iovec' */
  69 #endif
  70
  71 static int
  72 alloc_lzx_context(int write_resource_flags, struct wimlib_lzx_context **ctx_pp)
  73 {
  74         struct wimlib_lzx_params params;
  75         params.size_of_this = sizeof(params);
  76         if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_COMPRESS_SLOW)
  77                 params.algorithm = WIMLIB_LZX_ALGORITHM_SLOW;
  78         else
  79                 params.algorithm = WIMLIB_LZX_ALGORITHM_FAST;
  80         params.use_defaults = 1;
  81         return wimlib_lzx_alloc_context(&params, ctx_pp);
  82 }
  83
  84 static unsigned
  85 compress_chunk(const void * uncompressed_data,
  86                unsigned uncompressed_len,
  87                void *compressed_data,
  88                int out_ctype,
  89                struct wimlib_lzx_context *comp_ctx)
  90 {
  91         switch (out_ctype) {
  92         case WIMLIB_COMPRESSION_TYPE_XPRESS:
  93                 return wimlib_xpress_compress(uncompressed_data,
  94                                               uncompressed_len,
  95                                               compressed_data);
  96         case WIMLIB_COMPRESSION_TYPE_LZX:
  97                 return wimlib_lzx_compress2(uncompressed_data,
  98                                             uncompressed_len,
  99                                             compressed_data,
 100                                             comp_ctx);
 101         default:
 102                 wimlib_assert(0);
 103                 return 0;
 104         }
 105 }
 106
 107 /* Chunk table that's located at the beginning of each compressed resource in
 108  * the WIM.  (This is not the on-disk format; the on-disk format just has an
 109  * array of offsets.) */
 110 struct chunk_table {
 111         u64 original_resource_size;
 112         u64 num_chunks;
 113         u64 table_disk_size;
 114         unsigned bytes_per_chunk_entry;
 115         void *cur_offset_p;
 116         union {
 117                 u32 cur_offset_u32;
 118                 u64 cur_offset_u64;
 119         };
 120         /* Beginning of chunk offsets, in either 32-bit or 64-bit little endian
 121          * integers, including the first offset of 0, which will not be written.
 122          * */
 123         u8 offsets[] _aligned_attribute(8);
 124 };
 125
 126 /* Allocate and initializes a chunk table, then reserve space for it in the
 127  * output file unless writing a pipable resource.  */
 128 static int
 129 begin_wim_resource_chunk_tab(const struct wim_lookup_table_entry *lte,
 130                              struct filedes *out_fd,
 131                              struct chunk_table **chunk_tab_ret,
 132                              int resource_flags)
 133 {
 134         u64 size;
 135         u64 num_chunks;
 136         unsigned bytes_per_chunk_entry;
 137         size_t alloc_size;
 138         struct chunk_table *chunk_tab;
 139         int ret;
 140
 141         size = wim_resource_size(lte);
 142         num_chunks = wim_resource_chunks(lte);
 143         bytes_per_chunk_entry = (size > (1ULL << 32)) ? 8 : 4;
 144         alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
 145         chunk_tab = CALLOC(1, alloc_size);
 146
 147         if (!chunk_tab) {
 148                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
 149                       "resource", size);
 150                 return WIMLIB_ERR_NOMEM;
 151         }
 152         chunk_tab->num_chunks = num_chunks;
 153         chunk_tab->original_resource_size = size;
 154         chunk_tab->bytes_per_chunk_entry = bytes_per_chunk_entry;
 155         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
 156                                      (num_chunks - 1);
 157         chunk_tab->cur_offset_p = chunk_tab->offsets;
 158
 159         /* We don't know the correct offsets yet; so just write zeroes to
 160          * reserve space for the table, so we can go back to it later after
 161          * we've written the compressed chunks following it.
 162          *
 163          * Special case: if writing a pipable WIM, compressed resources are in a
 164          * modified format (see comment above write_pipable_wim()) and do not
 165          * have a chunk table at the beginning, so don't reserve any space for
 166          * one.  */
 167         if (!(resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) {
 168                 ret = full_write(out_fd, chunk_tab->offsets,
 169                                  chunk_tab->table_disk_size);
 170                 if (ret) {
 171                         ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 172                                          "file resource");
 173                         FREE(chunk_tab);
 174                         return ret;
 175                 }
 176         }
 177         *chunk_tab_ret = chunk_tab;
 178         return 0;
 179 }
 180
 181 /* Add the offset for the next chunk to the chunk table being constructed for a
 182  * compressed stream. */
 183 static void
 184 chunk_tab_record_chunk(struct chunk_table *chunk_tab, unsigned out_chunk_size)
 185 {
 186         if (chunk_tab->bytes_per_chunk_entry == 4) {
 187                 *(le32*)chunk_tab->cur_offset_p = cpu_to_le32(chunk_tab->cur_offset_u32);
 188                 chunk_tab->cur_offset_p = (le32*)chunk_tab->cur_offset_p + 1;
 189                 chunk_tab->cur_offset_u32 += out_chunk_size;
 190         } else {
 191                 *(le64*)chunk_tab->cur_offset_p = cpu_to_le64(chunk_tab->cur_offset_u64);
 192                 chunk_tab->cur_offset_p = (le64*)chunk_tab->cur_offset_p + 1;
 193                 chunk_tab->cur_offset_u64 += out_chunk_size;
 194         }
 195 }
 196
 197 /* Finishes a WIM chunk table and writes it to the output file at the correct
 198  * offset.  */
 199 static int
 200 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
 201                               struct filedes *out_fd,
 202                               off_t res_start_offset,
 203                               int write_resource_flags)
 204 {
 205         int ret;
 206
 207         if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
 208                 ret = full_write(out_fd,
 209                                  chunk_tab->offsets +
 210                                          chunk_tab->bytes_per_chunk_entry,
 211                                  chunk_tab->table_disk_size);
 212         } else {
 213                 ret  = full_pwrite(out_fd,
 214                                    chunk_tab->offsets +
 215                                            chunk_tab->bytes_per_chunk_entry,
 216                                    chunk_tab->table_disk_size,
 217                                    res_start_offset);
 218         }
 219         if (ret) {
 220                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 221                                  "file resource");
 222         }
 223         return ret;
 224 }
 225
 226 /* Write the header for a stream in a pipable WIM.
 227  */
 228 static int
 229 write_pwm_stream_header(const struct wim_lookup_table_entry *lte,
 230                         struct filedes *out_fd,
 231                         int additional_reshdr_flags)
 232 {
 233         struct pwm_stream_hdr stream_hdr;
 234         u32 reshdr_flags;
 235         int ret;
 236
 237         stream_hdr.magic = PWM_STREAM_MAGIC;
 238         stream_hdr.uncompressed_size = cpu_to_le64(lte->resource_entry.original_size);
 239         if (additional_reshdr_flags & PWM_RESHDR_FLAG_UNHASHED) {
 240                 zero_out_hash(stream_hdr.hash);
 241         } else {
 242                 wimlib_assert(!lte->unhashed);
 243                 copy_hash(stream_hdr.hash, lte->hash);
 244         }
 245
 246         reshdr_flags = lte->resource_entry.flags & ~WIM_RESHDR_FLAG_COMPRESSED;
 247         reshdr_flags |= additional_reshdr_flags;
 248         stream_hdr.flags = cpu_to_le32(reshdr_flags);
 249         ret = full_write(out_fd, &stream_hdr, sizeof(stream_hdr));
 250         if (ret)
 251                 ERROR_WITH_ERRNO("Error writing stream header");
 252         return ret;
 253 }
 254
 255 static int
 256 seek_and_truncate(struct filedes *out_fd, off_t offset)
 257 {
 258         if (filedes_seek(out_fd, offset) == -1 ||
 259             ftruncate(out_fd->fd, offset))
 260         {
 261                 ERROR_WITH_ERRNO("Failed to truncate output WIM file");
 262                 return WIMLIB_ERR_WRITE;
 263         }
 264         return 0;
 265 }
 266
 267 static int
 268 finalize_and_check_sha1(SHA_CTX *sha_ctx, struct wim_lookup_table_entry *lte)
 269 {
 270         u8 md[SHA1_HASH_SIZE];
 271
 272         sha1_final(md, sha_ctx);
 273         if (lte->unhashed) {
 274                 copy_hash(lte->hash, md);
 275         } else if (!hashes_equal(md, lte->hash)) {
 276                 ERROR("WIM resource has incorrect hash!");
 277                 if (lte_filename_valid(lte)) {
 278                         ERROR("We were reading it from \"%"TS"\"; maybe "
 279                               "it changed while we were reading it.",
 280                               lte->file_on_disk);
 281                 }
 282                 return WIMLIB_ERR_INVALID_RESOURCE_HASH;
 283         }
 284         return 0;
 285 }
 286
 287 struct write_resource_ctx {
 288         int out_ctype;
 289         struct wimlib_lzx_context *comp_ctx;
 290         struct chunk_table *chunk_tab;
 291         struct filedes *out_fd;
 292         SHA_CTX sha_ctx;
 293         bool doing_sha;
 294         int resource_flags;
 295 };
 296
 297 static int
 298 write_resource_cb(const void *chunk, size_t chunk_size, void *_ctx)
 299 {
 300         struct write_resource_ctx *ctx = _ctx;
 301         const void *out_chunk;
 302         unsigned out_chunk_size;
 303         int ret;
 304
 305         if (ctx->doing_sha)
 306                 sha1_update(&ctx->sha_ctx, chunk, chunk_size);
 307
 308         out_chunk = chunk;
 309         out_chunk_size = chunk_size;
 310         if (ctx->out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 311                 void *compressed_chunk;
 312                 unsigned compressed_size;
 313
 314                 /* Compress the chunk.  */
 315                 compressed_chunk = alloca(chunk_size);
 316
 317                 compressed_size = compress_chunk(chunk, chunk_size,
 318                                                  compressed_chunk,
 319                                                  ctx->out_ctype,
 320                                                  ctx->comp_ctx);
 321                 /* Use compressed data if compression to less than input size
 322                  * was successful.  */
 323                 if (compressed_size) {
 324                         out_chunk = compressed_chunk;
 325                         out_chunk_size = compressed_size;
 326                 }
 327         }
 328
 329         if (ctx->chunk_tab) {
 330                 /* Update chunk table accounting.  */
 331                 chunk_tab_record_chunk(ctx->chunk_tab, out_chunk_size);
 332
 333                 /* If writing compressed chunks to a pipable WIM, before the
 334                  * chunk data write a chunk header that provides the compressed
 335                  * chunk size.  */
 336                 if (ctx->resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
 337                         struct pwm_chunk_hdr chunk_hdr = {
 338                                 .compressed_size = cpu_to_le32(out_chunk_size),
 339                         };
 340                         ret = full_write(ctx->out_fd, &chunk_hdr,
 341                                          sizeof(chunk_hdr));
 342                         if (ret)
 343                                 goto error;
 344                 }
 345         }
 346
 347         /* Write the chunk data.  */
 348         ret = full_write(ctx->out_fd, out_chunk, out_chunk_size);
 349         if (ret)
 350                 goto error;
 351         return 0;
 352
 353 error:
 354         ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
 355         return ret;
 356 }
 357
 358 /*
 359  * write_wim_resource()-
 360  *
 361  * Write a resource to an output WIM.
 362  *
 363  * @lte:
 364  *      Lookup table entry for the resource, which could be in another WIM, in
 365  *      an external file, or in another location.
 366  *
 367  * @out_fd:
 368  *      File descriptor opened to the output WIM.
 369  *
 370  * @out_ctype:
 371  *      One of the WIMLIB_COMPRESSION_TYPE_* constants to indicate which
 372  *      compression algorithm to use.
 373  *
 374  * @out_res_entry:
 375  *      On success, this is filled in with the offset, flags, compressed size,
 376  *      and uncompressed size of the resource in the output WIM.
 377  *
 378  * @resource_flags:
 379  *      * WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS to force data to be recompressed even
 380  *        if it could otherwise be copied directly from the input;
 381  *      * WIMLIB_WRITE_RESOURCE_FLAG_COMPRESS_SLOW to compress the data as much
 382  *        as possible;
 383  *      * WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE if writing a resource for a pipable WIM
 384  *        (and the output file descriptor may be a pipe).
 385  *
 386  * Additional notes:  The SHA1 message digest of the uncompressed data is
 387  * calculated (except when doing a raw copy --- see below).  If the @unhashed
 388  * flag is set on the lookup table entry, this message digest is simply copied
 389  * to it; otherwise, the message digest is compared with the existing one, and
 390  * the function will fail if they do not match.
 391  */
 392 int
 393 write_wim_resource(struct wim_lookup_table_entry *lte,
 394                    struct filedes *out_fd, int out_ctype,
 395                    struct resource_entry *out_res_entry,
 396                    int resource_flags,
 397                    struct wimlib_lzx_context **comp_ctx)
 398 {
 399         struct write_resource_ctx write_ctx;
 400         off_t res_start_offset;
 401         u64 read_size;
 402         int ret;
 403
 404         /* Mask out any irrelevant flags, since this function also uses this
 405          * variable to store WIMLIB_READ_RESOURCE flags.  */
 406         resource_flags &= WIMLIB_WRITE_RESOURCE_MASK;
 407
 408         /* Get current position in output WIM.  */
 409         res_start_offset = out_fd->offset;
 410
 411         /* If we are not forcing the data to be recompressed, and the input
 412          * resource is located in a WIM with the same compression type as that
 413          * desired other than no compression, we can simply copy the compressed
 414          * data without recompressing it.  This also means we must skip
 415          * calculating the SHA1, as we never will see the uncompressed data.  */
 416         if (lte->resource_location == RESOURCE_IN_WIM &&
 417             out_ctype == wim_resource_compression_type(lte) &&
 418             out_ctype != WIMLIB_COMPRESSION_TYPE_NONE &&
 419             !(resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS))
 420         {
 421                 /* Normally we can request a RAW_FULL read, but if we're reading
 422                  * from a pipable resource and writing a non-pipable resource or
 423                  * vice versa, then a RAW_CHUNKS read needs to be requested so
 424                  * that the written resource can be appropriately formatted.
 425                  * However, in neither case is any actual decompression needed.
 426                  */
 427                 if (lte->is_pipable == !!(resource_flags &
 428                                           WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE))
 429                         resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_FULL;
 430                 else
 431                         resource_flags |= WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS;
 432                 write_ctx.doing_sha = false;
 433                 read_size = lte->resource_entry.size;
 434         } else {
 435                 write_ctx.doing_sha = true;
 436                 sha1_init(&write_ctx.sha_ctx);
 437                 read_size = lte->resource_entry.original_size;
 438         }
 439
 440
 441         /* If the output resource is to be compressed, initialize the chunk
 442          * table and set the function to use for chunk compression.  Exceptions:
 443          * no compression function is needed if doing a raw copy; also, no chunk
 444          * table is needed if doing a *full* (not per-chunk) raw copy.  */
 445         write_ctx.out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
 446         write_ctx.chunk_tab = NULL;
 447         if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 448                 if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW)) {
 449                         write_ctx.out_ctype = out_ctype;
 450                         if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
 451                                 ret = alloc_lzx_context(resource_flags, comp_ctx);
 452                                 if (ret)
 453                                         goto out;
 454                         }
 455                         write_ctx.comp_ctx = *comp_ctx;
 456                 }
 457                 if (!(resource_flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL)) {
 458                         ret = begin_wim_resource_chunk_tab(lte, out_fd,
 459                                                            &write_ctx.chunk_tab,
 460                                                            resource_flags);
 461                         if (ret)
 462                                 goto out;
 463                 }
 464         }
 465
 466         /* If writing a pipable resource, write the stream header and update
 467          * @res_start_offset to be the end of the stream header.  */
 468         if (resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
 469                 int reshdr_flags = 0;
 470                 if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE)
 471                         reshdr_flags |= WIM_RESHDR_FLAG_COMPRESSED;
 472                 ret = write_pwm_stream_header(lte, out_fd, reshdr_flags);
 473                 if (ret)
 474                         goto out_free_chunk_tab;
 475                 res_start_offset = out_fd->offset;
 476         }
 477
 478         /* Write the entire resource by reading the entire resource and feeding
 479          * the data through the write_resource_cb function. */
 480         write_ctx.out_fd = out_fd;
 481         write_ctx.resource_flags = resource_flags;
 482 try_write_again:
 483         ret = read_resource_prefix(lte, read_size,
 484                                    write_resource_cb, &write_ctx, resource_flags);
 485         if (ret)
 486                 goto out_free_chunk_tab;
 487
 488         /* Verify SHA1 message digest of the resource, or set the hash for the
 489          * first time. */
 490         if (write_ctx.doing_sha) {
 491                 ret = finalize_and_check_sha1(&write_ctx.sha_ctx, lte);
 492                 if (ret)
 493                         goto out_free_chunk_tab;
 494         }
 495
 496         /* Write chunk table if needed.  */
 497         if (write_ctx.chunk_tab) {
 498                 ret = finish_wim_resource_chunk_tab(write_ctx.chunk_tab,
 499                                                     out_fd,
 500                                                     res_start_offset,
 501                                                     resource_flags);
 502                 if (ret)
 503                         goto out_free_chunk_tab;
 504         }
 505
 506         /* Fill in out_res_entry with information about the newly written
 507          * resource.  */
 508         out_res_entry->size          = out_fd->offset - res_start_offset;
 509         out_res_entry->flags         = lte->resource_entry.flags;
 510         if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
 511                 out_res_entry->flags &= ~WIM_RESHDR_FLAG_COMPRESSED;
 512         else
 513                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
 514         out_res_entry->offset        = res_start_offset;
 515         out_res_entry->original_size = wim_resource_size(lte);
 516
 517         /* Check for resources compressed to greater than their original size
 518          * and write them uncompressed instead.  (But never do this if writing
 519          * to a pipe, and don't bother if we did a raw copy.)  */
 520         if (out_res_entry->size > out_res_entry->original_size &&
 521             !(resource_flags & (WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE |
 522                                 WIMLIB_READ_RESOURCE_FLAG_RAW)))
 523         {
 524                 DEBUG("Compressed %"PRIu64" => %"PRIu64" bytes; "
 525                       "writing uncompressed instead",
 526                       out_res_entry->original_size, out_res_entry->size);
 527                 ret = seek_and_truncate(out_fd, res_start_offset);
 528                 if (ret)
 529                         goto out_free_chunk_tab;
 530                 out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
 531                 FREE(write_ctx.chunk_tab);
 532                 write_ctx.out_ctype = WIMLIB_COMPRESSION_TYPE_NONE;
 533                 write_ctx.chunk_tab = NULL;
 534                 write_ctx.doing_sha = false;
 535                 goto try_write_again;
 536         }
 537         if (resource_flags & (WIMLIB_READ_RESOURCE_FLAG_RAW)) {
 538                 DEBUG("Copied raw compressed data "
 539                       "(%"PRIu64" => %"PRIu64" bytes @ +%"PRIu64", flags=0x%02x)",
 540                       out_res_entry->original_size, out_res_entry->size,
 541                       out_res_entry->offset, out_res_entry->flags);
 542         } else if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 543                 DEBUG("Wrote compressed resource "
 544                       "(%"PRIu64" => %"PRIu64" bytes @ +%"PRIu64", flags=0x%02x)",
 545                       out_res_entry->original_size, out_res_entry->size,
 546                       out_res_entry->offset, out_res_entry->flags);
 547         } else {
 548                 DEBUG("Wrote uncompressed resource "
 549                       "(%"PRIu64" bytes @ +%"PRIu64", flags=0x%02x)",
 550                       out_res_entry->original_size,
 551                       out_res_entry->offset, out_res_entry->flags);
 552         }
 553         ret = 0;
 554 out_free_chunk_tab:
 555         FREE(write_ctx.chunk_tab);
 556 out:
 557         return ret;
 558 }
 559
 560 /* Like write_wim_resource(), but the resource is specified by a buffer of
 561  * uncompressed data rather a lookup table entry; also writes the SHA1 hash of
 562  * the buffer to @hash_ret.  */
 563 int
 564 write_wim_resource_from_buffer(const void *buf, size_t buf_size,
 565                                int reshdr_flags, struct filedes *out_fd,
 566                                int out_ctype,
 567                                struct resource_entry *out_res_entry,
 568                                u8 *hash_ret, int write_resource_flags,
 569                                struct wimlib_lzx_context **comp_ctx)
 570 {
 571         /* Set up a temporary lookup table entry to provide to
 572          * write_wim_resource(). */
 573         struct wim_lookup_table_entry lte;
 574         int ret;
 575
 576         lte.resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
 577         lte.attached_buffer              = (void*)buf;
 578         lte.resource_entry.original_size = buf_size;
 579         lte.resource_entry.flags         = reshdr_flags;
 580
 581         if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) {
 582                 sha1_buffer(buf, buf_size, lte.hash);
 583                 lte.unhashed = 0;
 584         } else {
 585                 lte.unhashed = 1;
 586         }
 587
 588         ret = write_wim_resource(&lte, out_fd, out_ctype, out_res_entry,
 589                                  write_resource_flags, comp_ctx);
 590         if (ret)
 591                 return ret;
 592         if (hash_ret)
 593                 copy_hash(hash_ret, lte.hash);
 594         return 0;
 595 }
 596
 597
 598 #ifdef ENABLE_MULTITHREADED_COMPRESSION
 599
 600 /* Blocking shared queue (solves the producer-consumer problem) */
 601 struct shared_queue {
 602         unsigned size;
 603         unsigned front;
 604         unsigned back;
 605         unsigned filled_slots;
 606         void **array;
 607         pthread_mutex_t lock;
 608         pthread_cond_t msg_avail_cond;
 609         pthread_cond_t space_avail_cond;
 610 };
 611
 612 static int
 613 shared_queue_init(struct shared_queue *q, unsigned size)
 614 {
 615         wimlib_assert(size != 0);
 616         q->array = CALLOC(sizeof(q->array[0]), size);
 617         if (!q->array)
 618                 goto err;
 619         q->filled_slots = 0;
 620         q->front = 0;
 621         q->back = size - 1;
 622         q->size = size;
 623         if (pthread_mutex_init(&q->lock, NULL)) {
 624                 ERROR_WITH_ERRNO("Failed to initialize mutex");
 625                 goto err;
 626         }
 627         if (pthread_cond_init(&q->msg_avail_cond, NULL)) {
 628                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
 629                 goto err_destroy_lock;
 630         }
 631         if (pthread_cond_init(&q->space_avail_cond, NULL)) {
 632                 ERROR_WITH_ERRNO("Failed to initialize condition variable");
 633                 goto err_destroy_msg_avail_cond;
 634         }
 635         return 0;
 636 err_destroy_msg_avail_cond:
 637         pthread_cond_destroy(&q->msg_avail_cond);
 638 err_destroy_lock:
 639         pthread_mutex_destroy(&q->lock);
 640 err:
 641         return WIMLIB_ERR_NOMEM;
 642 }
 643
 644 static void
 645 shared_queue_destroy(struct shared_queue *q)
 646 {
 647         FREE(q->array);
 648         pthread_mutex_destroy(&q->lock);
 649         pthread_cond_destroy(&q->msg_avail_cond);
 650         pthread_cond_destroy(&q->space_avail_cond);
 651 }
 652
 653 static void
 654 shared_queue_put(struct shared_queue *q, void *obj)
 655 {
 656         pthread_mutex_lock(&q->lock);
 657         while (q->filled_slots == q->size)
 658                 pthread_cond_wait(&q->space_avail_cond, &q->lock);
 659
 660         q->back = (q->back + 1) % q->size;
 661         q->array[q->back] = obj;
 662         q->filled_slots++;
 663
 664         pthread_cond_broadcast(&q->msg_avail_cond);
 665         pthread_mutex_unlock(&q->lock);
 666 }
 667
 668 static void *
 669 shared_queue_get(struct shared_queue *q)
 670 {
 671         void *obj;
 672
 673         pthread_mutex_lock(&q->lock);
 674         while (q->filled_slots == 0)
 675                 pthread_cond_wait(&q->msg_avail_cond, &q->lock);
 676
 677         obj = q->array[q->front];
 678         q->array[q->front] = NULL;
 679         q->front = (q->front + 1) % q->size;
 680         q->filled_slots--;
 681
 682         pthread_cond_broadcast(&q->space_avail_cond);
 683         pthread_mutex_unlock(&q->lock);
 684         return obj;
 685 }
 686
 687 struct compressor_thread_params {
 688         struct shared_queue *res_to_compress_queue;
 689         struct shared_queue *compressed_res_queue;
 690         int out_ctype;
 691         struct wimlib_lzx_context *comp_ctx;
 692 };
 693
 694 #define MAX_CHUNKS_PER_MSG 2
 695
 696 struct message {
 697         struct wim_lookup_table_entry *lte;
 698         u8 *uncompressed_chunks[MAX_CHUNKS_PER_MSG];
 699         u8 *compressed_chunks[MAX_CHUNKS_PER_MSG];
 700         unsigned uncompressed_chunk_sizes[MAX_CHUNKS_PER_MSG];
 701         struct iovec out_chunks[MAX_CHUNKS_PER_MSG];
 702         unsigned num_chunks;
 703         struct list_head list;
 704         bool complete;
 705         u64 begin_chunk;
 706 };
 707
 708 static void
 709 compress_chunks(struct message *msg, int out_ctype,
 710                 struct wimlib_lzx_context *comp_ctx)
 711 {
 712         for (unsigned i = 0; i < msg->num_chunks; i++) {
 713                 unsigned len;
 714
 715                 len = compress_chunk(msg->uncompressed_chunks[i],
 716                                      msg->uncompressed_chunk_sizes[i],
 717                                      msg->compressed_chunks[i],
 718                                      out_ctype,
 719                                      comp_ctx);
 720
 721                 void *out_chunk;
 722                 unsigned out_len;
 723                 if (len) {
 724                         /* To be written compressed */
 725                         out_chunk = msg->compressed_chunks[i];
 726                         out_len = len;
 727                 } else {
 728                         /* To be written uncompressed */
 729                         out_chunk = msg->uncompressed_chunks[i];
 730                         out_len = msg->uncompressed_chunk_sizes[i];
 731                 }
 732                 msg->out_chunks[i].iov_base = out_chunk;
 733                 msg->out_chunks[i].iov_len = out_len;
 734         }
 735 }
 736
 737 /* Compressor thread routine.  This is a lot simpler than the main thread
 738  * routine: just repeatedly get a group of chunks from the
 739  * res_to_compress_queue, compress them, and put them in the
 740  * compressed_res_queue.  A NULL pointer indicates that the thread should stop.
 741  * */
 742 static void *
 743 compressor_thread_proc(void *arg)
 744 {
 745         struct compressor_thread_params *params = arg;
 746         struct shared_queue *res_to_compress_queue = params->res_to_compress_queue;
 747         struct shared_queue *compressed_res_queue = params->compressed_res_queue;
 748         struct message *msg;
 749
 750         DEBUG("Compressor thread ready");
 751         while ((msg = shared_queue_get(res_to_compress_queue)) != NULL) {
 752                 compress_chunks(msg, params->out_ctype, params->comp_ctx);
 753                 shared_queue_put(compressed_res_queue, msg);
 754         }
 755         DEBUG("Compressor thread terminating");
 756         return NULL;
 757 }
 758 #endif /* ENABLE_MULTITHREADED_COMPRESSION */
 759
 760 struct write_streams_progress_data {
 761         wimlib_progress_func_t progress_func;
 762         union wimlib_progress_info progress;
 763         uint64_t next_progress;
 764         WIMStruct *prev_wim_part;
 765 };
 766
 767 static void
 768 do_write_streams_progress(struct write_streams_progress_data *progress_data,
 769                           struct wim_lookup_table_entry *lte,
 770                           bool stream_discarded)
 771 {
 772         union wimlib_progress_info *progress = &progress_data->progress;
 773         bool new_wim_part;
 774
 775         if (stream_discarded) {
 776                 progress->write_streams.total_bytes -= wim_resource_size(lte);
 777                 if (progress_data->next_progress != ~(uint64_t)0 &&
 778                     progress_data->next_progress > progress->write_streams.total_bytes)
 779                 {
 780                         progress_data->next_progress = progress->write_streams.total_bytes;
 781                 }
 782         } else {
 783                 progress->write_streams.completed_bytes += wim_resource_size(lte);
 784         }
 785         new_wim_part = false;
 786         if (lte->resource_location == RESOURCE_IN_WIM &&
 787             lte->wim != progress_data->prev_wim_part)
 788         {
 789                 if (progress_data->prev_wim_part) {
 790                         new_wim_part = true;
 791                         progress->write_streams.completed_parts++;
 792                 }
 793                 progress_data->prev_wim_part = lte->wim;
 794         }
 795         progress->write_streams.completed_streams++;
 796         if (progress_data->progress_func
 797             && (progress->write_streams.completed_bytes >= progress_data->next_progress
 798                 || new_wim_part))
 799         {
 800                 progress_data->progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
 801                                              progress);
 802                 if (progress_data->next_progress == progress->write_streams.total_bytes) {
 803                         progress_data->next_progress = ~(uint64_t)0;
 804                 } else {
 805                         progress_data->next_progress =
 806                                 min(progress->write_streams.total_bytes,
 807                                     progress->write_streams.completed_bytes +
 808                                         progress->write_streams.total_bytes / 100);
 809                 }
 810         }
 811 }
 812
 813 struct serial_write_stream_ctx {
 814         struct filedes *out_fd;
 815         int out_ctype;
 816         struct wimlib_lzx_context **comp_ctx;
 817         int write_resource_flags;
 818 };
 819
 820 static int
 821 serial_write_stream(struct wim_lookup_table_entry *lte, void *_ctx)
 822 {
 823         struct serial_write_stream_ctx *ctx = _ctx;
 824         return write_wim_resource(lte, ctx->out_fd,
 825                                   ctx->out_ctype, &lte->output_resource_entry,
 826                                   ctx->write_resource_flags,
 827                                   ctx->comp_ctx);
 828 }
 829
 830
 831 /* Write a list of streams, taking into account that some streams may be
 832  * duplicates that are checksummed and discarded on the fly, and also delegating
 833  * the actual writing of a stream to a function @write_stream_cb, which is
 834  * passed the context @write_stream_ctx. */
 835 static int
 836 do_write_stream_list(struct list_head *stream_list,
 837                      struct wim_lookup_table *lookup_table,
 838                      int (*write_stream_cb)(struct wim_lookup_table_entry *, void *),
 839                      void *write_stream_ctx,
 840                      struct write_streams_progress_data *progress_data)
 841 {
 842         int ret = 0;
 843         struct wim_lookup_table_entry *lte;
 844         bool stream_discarded;
 845
 846         /* For each stream in @stream_list ... */
 847         while (!list_empty(stream_list)) {
 848                 stream_discarded = false;
 849                 lte = container_of(stream_list->next,
 850                                    struct wim_lookup_table_entry,
 851                                    write_streams_list);
 852                 list_del(&lte->write_streams_list);
 853                 if (lte->unhashed && !lte->unique_size) {
 854                         /* Unhashed stream that shares a size with some other
 855                          * stream in the WIM we are writing.  The stream must be
 856                          * checksummed to know if we need to write it or not. */
 857                         struct wim_lookup_table_entry *tmp;
 858                         u32 orig_out_refcnt = lte->out_refcnt;
 859
 860                         ret = hash_unhashed_stream(lte, lookup_table, &tmp);
 861                         if (ret)
 862                                 break;
 863                         if (tmp != lte) {
 864                                 /* We found a duplicate stream.  'lte' was
 865                                  * freed, so replace it with the duplicate.  */
 866                                 lte = tmp;
 867
 868                                 /* 'out_refcnt' was transferred to the
 869                                  * duplicate, and we can detect if the duplicate
 870                                  * stream was already referenced for writing by
 871                                  * checking if its 'out_refcnt' is higher than
 872                                  * that of the original stream.  In such cases,
 873                                  * the current stream can be discarded.  We can
 874                                  * also discard the current stream if it was
 875                                  * previously marked as filtered (e.g. already
 876                                  * present in the WIM being written).  */
 877                                 if (lte->out_refcnt > orig_out_refcnt ||
 878                                     lte->filtered) {
 879                                         DEBUG("Discarding duplicate stream of "
 880                                               "length %"PRIu64,
 881                                               wim_resource_size(lte));
 882                                         lte->no_progress = 0;
 883                                         stream_discarded = true;
 884                                         goto skip_to_progress;
 885                                 }
 886                         }
 887                 }
 888
 889                 /* Here, @lte is either a hashed stream or an unhashed stream
 890                  * with a unique size.  In either case we know that the stream
 891                  * has to be written.  In either case the SHA1 message digest
 892                  * will be calculated over the stream while writing it; however,
 893                  * in the former case this is done merely to check the data,
 894                  * while in the latter case this is done because we do not have
 895                  * the SHA1 message digest yet.  */
 896                 wimlib_assert(lte->out_refcnt != 0);
 897                 lte->deferred = 0;
 898                 lte->no_progress = 0;
 899                 ret = (*write_stream_cb)(lte, write_stream_ctx);
 900                 if (ret)
 901                         break;
 902                 /* In parallel mode, some streams are deferred for later,
 903                  * serialized processing; ignore them here. */
 904                 if (lte->deferred)
 905                         continue;
 906                 if (lte->unhashed) {
 907                         list_del(&lte->unhashed_list);
 908                         lookup_table_insert(lookup_table, lte);
 909                         lte->unhashed = 0;
 910                 }
 911         skip_to_progress:
 912                 if (!lte->no_progress) {
 913                         do_write_streams_progress(progress_data,
 914                                                   lte, stream_discarded);
 915                 }
 916         }
 917         return ret;
 918 }
 919
 920 static int
 921 do_write_stream_list_serial(struct list_head *stream_list,
 922                             struct wim_lookup_table *lookup_table,
 923                             struct filedes *out_fd,
 924                             int out_ctype,
 925                             struct wimlib_lzx_context **comp_ctx,
 926                             int write_resource_flags,
 927                             struct write_streams_progress_data *progress_data)
 928 {
 929         struct serial_write_stream_ctx ctx = {
 930                 .out_fd = out_fd,
 931                 .out_ctype = out_ctype,
 932                 .write_resource_flags = write_resource_flags,
 933                 .comp_ctx = comp_ctx,
 934         };
 935         return do_write_stream_list(stream_list,
 936                                     lookup_table,
 937                                     serial_write_stream,
 938                                     &ctx,
 939                                     progress_data);
 940 }
 941
 942 static inline int
 943 write_flags_to_resource_flags(int write_flags)
 944 {
 945         int resource_flags = 0;
 946
 947         if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
 948                 resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS;
 949         if (write_flags & WIMLIB_WRITE_FLAG_COMPRESS_SLOW)
 950                 resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_COMPRESS_SLOW;
 951         if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
 952                 resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE;
 953         return resource_flags;
 954 }
 955
 956 static int
 957 write_stream_list_serial(struct list_head *stream_list,
 958                          struct wim_lookup_table *lookup_table,
 959                          struct filedes *out_fd,
 960                          int out_ctype,
 961                          struct wimlib_lzx_context **comp_ctx,
 962                          int write_resource_flags,
 963                          struct write_streams_progress_data *progress_data)
 964 {
 965         union wimlib_progress_info *progress = &progress_data->progress;
 966         DEBUG("Writing stream list of size %"PRIu64" (serial version)",
 967               progress->write_streams.total_streams);
 968         progress->write_streams.num_threads = 1;
 969         if (progress_data->progress_func) {
 970                 progress_data->progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
 971                                              progress);
 972         }
 973         return do_write_stream_list_serial(stream_list,
 974                                            lookup_table,
 975                                            out_fd,
 976                                            out_ctype,
 977                                            comp_ctx,
 978                                            write_resource_flags,
 979                                            progress_data);
 980 }
 981
 982 #ifdef ENABLE_MULTITHREADED_COMPRESSION
 983 static int
 984 write_wim_chunks(struct message *msg, struct filedes *out_fd,
 985                  struct chunk_table *chunk_tab,
 986                  int write_resource_flags)
 987 {
 988         struct iovec *vecs;
 989         struct pwm_chunk_hdr *chunk_hdrs;
 990         unsigned nvecs;
 991         int ret;
 992
 993         for (unsigned i = 0; i < msg->num_chunks; i++)
 994                 chunk_tab_record_chunk(chunk_tab, msg->out_chunks[i].iov_len);
 995
 996         if (!(write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) {
 997                 nvecs = msg->num_chunks;
 998                 vecs = msg->out_chunks;
 999         } else {
1000                 /* Special case:  If writing a compressed resource to a pipable
1001                  * WIM, prefix each compressed chunk with a header that gives
1002                  * its compressed size.  */
1003                 nvecs = msg->num_chunks * 2;
1004                 vecs = alloca(nvecs * sizeof(vecs[0]));
1005                 chunk_hdrs = alloca(msg->num_chunks * sizeof(chunk_hdrs[0]));
1006
1007                 for (unsigned i = 0; i < msg->num_chunks; i++) {
1008                         chunk_hdrs[i].compressed_size = cpu_to_le32(msg->out_chunks[i].iov_len);
1009                         vecs[i * 2].iov_base = &chunk_hdrs[i];
1010                         vecs[i * 2].iov_len = sizeof(chunk_hdrs[i]);
1011                         vecs[i * 2 + 1].iov_base = msg->out_chunks[i].iov_base;
1012                         vecs[i * 2 + 1].iov_len = msg->out_chunks[i].iov_len;
1013                 }
1014         }
1015         ret = full_writev(out_fd, vecs, nvecs);
1016         if (ret)
1017                 ERROR_WITH_ERRNO("Failed to write WIM chunks");
1018         return ret;
1019 }
1020
1021 struct main_writer_thread_ctx {
1022         struct list_head *stream_list;
1023         struct wim_lookup_table *lookup_table;
1024         struct filedes *out_fd;
1025         off_t res_start_offset;
1026         int out_ctype;
1027         struct wimlib_lzx_context **comp_ctx;
1028         int write_resource_flags;
1029         struct shared_queue *res_to_compress_queue;
1030         struct shared_queue *compressed_res_queue;
1031         size_t num_messages;
1032         struct write_streams_progress_data *progress_data;
1033
1034         struct list_head available_msgs;
1035         struct list_head outstanding_streams;
1036         struct list_head serial_streams;
1037         size_t num_outstanding_messages;
1038
1039         SHA_CTX next_sha_ctx;
1040         u64 next_chunk;
1041         u64 next_num_chunks;
1042         struct wim_lookup_table_entry *next_lte;
1043
1044         struct message *msgs;
1045         struct message *next_msg;
1046         struct chunk_table *cur_chunk_tab;
1047 };
1048
1049 static int
1050 init_message(struct message *msg)
1051 {
1052         for (size_t i = 0; i < MAX_CHUNKS_PER_MSG; i++) {
1053                 msg->compressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
1054                 msg->uncompressed_chunks[i] = MALLOC(WIM_CHUNK_SIZE);
1055                 if (msg->compressed_chunks[i] == NULL ||
1056                     msg->uncompressed_chunks[i] == NULL)
1057                         return WIMLIB_ERR_NOMEM;
1058         }
1059         return 0;
1060 }
1061
1062 static void
1063 destroy_message(struct message *msg)
1064 {
1065         for (size_t i = 0; i < MAX_CHUNKS_PER_MSG; i++) {
1066                 FREE(msg->compressed_chunks[i]);
1067                 FREE(msg->uncompressed_chunks[i]);
1068         }
1069 }
1070
1071 static void
1072 free_messages(struct message *msgs, size_t num_messages)
1073 {
1074         if (msgs) {
1075                 for (size_t i = 0; i < num_messages; i++)
1076                         destroy_message(&msgs[i]);
1077                 FREE(msgs);
1078         }
1079 }
1080
1081 static struct message *
1082 allocate_messages(size_t num_messages)
1083 {
1084         struct message *msgs;
1085
1086         msgs = CALLOC(num_messages, sizeof(struct message));
1087         if (!msgs)
1088                 return NULL;
1089         for (size_t i = 0; i < num_messages; i++) {
1090                 if (init_message(&msgs[i])) {
1091                         free_messages(msgs, num_messages);
1092                         return NULL;
1093                 }
1094         }
1095         return msgs;
1096 }
1097
1098 static void
1099 main_writer_thread_destroy_ctx(struct main_writer_thread_ctx *ctx)
1100 {
1101         while (ctx->num_outstanding_messages--)
1102                 shared_queue_get(ctx->compressed_res_queue);
1103         free_messages(ctx->msgs, ctx->num_messages);
1104         FREE(ctx->cur_chunk_tab);
1105 }
1106
1107 static int
1108 main_writer_thread_init_ctx(struct main_writer_thread_ctx *ctx)
1109 {
1110         /* Pre-allocate all the buffers that will be needed to do the chunk
1111          * compression. */
1112         ctx->msgs = allocate_messages(ctx->num_messages);
1113         if (!ctx->msgs)
1114                 return WIMLIB_ERR_NOMEM;
1115
1116         /* Initially, all the messages are available to use. */
1117         INIT_LIST_HEAD(&ctx->available_msgs);
1118         for (size_t i = 0; i < ctx->num_messages; i++)
1119                 list_add_tail(&ctx->msgs[i].list, &ctx->available_msgs);
1120
1121         /* outstanding_streams is the list of streams that currently have had
1122          * chunks sent off for compression.
1123          *
1124          * The first stream in outstanding_streams is the stream that is
1125          * currently being written.
1126          *
1127          * The last stream in outstanding_streams is the stream that is
1128          * currently being read and having chunks fed to the compressor threads.
1129          * */
1130         INIT_LIST_HEAD(&ctx->outstanding_streams);
1131         ctx->num_outstanding_messages = 0;
1132
1133         ctx->next_msg = NULL;
1134
1135         /* Resources that don't need any chunks compressed are added to this
1136          * list and written directly by the main thread. */
1137         INIT_LIST_HEAD(&ctx->serial_streams);
1138
1139         ctx->cur_chunk_tab = NULL;
1140
1141         return 0;
1142 }
1143
1144 static int
1145 receive_compressed_chunks(struct main_writer_thread_ctx *ctx)
1146 {
1147         struct message *msg;
1148         struct wim_lookup_table_entry *cur_lte;
1149         int ret;
1150
1151         wimlib_assert(!list_empty(&ctx->outstanding_streams));
1152         wimlib_assert(ctx->num_outstanding_messages != 0);
1153
1154         cur_lte = container_of(ctx->outstanding_streams.next,
1155                                struct wim_lookup_table_entry,
1156                                being_compressed_list);
1157
1158         /* Get the next message from the queue and process it.
1159          * The message will contain 1 or more data chunks that have been
1160          * compressed. */
1161         msg = shared_queue_get(ctx->compressed_res_queue);
1162         msg->complete = true;
1163         --ctx->num_outstanding_messages;
1164
1165         /* Is this the next chunk in the current resource?  If it's not
1166          * (i.e., an earlier chunk in a same or different resource
1167          * hasn't been compressed yet), do nothing, and keep this
1168          * message around until all earlier chunks are received.
1169          *
1170          * Otherwise, write all the chunks we can. */
1171         while (cur_lte != NULL &&
1172                !list_empty(&cur_lte->msg_list)
1173                && (msg = container_of(cur_lte->msg_list.next,
1174                                       struct message,
1175                                       list))->complete)
1176         {
1177                 list_move(&msg->list, &ctx->available_msgs);
1178                 if (msg->begin_chunk == 0) {
1179                         /* First set of chunks.  */
1180
1181                         /* Write pipable WIM stream header if needed.  */
1182                         if (ctx->write_resource_flags &
1183                             WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)
1184                         {
1185                                 ret = write_pwm_stream_header(cur_lte, ctx->out_fd,
1186                                                               WIM_RESHDR_FLAG_COMPRESSED);
1187                                 if (ret)
1188                                         return ret;
1189                         }
1190
1191                         /* Save current offset.  */
1192                         ctx->res_start_offset = ctx->out_fd->offset;
1193
1194                         /* Begin building the chunk table, and leave space for
1195                          * it if needed.  */
1196                         ret = begin_wim_resource_chunk_tab(cur_lte,
1197                                                            ctx->out_fd,
1198                                                            &ctx->cur_chunk_tab,
1199                                                            ctx->write_resource_flags);
1200                         if (ret)
1201                                 return ret;
1202
1203                 }
1204
1205                 /* Write the compressed chunks from the message. */
1206                 ret = write_wim_chunks(msg, ctx->out_fd, ctx->cur_chunk_tab,
1207                                        ctx->write_resource_flags);
1208                 if (ret)
1209                         return ret;
1210
1211                 /* Was this the last chunk of the stream?  If so, finish
1212                  * it. */
1213                 if (list_empty(&cur_lte->msg_list) &&
1214                     msg->begin_chunk + msg->num_chunks == ctx->cur_chunk_tab->num_chunks)
1215                 {
1216                         u64 res_csize;
1217
1218                         ret = finish_wim_resource_chunk_tab(ctx->cur_chunk_tab,
1219                                                             ctx->out_fd,
1220                                                             ctx->res_start_offset,
1221                                                             ctx->write_resource_flags);
1222                         if (ret)
1223                                 return ret;
1224
1225                         list_del(&cur_lte->being_compressed_list);
1226
1227                         res_csize = ctx->out_fd->offset - ctx->res_start_offset;
1228
1229                         FREE(ctx->cur_chunk_tab);
1230                         ctx->cur_chunk_tab = NULL;
1231
1232                         /* Check for resources compressed to greater than or
1233                          * equal to their original size and write them
1234                          * uncompressed instead.  (But never do this if writing
1235                          * to a pipe.)  */
1236                         if (res_csize >= wim_resource_size(cur_lte) &&
1237                             !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE))
1238                         {
1239                                 DEBUG("Compressed %"PRIu64" => %"PRIu64" bytes; "
1240                                       "writing uncompressed instead",
1241                                       wim_resource_size(cur_lte), res_csize);
1242                                 ret = seek_and_truncate(ctx->out_fd, ctx->res_start_offset);
1243                                 if (ret)
1244                                         return ret;
1245                                 ret = write_wim_resource(cur_lte,
1246                                                          ctx->out_fd,
1247                                                          WIMLIB_COMPRESSION_TYPE_NONE,
1248                                                          &cur_lte->output_resource_entry,
1249                                                          ctx->write_resource_flags,
1250                                                          ctx->comp_ctx);
1251                                 if (ret)
1252                                         return ret;
1253                         } else {
1254                                 cur_lte->output_resource_entry.size =
1255                                         res_csize;
1256
1257                                 cur_lte->output_resource_entry.original_size =
1258                                         cur_lte->resource_entry.original_size;
1259
1260                                 cur_lte->output_resource_entry.offset =
1261                                         ctx->res_start_offset;
1262
1263                                 cur_lte->output_resource_entry.flags =
1264                                         cur_lte->resource_entry.flags |
1265                                                 WIM_RESHDR_FLAG_COMPRESSED;
1266
1267                                 DEBUG("Wrote compressed resource "
1268                                       "(%"PRIu64" => %"PRIu64" bytes @ +%"PRIu64", flags=0x%02x)",
1269                                       cur_lte->output_resource_entry.original_size,
1270                                       cur_lte->output_resource_entry.size,
1271                                       cur_lte->output_resource_entry.offset,
1272                                       cur_lte->output_resource_entry.flags);
1273                         }
1274
1275                         do_write_streams_progress(ctx->progress_data,
1276                                                   cur_lte, false);
1277
1278                         /* Since we just finished writing a stream, write any
1279                          * streams that have been added to the serial_streams
1280                          * list for direct writing by the main thread (e.g.
1281                          * resources that don't need to be compressed because
1282                          * the desired compression type is the same as the
1283                          * previous compression type). */
1284                         if (!list_empty(&ctx->serial_streams)) {
1285                                 ret = do_write_stream_list_serial(&ctx->serial_streams,
1286                                                                   ctx->lookup_table,
1287                                                                   ctx->out_fd,
1288                                                                   ctx->out_ctype,
1289                                                                   ctx->comp_ctx,
1290                                                                   ctx->write_resource_flags,
1291                                                                   ctx->progress_data);
1292                                 if (ret)
1293                                         return ret;
1294                         }
1295
1296                         /* Advance to the next stream to write. */
1297                         if (list_empty(&ctx->outstanding_streams)) {
1298                                 cur_lte = NULL;
1299                         } else {
1300                                 cur_lte = container_of(ctx->outstanding_streams.next,
1301                                                        struct wim_lookup_table_entry,
1302                                                        being_compressed_list);
1303                         }
1304                 }
1305         }
1306         return 0;
1307 }
1308
1309 /* Called when the main thread has read a new chunk of data. */
1310 static int
1311 main_writer_thread_cb(const void *chunk, size_t chunk_size, void *_ctx)
1312 {
1313         struct main_writer_thread_ctx *ctx = _ctx;
1314         int ret;
1315         struct message *next_msg;
1316         u64 next_chunk_in_msg;
1317
1318         /* Update SHA1 message digest for the stream currently being read by the
1319          * main thread. */
1320         sha1_update(&ctx->next_sha_ctx, chunk, chunk_size);
1321
1322         /* We send chunks of data to the compressor chunks in batches which we
1323          * refer to as "messages".  @next_msg is the message that is currently
1324          * being prepared to send off.  If it is NULL, that indicates that we
1325          * need to start a new message. */
1326         next_msg = ctx->next_msg;
1327         if (!next_msg) {
1328                 /* We need to start a new message.  First check to see if there
1329                  * is a message available in the list of available messages.  If
1330                  * so, we can just take one.  If not, all the messages (there is
1331                  * a fixed number of them, proportional to the number of
1332                  * threads) have been sent off to the compressor threads, so we
1333                  * receive messages from the compressor threads containing
1334                  * compressed chunks of data.
1335                  *
1336                  * We may need to receive multiple messages before one is
1337                  * actually available to use because messages received that are
1338                  * *not* for the very next set of chunks to compress must be
1339                  * buffered until it's time to write those chunks. */
1340                 while (list_empty(&ctx->available_msgs)) {
1341                         ret = receive_compressed_chunks(ctx);
1342                         if (ret)
1343                                 return ret;
1344                 }
1345
1346                 next_msg = container_of(ctx->available_msgs.next,
1347                                         struct message, list);
1348                 list_del(&next_msg->list);
1349                 next_msg->complete = false;
1350                 next_msg->begin_chunk = ctx->next_chunk;
1351                 next_msg->num_chunks = min(MAX_CHUNKS_PER_MSG,
1352                                            ctx->next_num_chunks - ctx->next_chunk);
1353                 ctx->next_msg = next_msg;
1354         }
1355
1356         /* Fill in the next chunk to compress */
1357         next_chunk_in_msg = ctx->next_chunk - next_msg->begin_chunk;
1358
1359         next_msg->uncompressed_chunk_sizes[next_chunk_in_msg] = chunk_size;
1360         memcpy(next_msg->uncompressed_chunks[next_chunk_in_msg],
1361                chunk, chunk_size);
1362         ctx->next_chunk++;
1363         if (++next_chunk_in_msg == next_msg->num_chunks) {
1364                 /* Send off an array of chunks to compress */
1365                 list_add_tail(&next_msg->list, &ctx->next_lte->msg_list);
1366                 shared_queue_put(ctx->res_to_compress_queue, next_msg);
1367                 ++ctx->num_outstanding_messages;
1368                 ctx->next_msg = NULL;
1369         }
1370         return 0;
1371 }
1372
1373 static int
1374 main_writer_thread_finish(void *_ctx)
1375 {
1376         struct main_writer_thread_ctx *ctx = _ctx;
1377         int ret;
1378         while (ctx->num_outstanding_messages != 0) {
1379                 ret = receive_compressed_chunks(ctx);
1380                 if (ret)
1381                         return ret;
1382         }
1383         wimlib_assert(list_empty(&ctx->outstanding_streams));
1384         return do_write_stream_list_serial(&ctx->serial_streams,
1385                                            ctx->lookup_table,
1386                                            ctx->out_fd,
1387                                            ctx->out_ctype,
1388                                            ctx->comp_ctx,
1389                                            ctx->write_resource_flags,
1390                                            ctx->progress_data);
1391 }
1392
1393 static int
1394 submit_stream_for_compression(struct wim_lookup_table_entry *lte,
1395                               struct main_writer_thread_ctx *ctx)
1396 {
1397         int ret;
1398
1399         /* Read the entire stream @lte, feeding its data chunks to the
1400          * compressor threads.  Also SHA1-sum the stream; this is required in
1401          * the case that @lte is unhashed, and a nice additional verification
1402          * when @lte is already hashed. */
1403         sha1_init(&ctx->next_sha_ctx);
1404         ctx->next_chunk = 0;
1405         ctx->next_num_chunks = wim_resource_chunks(lte);
1406         ctx->next_lte = lte;
1407         INIT_LIST_HEAD(&lte->msg_list);
1408         list_add_tail(&lte->being_compressed_list, &ctx->outstanding_streams);
1409         ret = read_resource_prefix(lte, wim_resource_size(lte),
1410                                    main_writer_thread_cb, ctx, 0);
1411         if (ret)
1412                 return ret;
1413         wimlib_assert(ctx->next_chunk == ctx->next_num_chunks);
1414         return finalize_and_check_sha1(&ctx->next_sha_ctx, lte);
1415 }
1416
1417 static int
1418 main_thread_process_next_stream(struct wim_lookup_table_entry *lte, void *_ctx)
1419 {
1420         struct main_writer_thread_ctx *ctx = _ctx;
1421         int ret;
1422
1423         if (wim_resource_size(lte) < 1000 ||
1424             ctx->out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
1425             (lte->resource_location == RESOURCE_IN_WIM &&
1426              !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) &&
1427              lte->wim->compression_type == ctx->out_ctype))
1428         {
1429                 /* Stream is too small or isn't being compressed.  Process it by
1430                  * the main thread when we have a chance.  We can't necessarily
1431                  * process it right here, as the main thread could be in the
1432                  * middle of writing a different stream. */
1433                 list_add_tail(&lte->write_streams_list, &ctx->serial_streams);
1434                 lte->deferred = 1;
1435                 ret = 0;
1436         } else {
1437                 ret = submit_stream_for_compression(lte, ctx);
1438         }
1439         lte->no_progress = 1;
1440         return ret;
1441 }
1442
/* Return the processor count reported by the platform: the value of
 * win32_get_number_of_processors() on Windows, or
 * sysconf(_SC_NPROCESSORS_ONLN) elsewhere.  The value is returned unchecked;
 * the caller validates it.  */
static long
get_default_num_threads(void)
{
        long num_processors;

#ifdef __WIN32__
        num_processors = win32_get_number_of_processors();
#else
        num_processors = sysconf(_SC_NPROCESSORS_ONLN);
#endif
        return num_processors;
}
1452
1453 /* Equivalent to write_stream_list_serial(), except this takes a @num_threads
1454  * parameter and will perform compression using that many threads.  Falls
1455  * back to write_stream_list_serial() on certain errors, such as a failure to
1456  * create the number of threads requested.
1457  *
1458  * High level description of the algorithm for writing compressed streams in
1459  * parallel:  We perform compression on chunks of size WIM_CHUNK_SIZE bytes
1460  * rather than on full files.  The currently executing thread becomes the main
1461  * thread and is entirely in charge of reading the data to compress (which may
1462  * be in any location understood by the resource code--- such as in an external
1463  * file being captured, or in another WIM file from which an image is being
1464  * exported) and actually writing the compressed data to the output file.
1465  * Additional threads are "compressor threads" and all execute the
1466  * compressor_thread_proc, where they repeatedly retrieve buffers of data from
1467  * the main thread, compress them, and hand them back to the main thread.
1468  *
1469  * Certain streams, such as streams that do not need to be compressed (e.g.
1470  * input compression type same as output compression type) or streams of very
1471  * small size are placed in a list (main_writer_thread_ctx.serial_list) and
1472  * handled entirely by the main thread at an appropriate time.
1473  *
1474  * At any given point in time, multiple streams may be having chunks compressed
1475  * concurrently.  The stream that the main thread is currently *reading* may be
1476  * later in the list that the stream that the main thread is currently
1477  * *writing*.
1478  */
1479 static int
1480 write_stream_list_parallel(struct list_head *stream_list,
1481                            struct wim_lookup_table *lookup_table,
1482                            struct filedes *out_fd,
1483                            int out_ctype,
1484                            struct wimlib_lzx_context **comp_ctx,
1485                            int write_resource_flags,
1486                            struct write_streams_progress_data *progress_data,
1487                            unsigned num_threads)
1488 {
1489         int ret;
1490         struct shared_queue res_to_compress_queue;
1491         struct shared_queue compressed_res_queue;
1492         pthread_t *compressor_threads = NULL;
1493         union wimlib_progress_info *progress = &progress_data->progress;
1494
1495         if (num_threads == 0) {
1496                 long nthreads = get_default_num_threads();
1497                 if (nthreads < 1 || nthreads > UINT_MAX) {
1498                         WARNING("Could not determine number of processors! Assuming 1");
1499                         goto out_serial;
1500                 } else if (nthreads == 1) {
1501                         goto out_serial_quiet;
1502                 } else {
1503                         num_threads = nthreads;
1504                 }
1505         }
1506
1507         DEBUG("Writing stream list of size %"PRIu64" "
1508               "(parallel version, num_threads=%u)",
1509               progress->write_streams.total_streams, num_threads);
1510
1511         progress->write_streams.num_threads = num_threads;
1512
1513         static const size_t MESSAGES_PER_THREAD = 2;
1514         size_t queue_size = (size_t)(num_threads * MESSAGES_PER_THREAD);
1515
1516         DEBUG("Initializing shared queues (queue_size=%zu)", queue_size);
1517
1518         ret = shared_queue_init(&res_to_compress_queue, queue_size);
1519         if (ret)
1520                 goto out_serial;
1521
1522         ret = shared_queue_init(&compressed_res_queue, queue_size);
1523         if (ret)
1524                 goto out_destroy_res_to_compress_queue;
1525
1526         struct compressor_thread_params *params;
1527
1528         params = CALLOC(num_threads, sizeof(params[0]));
1529         if (params == NULL) {
1530                 ret = WIMLIB_ERR_NOMEM;
1531                 goto out_destroy_compressed_res_queue;
1532         }
1533
1534         for (unsigned i = 0; i < num_threads; i++) {
1535                 params[i].res_to_compress_queue = &res_to_compress_queue;
1536                 params[i].compressed_res_queue = &compressed_res_queue;
1537                 params[i].out_ctype = out_ctype;
1538                 if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) {
1539                         ret = alloc_lzx_context(write_resource_flags,
1540                                                 &params[i].comp_ctx);
1541                         if (ret)
1542                                 goto out_free_params;
1543                 }
1544         }
1545
1546         compressor_threads = MALLOC(num_threads * sizeof(pthread_t));
1547         if (!compressor_threads) {
1548                 ret = WIMLIB_ERR_NOMEM;
1549                 goto out_free_params;
1550         }
1551
1552         for (unsigned i = 0; i < num_threads; i++) {
1553                 DEBUG("pthread_create thread %u of %u", i + 1, num_threads);
1554                 ret = pthread_create(&compressor_threads[i], NULL,
1555                                      compressor_thread_proc, &params[i]);
1556                 if (ret != 0) {
1557                         ret = -1;
1558                         ERROR_WITH_ERRNO("Failed to create compressor "
1559                                          "thread %u of %u",
1560                                          i + 1, num_threads);
1561                         num_threads = i;
1562                         goto out_join;
1563                 }
1564         }
1565
1566         if (progress_data->progress_func) {
1567                 progress_data->progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
1568                                              progress);
1569         }
1570
1571         struct main_writer_thread_ctx ctx;
1572         ctx.stream_list           = stream_list;
1573         ctx.lookup_table          = lookup_table;
1574         ctx.out_fd                = out_fd;
1575         ctx.out_ctype             = out_ctype;
1576         ctx.comp_ctx              = comp_ctx;
1577         ctx.res_to_compress_queue = &res_to_compress_queue;
1578         ctx.compressed_res_queue  = &compressed_res_queue;
1579         ctx.num_messages          = queue_size;
1580         ctx.write_resource_flags  = write_resource_flags;
1581         ctx.progress_data         = progress_data;
1582         ret = main_writer_thread_init_ctx(&ctx);
1583         if (ret)
1584                 goto out_join;
1585         ret = do_write_stream_list(stream_list, lookup_table,
1586                                    main_thread_process_next_stream,
1587                                    &ctx, progress_data);
1588         if (ret)
1589                 goto out_destroy_ctx;
1590
1591         /* The main thread has finished reading all streams that are going to be
1592          * compressed in parallel, and it now needs to wait for all remaining
1593          * chunks to be compressed so that the remaining streams can actually be
1594          * written to the output file.  Furthermore, any remaining streams that
1595          * had processing deferred to the main thread need to be handled.  These
1596          * tasks are done by the main_writer_thread_finish() function. */
1597         ret = main_writer_thread_finish(&ctx);
1598 out_destroy_ctx:
1599         main_writer_thread_destroy_ctx(&ctx);
1600 out_join:
1601         for (unsigned i = 0; i < num_threads; i++)
1602                 shared_queue_put(&res_to_compress_queue, NULL);
1603
1604         for (unsigned i = 0; i < num_threads; i++) {
1605                 if (pthread_join(compressor_threads[i], NULL)) {
1606                         WARNING_WITH_ERRNO("Failed to join compressor "
1607                                            "thread %u of %u",
1608                                            i + 1, num_threads);
1609                 }
1610         }
1611         FREE(compressor_threads);
1612 out_free_params:
1613         for (unsigned i = 0; i < num_threads; i++)
1614                 wimlib_lzx_free_context(params[i].comp_ctx);
1615         FREE(params);
1616 out_destroy_compressed_res_queue:
1617         shared_queue_destroy(&compressed_res_queue);
1618 out_destroy_res_to_compress_queue:
1619         shared_queue_destroy(&res_to_compress_queue);
1620         if (ret >= 0 && ret != WIMLIB_ERR_NOMEM)
1621                 return ret;
1622 out_serial:
1623         WARNING("Falling back to single-threaded compression");
1624 out_serial_quiet:
1625         return write_stream_list_serial(stream_list,
1626                                         lookup_table,
1627                                         out_fd,
1628                                         out_ctype,
1629                                         comp_ctx,
1630                                         write_resource_flags,
1631                                         progress_data);
1632
1633 }
1634 #endif
1635
1636 /*
1637  * Write a list of streams to a WIM (@out_fd) using the compression type
1638  * @out_ctype and up to @num_threads compressor threads.
1639  */
1640 static int
1641 write_stream_list(struct list_head *stream_list,
1642                   struct wim_lookup_table *lookup_table,
1643                   struct filedes *out_fd, int out_ctype,
1644                   struct wimlib_lzx_context **comp_ctx,
1645                   int write_flags,
1646                   unsigned num_threads, wimlib_progress_func_t progress_func)
1647 {
1648         struct wim_lookup_table_entry *lte;
1649         size_t num_streams = 0;
1650         u64 total_bytes = 0;
1651         u64 total_compression_bytes = 0;
1652         struct write_streams_progress_data progress_data;
1653         int ret;
1654         int write_resource_flags;
1655         unsigned total_parts = 0;
1656         WIMStruct *prev_wim_part = NULL;
1657
1658         if (list_empty(stream_list)) {
1659                 DEBUG("No streams to write.");
1660                 return 0;
1661         }
1662
1663         write_resource_flags = write_flags_to_resource_flags(write_flags);
1664
1665         DEBUG("Writing stream list (offset = %"PRIu64", write_resource_flags=0x%08x)",
1666               out_fd->offset, write_resource_flags);
1667
1668         sort_stream_list_by_sequential_order(stream_list,
1669                                              offsetof(struct wim_lookup_table_entry,
1670                                                       write_streams_list));
1671
1672         /* Calculate the total size of the streams to be written.  Note: this
1673          * will be the uncompressed size, as we may not know the compressed size
1674          * yet, and also this will assume that every unhashed stream will be
1675          * written (which will not necessarily be the case). */
1676         list_for_each_entry(lte, stream_list, write_streams_list) {
1677                 num_streams++;
1678                 total_bytes += wim_resource_size(lte);
1679                 if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE
1680                        && (wim_resource_compression_type(lte) != out_ctype ||
1681                            (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS)))
1682                 {
1683                         total_compression_bytes += wim_resource_size(lte);
1684                 }
1685                 if (lte->resource_location == RESOURCE_IN_WIM) {
1686                         if (prev_wim_part != lte->wim) {
1687                                 prev_wim_part = lte->wim;
1688                                 total_parts++;
1689                         }
1690                 }
1691         }
1692
1693         memset(&progress_data, 0, sizeof(progress_data));
1694         progress_data.progress_func = progress_func;
1695
1696         progress_data.progress.write_streams.total_bytes       = total_bytes;
1697         progress_data.progress.write_streams.total_streams     = num_streams;
1698         progress_data.progress.write_streams.completed_bytes   = 0;
1699         progress_data.progress.write_streams.completed_streams = 0;
1700         progress_data.progress.write_streams.num_threads       = num_threads;
1701         progress_data.progress.write_streams.compression_type  = out_ctype;
1702         progress_data.progress.write_streams.total_parts       = total_parts;
1703         progress_data.progress.write_streams.completed_parts   = 0;
1704
1705         progress_data.next_progress = 0;
1706         progress_data.prev_wim_part = NULL;
1707
1708 #ifdef ENABLE_MULTITHREADED_COMPRESSION
1709         if (total_compression_bytes >= 2000000 && num_threads != 1)
1710                 ret = write_stream_list_parallel(stream_list,
1711                                                  lookup_table,
1712                                                  out_fd,
1713                                                  out_ctype,
1714                                                  comp_ctx,
1715                                                  write_resource_flags,
1716                                                  &progress_data,
1717                                                  num_threads);
1718         else
1719 #endif
1720                 ret = write_stream_list_serial(stream_list,
1721                                                lookup_table,
1722                                                out_fd,
1723                                                out_ctype,
1724                                                comp_ctx,
1725                                                write_resource_flags,
1726                                                &progress_data);
1727         if (ret == 0)
1728                 DEBUG("Successfully wrote stream list.");
1729         else
1730                 DEBUG("Failed to write stream list.");
1731         return ret;
1732 }
1733
/* Hash table of streams, with the bucket chosen from the stream's size.  Used
 * to work out whether a stream's size is unique among the inserted streams.  */
struct stream_size_table {
        /* Bucket heads; @capacity of them.  */
        struct hlist_head *array;

        /* Number of streams inserted so far.  */
        size_t num_entries;

        /* Number of buckets in @array.  */
        size_t capacity;
};
1739
1740 static int
1741 init_stream_size_table(struct stream_size_table *tab, size_t capacity)
1742 {
1743         tab->array = CALLOC(capacity, sizeof(tab->array[0]));
1744         if (!tab->array)
1745                 return WIMLIB_ERR_NOMEM;
1746         tab->num_entries = 0;
1747         tab->capacity = capacity;
1748         return 0;
1749 }
1750
1751 static void
1752 destroy_stream_size_table(struct stream_size_table *tab)
1753 {
1754         FREE(tab->array);
1755 }
1756
1757 static int
1758 stream_size_table_insert(struct wim_lookup_table_entry *lte, void *_tab)
1759 {
1760         struct stream_size_table *tab = _tab;
1761         size_t pos;
1762         struct wim_lookup_table_entry *same_size_lte;
1763         struct hlist_node *tmp;
1764
1765         pos = hash_u64(wim_resource_size(lte)) % tab->capacity;
1766         lte->unique_size = 1;
1767         hlist_for_each_entry(same_size_lte, tmp, &tab->array[pos], hash_list_2) {
1768                 if (wim_resource_size(same_size_lte) == wim_resource_size(lte)) {
1769                         lte->unique_size = 0;
1770                         same_size_lte->unique_size = 0;
1771                         break;
1772                 }
1773         }
1774
1775         hlist_add_head(&lte->hash_list_2, &tab->array[pos]);
1776         tab->num_entries++;
1777         return 0;
1778 }
1779
1780 struct find_streams_ctx {
1781         WIMStruct *wim;
1782         int write_flags;
1783         struct list_head stream_list;
1784         struct stream_size_table stream_size_tab;
1785 };
1786
1787 static void
1788 lte_reference_for_logical_write(struct wim_lookup_table_entry *lte,
1789                                 struct find_streams_ctx *ctx,
1790                                 unsigned nref)
1791 {
1792         if (lte->out_refcnt == 0) {
1793                 stream_size_table_insert(lte, &ctx->stream_size_tab);
1794                 list_add_tail(&lte->write_streams_list, &ctx->stream_list);
1795         }
1796         lte->out_refcnt += nref;
1797 }
1798
1799 static int
1800 do_lte_full_reference_for_logical_write(struct wim_lookup_table_entry *lte,
1801                                         void *_ctx)
1802 {
1803         struct find_streams_ctx *ctx = _ctx;
1804         lte->out_refcnt = 0;
1805         lte_reference_for_logical_write(lte, ctx,
1806                                         (lte->refcnt ? lte->refcnt : 1));
1807         return 0;
1808 }
1809
1810 static int
1811 inode_find_streams_to_write(struct wim_inode *inode,
1812                             struct wim_lookup_table *table,
1813                             struct find_streams_ctx *ctx)
1814 {
1815         struct wim_lookup_table_entry *lte;
1816         unsigned i;
1817
1818         for (i = 0; i <= inode->i_num_ads; i++) {
1819                 lte = inode_stream_lte(inode, i, table);
1820                 if (lte)
1821                         lte_reference_for_logical_write(lte, ctx, inode->i_nlink);
1822                 else if (!is_zero_hash(inode_stream_hash(inode, i)))
1823                         return WIMLIB_ERR_RESOURCE_NOT_FOUND;
1824         }
1825         return 0;
1826 }
1827
1828 static int
1829 image_find_streams_to_write(WIMStruct *wim)
1830 {
1831         struct find_streams_ctx *ctx;
1832         struct wim_image_metadata *imd;
1833         struct wim_inode *inode;
1834         struct wim_lookup_table_entry *lte;
1835         int ret;
1836
1837         ctx = wim->private;
1838         imd = wim_get_current_image_metadata(wim);
1839
1840         image_for_each_unhashed_stream(lte, imd)
1841                 lte->out_refcnt = 0;
1842
1843         /* Go through this image's inodes to find any streams that have not been
1844          * found yet. */
1845         image_for_each_inode(inode, imd) {
1846                 ret = inode_find_streams_to_write(inode, wim->lookup_table, ctx);
1847                 if (ret)
1848                         return ret;
1849         }
1850         return 0;
1851 }
1852
1853 /*
1854  * Build a list of streams (via `struct wim_lookup_table_entry's) included in
1855  * the "logical write" of the WIM, meaning all streams that are referenced at
1856  * least once by dentries in the the image(s) being written.  'out_refcnt' on
1857  * each stream being included in the logical write is set to the number of
1858  * references from dentries in the image(s).  Furthermore, 'unique_size' on each
1859  * stream being included in the logical write is set to indicate whether that
1860  * stream has a unique size relative to the streams being included in the
1861  * logical write.  Still furthermore, 'part_number' on each stream being
1862  * included in the logical write is set to the part number given in the
1863  * in-memory header of @p wim.
1864  *
1865  * This is considered a "logical write" because it does not take into account
1866  * filtering out streams already present in the WIM (in the case of an in place
1867  * overwrite) or present in other WIMs (in case of creating delta WIM).
1868  */
1869 static int
1870 prepare_logical_stream_list(WIMStruct *wim, int image, bool streams_ok,
1871                             struct find_streams_ctx *ctx)
1872 {
1873         int ret;
1874         struct wim_lookup_table_entry *lte;
1875
1876         if (streams_ok && (image == WIMLIB_ALL_IMAGES ||
1877                            (image == 1 && wim->hdr.image_count == 1)))
1878         {
1879                 /* Fast case:  Assume that all streams are being written and
1880                  * that the reference counts are correct.  */
1881                 struct wim_lookup_table_entry *lte;
1882                 struct wim_image_metadata *imd;
1883                 unsigned i;
1884
1885                 for_lookup_table_entry(wim->lookup_table,
1886                                        do_lte_full_reference_for_logical_write, ctx);
1887                 for (i = 0; i < wim->hdr.image_count; i++) {
1888                         imd = wim->image_metadata[i];
1889                         image_for_each_unhashed_stream(lte, imd)
1890                                 do_lte_full_reference_for_logical_write(lte, ctx);
1891                 }
1892         } else {
1893                 /* Slow case:  Walk through the images being written and
1894                  * determine the streams referenced.  */
1895                 for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL);
1896                 wim->private = ctx;
1897                 ret = for_image(wim, image, image_find_streams_to_write);
1898                 if (ret)
1899                         return ret;
1900         }
1901
1902         list_for_each_entry(lte, &ctx->stream_list, write_streams_list)
1903                 lte->part_number = wim->hdr.part_number;
1904         return 0;
1905 }
1906
1907 static int
1908 process_filtered_stream(struct wim_lookup_table_entry *lte, void *_ctx)
1909 {
1910         struct find_streams_ctx *ctx = _ctx;
1911         u16 filtered = 0;
1912
1913         /* Calculate and set lte->filtered.  */
1914         if (lte->resource_location == RESOURCE_IN_WIM) {
1915                 if (lte->wim == ctx->wim &&
1916                     (ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE))
1917                         filtered |= FILTERED_SAME_WIM;
1918                 if (lte->wim != ctx->wim &&
1919                     (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS))
1920                         filtered |= FILTERED_EXTERNAL_WIM;
1921         }
1922         lte->filtered = filtered;
1923
1924         /* Filtered streams get inserted into the stream size table too, unless
1925          * they already were.  This is because streams that are checksummed
1926          * on-the-fly during the write should not be written if they are
1927          * duplicates of filtered stream.  */
1928         if (lte->filtered && lte->out_refcnt == 0)
1929                 stream_size_table_insert(lte, &ctx->stream_size_tab);
1930         return 0;
1931 }
1932
1933 static int
1934 mark_stream_not_filtered(struct wim_lookup_table_entry *lte, void *_ignore)
1935 {
1936         lte->filtered = 0;
1937         return 0;
1938 }
1939
1940 /* Given the list of streams to include in a logical write of a WIM, handle
1941  * filtering out streams already present in the WIM or already present in
1942  * external WIMs, depending on the write flags provided.  */
1943 static void
1944 handle_stream_filtering(struct find_streams_ctx *ctx)
1945 {
1946         struct wim_lookup_table_entry *lte, *tmp;
1947
1948         if (!(ctx->write_flags & (WIMLIB_WRITE_FLAG_OVERWRITE |
1949                                   WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS)))
1950         {
1951                 for_lookup_table_entry(ctx->wim->lookup_table,
1952                                        mark_stream_not_filtered, ctx);
1953                 return;
1954         }
1955
1956         for_lookup_table_entry(ctx->wim->lookup_table,
1957                                process_filtered_stream, ctx);
1958
1959         /* Streams in logical write list that were filtered can be removed.  */
1960         list_for_each_entry_safe(lte, tmp, &ctx->stream_list,
1961                                  write_streams_list)
1962                 if (lte->filtered)
1963                         list_del(&lte->write_streams_list);
1964 }
1965
1966 /* Prepares list of streams to write for the specified WIM image(s).  This wraps
1967  * around prepare_logical_stream_list() to handle filtering out streams already
1968  * present in the WIM or already present in external WIMs, depending on the
1969  * write flags provided.
1970  *
1971  * Note: some additional data is stored in each `struct wim_lookup_table_entry':
1972  *
1973  * - 'out_refcnt' is set to the number of references found for the logical write.
1974  *    This will be nonzero on all streams in the list returned by this function,
1975  *    but will also be nonzero on streams not in the list that were included in
1976  *    the logical write list, but filtered out from the returned list.
1977  * - 'filtered' is set to nonzero if the stream was filtered.  Filtered streams
1978  *   are not included in the list of streams returned by this function.
1979  * - 'unique_size' is set if the stream has a unique size among all streams in
1980  *   the logical write plus any filtered streams in the entire WIM that could
1981  *   potentially turn out to have the same checksum as a yet-to-be-checksummed
1982  *   stream being written.
1983  */
1984 static int
1985 prepare_stream_list(WIMStruct *wim, int image, int write_flags,
1986                     struct list_head *stream_list)
1987 {
1988         int ret;
1989         bool streams_ok;
1990         struct find_streams_ctx ctx;
1991
1992         INIT_LIST_HEAD(&ctx.stream_list);
1993         ret = init_stream_size_table(&ctx.stream_size_tab,
1994                                      wim->lookup_table->capacity);
1995         if (ret)
1996                 return ret;
1997         ctx.write_flags = write_flags;
1998         ctx.wim = wim;
1999
2000         streams_ok = ((write_flags & WIMLIB_WRITE_FLAG_STREAMS_OK) != 0);
2001
2002         ret = prepare_logical_stream_list(wim, image, streams_ok, &ctx);
2003         if (ret)
2004                 goto out_destroy_table;
2005
2006         handle_stream_filtering(&ctx);
2007         list_transfer(&ctx.stream_list, stream_list);
2008         ret = 0;
2009 out_destroy_table:
2010         destroy_stream_size_table(&ctx.stream_size_tab);
2011         return ret;
2012 }
2013
2014 static int
2015 write_wim_streams(WIMStruct *wim, int image, int write_flags,
2016                   unsigned num_threads,
2017                   wimlib_progress_func_t progress_func,
2018                   struct list_head *stream_list_override)
2019 {
2020         int ret;
2021         struct list_head _stream_list;
2022         struct list_head *stream_list;
2023         struct wim_lookup_table_entry *lte;
2024
2025         if (stream_list_override == NULL) {
2026                 /* Normal case: prepare stream list from image(s) being written.
2027                  */
2028                 stream_list = &_stream_list;
2029                 ret = prepare_stream_list(wim, image, write_flags, stream_list);
2030                 if (ret)
2031                         return ret;
2032         } else {
2033                 /* Currently only as a result of wimlib_split() being called:
2034                  * use stream list already explicitly provided.  Use existing
2035                  * reference counts.  */
2036                 stream_list = stream_list_override;
2037                 list_for_each_entry(lte, stream_list, write_streams_list) {
2038                         lte->out_refcnt = (lte->refcnt ? lte->refcnt : 1);
2039                         lte->part_number = wim->hdr.part_number;
2040                 }
2041         }
2042
2043         return write_stream_list(stream_list,
2044                                  wim->lookup_table,
2045                                  &wim->out_fd,
2046                                  wim->compression_type,
2047                                  &wim->lzx_context,
2048                                  write_flags,
2049                                  num_threads,
2050                                  progress_func);
2051 }
2052
2053 static int
2054 write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags,
2055                              wimlib_progress_func_t progress_func)
2056 {
2057         int ret;
2058         int start_image;
2059         int end_image;
2060         int write_resource_flags;
2061
2062         if (write_flags & WIMLIB_WRITE_FLAG_NO_METADATA) {
2063                 DEBUG("Not writing any metadata resources.");
2064                 return 0;
2065         }
2066
2067         write_resource_flags = write_flags_to_resource_flags(write_flags);
2068
2069         DEBUG("Writing metadata resources (offset=%"PRIu64")",
2070               wim->out_fd.offset);
2071
2072         if (progress_func)
2073                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_METADATA_BEGIN, NULL);
2074
2075         if (image == WIMLIB_ALL_IMAGES) {
2076                 start_image = 1;
2077                 end_image = wim->hdr.image_count;
2078         } else {
2079                 start_image = image;
2080                 end_image = image;
2081         }
2082
2083         for (int i = start_image; i <= end_image; i++) {
2084                 struct wim_image_metadata *imd;
2085
2086                 imd = wim->image_metadata[i - 1];
2087                 /* Build a new metadata resource only if image was modified from
2088                  * the original (or was newly added).  Otherwise just copy the
2089                  * existing one.  */
2090                 if (imd->modified) {
2091                         DEBUG("Image %u was modified; building and writing new "
2092                               "metadata resource", i);
2093                         ret = write_metadata_resource(wim, i,
2094                                                       write_resource_flags);
2095                 } else if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) {
2096                         DEBUG("Image %u was not modified; re-using existing "
2097                               "metadata resource.", i);
2098                         copy_resource_entry(&imd->metadata_lte->output_resource_entry,
2099                                             &imd->metadata_lte->resource_entry);
2100                         ret = 0;
2101                 } else {
2102                         DEBUG("Image %u was not modified; copying existing "
2103                               "metadata resource.", i);
2104                         ret = write_wim_resource(imd->metadata_lte,
2105                                                  &wim->out_fd,
2106                                                  wim->compression_type,
2107                                                  &imd->metadata_lte->output_resource_entry,
2108                                                  write_resource_flags,
2109                                                  &wim->lzx_context);
2110                 }
2111                 if (ret)
2112                         return ret;
2113         }
2114         if (progress_func)
2115                 progress_func(WIMLIB_PROGRESS_MSG_WRITE_METADATA_END, NULL);
2116         return 0;
2117 }
2118
2119 static int
2120 open_wim_writable(WIMStruct *wim, const tchar *path, int open_flags)
2121 {
2122         int raw_fd;
2123         DEBUG("Opening \"%"TS"\" for writing.", path);
2124
2125         raw_fd = topen(path, open_flags | O_BINARY, 0644);
2126         if (raw_fd < 0) {
2127                 ERROR_WITH_ERRNO("Failed to open \"%"TS"\" for writing", path);
2128                 return WIMLIB_ERR_OPEN;
2129         }
2130         filedes_init(&wim->out_fd, raw_fd);
2131         return 0;
2132 }
2133
2134 static int
2135 close_wim_writable(WIMStruct *wim, int write_flags)
2136 {
2137         int ret = 0;
2138
2139         if (!(write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR))
2140                 if (filedes_valid(&wim->out_fd))
2141                         if (filedes_close(&wim->out_fd))
2142                                 ret = WIMLIB_ERR_WRITE;
2143         filedes_invalidate(&wim->out_fd);
2144         return ret;
2145 }
2146
2147 /*
2148  * finish_write():
2149  *
2150  * Finish writing a WIM file: write the lookup table, xml data, and integrity
2151  * table, then overwrite the WIM header.  By default, closes the WIM file
2152  * descriptor (@wim->out_fd) if successful.
2153  *
2154  * write_flags is a bitwise OR of the following:
2155  *
2156  *      (public) WIMLIB_WRITE_FLAG_CHECK_INTEGRITY:
2157  *              Include an integrity table.
2158  *
2159  *      (public) WIMLIB_WRITE_FLAG_FSYNC:
2160  *              fsync() the output file before closing it.
2161  *
2162  *      (public) WIMLIB_WRITE_FLAG_PIPABLE:
2163  *              Writing a pipable WIM, possibly to a pipe; include pipable WIM
2164  *              stream headers before the lookup table and XML data, and also
2165  *              write the WIM header at the end instead of seeking to the
2166  *              beginning.  Can't be combined with
2167  *              WIMLIB_WRITE_FLAG_CHECK_INTEGRITY.
2168  *
2169  *      (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE:
2170  *              Don't write the lookup table.
2171  *
2172  *      (private) WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE:
2173  *              When (if) writing the integrity table, re-use entries from the
2174  *              existing integrity table, if possible.
2175  *
2176  *      (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML:
2177  *              After writing the XML data but before writing the integrity
2178  *              table, write a temporary WIM header and flush the stream so that
2179  *              the WIM is less likely to become corrupted upon abrupt program
2180  *              termination.
2181  *      (private) WIMLIB_WRITE_FLAG_HEADER_AT_END:
2182  *              Instead of overwriting the WIM header at the beginning of the
2183  *              file, simply append it to the end of the file.  (Used when
2184  *              writing to pipe.)
2185  *      (private) WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR:
2186  *              Do not close the file descriptor @wim->out_fd on either success
2187  *              on failure.
2188  *      (private) WIMLIB_WRITE_FLAG_USE_EXISTING_TOTALBYTES:
2189  *              Use the existing <TOTALBYTES> stored in the in-memory XML
2190  *              information, rather than setting it to the offset of the XML
2191  *              data being written.
2192  */
2193 static int
2194 finish_write(WIMStruct *wim, int image, int write_flags,
2195              wimlib_progress_func_t progress_func,
2196              struct list_head *stream_list_override)
2197 {
2198         int ret;
2199         off_t hdr_offset;
2200         int write_resource_flags;
2201         off_t old_lookup_table_end;
2202         off_t new_lookup_table_end;
2203         u64 xml_totalbytes;
2204
2205         write_resource_flags = write_flags_to_resource_flags(write_flags);
2206
2207         /* In the WIM header, there is room for the resource entry for a
2208          * metadata resource labeled as the "boot metadata".  This entry should
2209          * be zeroed out if there is no bootable image (boot_idx 0).  Otherwise,
2210          * it should be a copy of the resource entry for the image that is
2211          * marked as bootable.  This is not well documented...  */
2212         if (wim->hdr.boot_idx == 0) {
2213                 zero_resource_entry(&wim->hdr.boot_metadata_res_entry);
2214         } else {
2215                 copy_resource_entry(&wim->hdr.boot_metadata_res_entry,
2216                             &wim->image_metadata[wim->hdr.boot_idx- 1
2217                                         ]->metadata_lte->output_resource_entry);
2218         }
2219
2220         /* Write lookup table.  (Save old position first.)  */
2221         old_lookup_table_end = wim->hdr.lookup_table_res_entry.offset +
2222                                wim->hdr.lookup_table_res_entry.size;
2223         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
2224                 ret = write_wim_lookup_table(wim, image, write_flags,
2225                                              &wim->hdr.lookup_table_res_entry,
2226                                              stream_list_override);
2227                 if (ret)
2228                         return ret;
2229         }
2230
2231         /* Write XML data.  */
2232         xml_totalbytes = wim->out_fd.offset;
2233         if (write_flags & WIMLIB_WRITE_FLAG_USE_EXISTING_TOTALBYTES)
2234                 xml_totalbytes = WIM_TOTALBYTES_USE_EXISTING;
2235         ret = write_wim_xml_data(wim, image, xml_totalbytes,
2236                                  &wim->hdr.xml_res_entry,
2237                                  write_resource_flags);
2238         if (ret)
2239                 return ret;
2240
2241         /* Write integrity table (optional).  */
2242         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
2243                 if (write_flags & WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) {
2244                         struct wim_header checkpoint_hdr;
2245                         memcpy(&checkpoint_hdr, &wim->hdr, sizeof(struct wim_header));
2246                         zero_resource_entry(&checkpoint_hdr.integrity);
2247                         checkpoint_hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS;
2248                         ret = write_wim_header_at_offset(&checkpoint_hdr,
2249                                                          &wim->out_fd, 0);
2250                         if (ret)
2251                                 return ret;
2252                 }
2253
2254                 if (!(write_flags & WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE))
2255                         old_lookup_table_end = 0;
2256
2257                 new_lookup_table_end = wim->hdr.lookup_table_res_entry.offset +
2258                                        wim->hdr.lookup_table_res_entry.size;
2259
2260                 ret = write_integrity_table(wim,
2261                                             new_lookup_table_end,
2262                                             old_lookup_table_end,
2263                                             progress_func);
2264                 if (ret)
2265                         return ret;
2266         } else {
2267                 /* No integrity table.  */
2268                 zero_resource_entry(&wim->hdr.integrity);
2269         }
2270
2271         /* Now that all information in the WIM header has been determined, the
2272          * preliminary header written earlier can be overwritten, the header of
2273          * the existing WIM file can be overwritten, or the final header can be
2274          * written to the end of the pipable WIM.  */
2275         wim->hdr.flags &= ~WIM_HDR_FLAG_WRITE_IN_PROGRESS;
2276         hdr_offset = 0;
2277         if (write_flags & WIMLIB_WRITE_FLAG_HEADER_AT_END)
2278                 hdr_offset = wim->out_fd.offset;
2279         ret = write_wim_header_at_offset(&wim->hdr, &wim->out_fd, hdr_offset);
2280         if (ret)
2281                 return ret;
2282
2283         /* Possibly sync file data to disk before closing.  On POSIX systems, it
2284          * is necessary to do this before using rename() to overwrite an
2285          * existing file with a new file.  Otherwise, data loss would occur if
2286          * the system is abruptly terminated when the metadata for the rename
2287          * operation has been written to disk, but the new file data has not.
2288          */
2289         if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
2290                 if (fsync(wim->out_fd.fd)) {
2291                         ERROR_WITH_ERRNO("Error syncing data to WIM file");
2292                         return WIMLIB_ERR_WRITE;
2293                 }
2294         }
2295
2296         if (close_wim_writable(wim, write_flags)) {
2297                 ERROR_WITH_ERRNO("Failed to close the output WIM file");
2298                 return WIMLIB_ERR_WRITE;
2299         }
2300
2301         return 0;
2302 }
2303
#if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
/* Try to take an exclusive, non-blocking flock() on @fd on behalf of @wim.
 *
 * Nothing is done if @fd is -1 or if @wim is already marked as locked.  If the
 * lock is acquired, @wim->wim_locked is set.
 *
 * Returns WIMLIB_ERR_ALREADY_LOCKED if flock() fails with EWOULDBLOCK.  Any
 * other flock() failure only produces a warning, and 0 is returned as in the
 * success case.  */
int
lock_wim(WIMStruct *wim, int fd)
{
        if (fd == -1 || wim->wim_locked)
                return 0;

        if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
                wim->wim_locked = 1;
                return 0;
        }

        if (errno == EWOULDBLOCK) {
                ERROR("`%"TS"' is already being modified or has been "
                      "mounted read-write\n"
                      "        by another process!", wim->filename);
                return WIMLIB_ERR_ALREADY_LOCKED;
        }

        /* Locking is best-effort: other failures are not fatal.  */
        WARNING_WITH_ERRNO("Failed to lock `%"TS"'",
                           wim->filename);
        return 0;
}
#endif
2329
2330 /*
2331  * write_pipable_wim():
2332  *
2333  * Perform the intermediate stages of creating a "pipable" WIM (i.e. a WIM
2334  * capable of being applied from a pipe).
2335  *
2336  * Pipable WIMs are a wimlib-specific modification of the WIM format such that
2337  * images can be applied from them sequentially when the file data is sent over
2338  * a pipe.  In addition, a pipable WIM can be written sequentially to a pipe.
2339  * The modifications made to the WIM format for pipable WIMs are:
2340  *
2341  * - Magic characters in header are "WLPWM\0\0\0" (wimlib pipable WIM) instead
2342  *   of "MSWIM\0\0\0".  This lets wimlib know that the WIM is pipable and also
2343  *   stops other software from trying to read the file as a normal WIM.
2344  *
2345  * - The header at the beginning of the file does not contain all the normal
2346  *   information; in particular it will have all 0's for the lookup table and
2347  *   XML data resource entries.  This is because this information cannot be
2348  *   determined until the lookup table and XML data have been written.
2349  *   Consequently, wimlib will write the full header at the very end of the
2350  *   file.  The header at the end, however, is only used when reading the WIM
2351  *   from a seekable file (not a pipe).
2352  *
2353  * - An extra copy of the XML data is placed directly after the header.  This
2354  *   allows image names and sizes to be determined at an appropriate time when
2355  *   reading the WIM from a pipe.  This copy of the XML data is ignored if the
2356  *   WIM is read from a seekable file (not a pipe).
2357  *
2358  * - The format of resources, or streams, has been modified to allow them to be
2359  *   used before the "lookup table" has been read.  Each stream is prefixed with
2360  *   a `struct pwm_stream_hdr' that is basically an abbreviated form of `struct
2361  *   wim_lookup_table_entry_disk' that only contains the SHA1 message digest,
2362  *   uncompressed stream size, and flags that indicate whether the stream is
2363  *   compressed.  The data of uncompressed streams then follows literally, while
2364  *   the data of compressed streams follows in a modified format.  Compressed
2365  *   streams do not begin with a chunk table, since the chunk table cannot be
2366  *   written until all chunks have been compressed.  Instead, each compressed
2367  *   chunk is prefixed by a `struct pwm_chunk_hdr' that gives its size.
2368  *   Furthermore, the chunk table is written at the end of the resource instead
2369  *   of the start.  Note: chunk offsets are given in the chunk table as if the
2370  *   `struct pwm_chunk_hdr's were not present; also, the chunk table is only
2371  *   used if the WIM is being read from a seekable file (not a pipe).
2372  *
2373  * - Metadata resources always come before other file resources (streams).
2374  *   (This does not by itself constitute an incompatibility with normal WIMs,
2375  *   since this is valid in normal WIMs.)
2376  *
2377  * - At least up to the end of the file resources, all components must be packed
2378  *   as tightly as possible; there cannot be any "holes" in the WIM.  (This does
2379  *   not by itself constitute an incompatibility with normal WIMs, since this
2380  *   is valid in normal WIMs.)
2381  *
2382  * Note: the lookup table, XML data, and header at the end are not used when
2383  * applying from a pipe.  They exist to support functionality such as image
2384  * application and export when the WIM is *not* read from a pipe.
2385  *
2386  *   Layout of pipable WIM:
2387  *
2388  * +--------+----------+--------------------+----------------+--------------+-----------+--------+
2389  * | Header | XML data | Metadata resources | File resources | Lookup table | XML data  | Header |
2390  * +--------+----------+--------------------+----------------+--------------+-----------+--------+
2391  *
2392  *   Layout of normal WIM:
2393  *
2394  * +--------+-----------------------------+--------------+----------+
2395  * | Header | File and metadata resources | Lookup table | XML data |
2396  * +--------+-----------------------------+--------------+----------+
2397  *
2398  * An optional integrity table can follow the final XML data in both normal and
2399  * pipable WIMs.  However, due to implementation details, wimlib currently can
2400  * only include an integrity table in a pipable WIM when writing it to a
2401  * seekable file (not a pipe).
2402  *
2403  * Do note that since pipable WIMs are not supported by Microsoft's software,
2404  * wimlib does not create them unless explicitly requested (with
2405  * WIMLIB_WRITE_FLAG_PIPABLE) and as stated above they use different magic
2406  * characters to identify the file.
2407  */
2408 static int
2409 write_pipable_wim(WIMStruct *wim, int image, int write_flags,
2410                   unsigned num_threads, wimlib_progress_func_t progress_func,
2411                   struct list_head *stream_list_override)
2412 {
2413         int ret;
2414         struct resource_entry xml_res_entry;
2415
2416         WARNING("Creating a pipable WIM, which will "
2417                 "be incompatible\n"
2418                 "          with Microsoft's software (wimgapi/imagex/Dism).");
2419
2420         /* At this point, the header at the beginning of the file has already
2421          * been written.  */
2422
2423         /* For efficiency, when wimlib adds an image to the WIM with
2424          * wimlib_add_image(), the SHA1 message digests of files is not
2425          * calculated; instead, they are calculated while the files are being
2426          * written.  However, this does not work when writing a pipable WIM,
2427          * since when writing a stream to a pipable WIM, its SHA1 message digest
2428          * needs to be known before the stream data is written.  Therefore,
2429          * before getting much farther, we need to pre-calculate the SHA1
2430          * message digests of all streams that will be written.  */
2431         ret = wim_checksum_unhashed_streams(wim);
2432         if (ret)
2433                 return ret;
2434
2435         /* Write extra copy of the XML data.  */
2436         ret = write_wim_xml_data(wim, image, WIM_TOTALBYTES_OMIT,
2437                                  &xml_res_entry,
2438                                  WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE);
2439         if (ret)
2440                 return ret;
2441
2442         /* Write metadata resources for the image(s) being included in the
2443          * output WIM.  */
2444         ret = write_wim_metadata_resources(wim, image, write_flags,
2445                                            progress_func);
2446         if (ret)
2447                 return ret;
2448
2449         /* Write streams needed for the image(s) being included in the output
2450          * WIM, or streams needed for the split WIM part.  */
2451         return write_wim_streams(wim, image, write_flags, num_threads,
2452                                  progress_func, stream_list_override);
2453
2454         /* The lookup table, XML data, and header at end are handled by
2455          * finish_write().  */
2456 }
2457
2458 /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file
2459  * descriptor.  */
2460 int
2461 write_wim_part(WIMStruct *wim,
2462                const void *path_or_fd,
2463                int image,
2464                int write_flags,
2465                unsigned num_threads,
2466                wimlib_progress_func_t progress_func,
2467                unsigned part_number,
2468                unsigned total_parts,
2469                struct list_head *stream_list_override,
2470                const u8 *guid)
2471 {
2472         int ret;
2473         struct wim_header hdr_save;
2474         struct list_head lt_stream_list_override;
2475
2476         if (total_parts == 1)
2477                 DEBUG("Writing standalone WIM.");
2478         else
2479                 DEBUG("Writing split WIM part %u/%u", part_number, total_parts);
2480         if (image == WIMLIB_ALL_IMAGES)
2481                 DEBUG("Including all images.");
2482         else
2483                 DEBUG("Including image %d only.", image);
2484         if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR)
2485                 DEBUG("File descriptor: %d", *(const int*)path_or_fd);
2486         else
2487                 DEBUG("Path: \"%"TS"\"", (const tchar*)path_or_fd);
2488         DEBUG("Write flags: 0x%08x", write_flags);
2489         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
2490                 DEBUG("\tCHECK_INTEGRITY");
2491         if (write_flags & WIMLIB_WRITE_FLAG_REBUILD)
2492                 DEBUG("\tREBUILD");
2493         if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
2494                 DEBUG("\tRECOMPRESS");
2495         if (write_flags & WIMLIB_WRITE_FLAG_FSYNC)
2496                 DEBUG("\tFSYNC");
2497         if (write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE)
2498                 DEBUG("\tFSYNC");
2499         if (write_flags & WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG)
2500                 DEBUG("\tIGNORE_READONLY_FLAG");
2501         if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
2502                 DEBUG("\tPIPABLE");
2503         if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR)
2504                 DEBUG("\tFILE_DESCRIPTOR");
2505         if (write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)
2506                 DEBUG("\tNO_METADATA");
2507         if (write_flags & WIMLIB_WRITE_FLAG_USE_EXISTING_TOTALBYTES)
2508                 DEBUG("\tUSE_EXISTING_TOTALBYTES");
2509         if (num_threads == 0)
2510                 DEBUG("Number of threads: autodetect");
2511         else
2512                 DEBUG("Number of threads: %u", num_threads);
2513         DEBUG("Progress function: %s", (progress_func ? "yes" : "no"));
2514         DEBUG("Stream list:       %s", (stream_list_override ? "specified" : "autodetect"));
2515         DEBUG("GUID:              %s", ((guid || wim->guid_set_explicitly) ?
2516                                         "specified" : "generate new"));
2517
2518         /* Internally, this is always called with a valid part number and total
2519          * parts.  */
2520         wimlib_assert(total_parts >= 1);
2521         wimlib_assert(part_number >= 1 && part_number <= total_parts);
2522
2523         /* A valid image (or all images) must be specified.  */
2524         if (image != WIMLIB_ALL_IMAGES &&
2525              (image < 1 || image > wim->hdr.image_count))
2526                 return WIMLIB_ERR_INVALID_IMAGE;
2527
2528         /* If we need to write metadata resources, make sure the ::WIMStruct has
2529          * the needed information attached (e.g. is not a resource-only WIM,
2530          * such as a non-first part of a split WIM).  */
2531         if (!wim_has_metadata(wim) &&
2532             !(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA))
2533                 return WIMLIB_ERR_METADATA_NOT_FOUND;
2534
2535         /* Check for contradictory flags.  */
2536         if ((write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
2537                             WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY))
2538                                 == (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
2539                                     WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY))
2540                 return WIMLIB_ERR_INVALID_PARAM;
2541
2542         if ((write_flags & (WIMLIB_WRITE_FLAG_PIPABLE |
2543                             WIMLIB_WRITE_FLAG_NOT_PIPABLE))
2544                                 == (WIMLIB_WRITE_FLAG_PIPABLE |
2545                                     WIMLIB_WRITE_FLAG_NOT_PIPABLE))
2546                 return WIMLIB_ERR_INVALID_PARAM;
2547
2548         /* Save previous header, then start initializing the new one.  */
2549         memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header));
2550
2551         /* Set default integrity and pipable flags.  */
2552         if (!(write_flags & (WIMLIB_WRITE_FLAG_PIPABLE |
2553                              WIMLIB_WRITE_FLAG_NOT_PIPABLE)))
2554                 if (wim_is_pipable(wim))
2555                         write_flags |= WIMLIB_WRITE_FLAG_PIPABLE;
2556
2557         if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
2558                              WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY)))
2559                 if (wim_has_integrity_table(wim))
2560                         write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
2561
2562         /* Set appropriate magic number.  */
2563         if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
2564                 wim->hdr.magic = PWM_MAGIC;
2565         else
2566                 wim->hdr.magic = WIM_MAGIC;
2567
2568         /* Clear header flags that will be set automatically.  */
2569         wim->hdr.flags &= ~(WIM_HDR_FLAG_METADATA_ONLY          |
2570                             WIM_HDR_FLAG_RESOURCE_ONLY          |
2571                             WIM_HDR_FLAG_SPANNED                |
2572                             WIM_HDR_FLAG_WRITE_IN_PROGRESS);
2573
2574         /* Set SPANNED header flag if writing part of a split WIM.  */
2575         if (total_parts != 1)
2576                 wim->hdr.flags |= WIM_HDR_FLAG_SPANNED;
2577
2578         /* Set part number and total parts of split WIM.  This will be 1 and 1
2579          * if the WIM is standalone.  */
2580         wim->hdr.part_number = part_number;
2581         wim->hdr.total_parts = total_parts;
2582
2583         /* Use GUID if specified; otherwise generate a new one.  */
2584         if (guid)
2585                 memcpy(wim->hdr.guid, guid, WIMLIB_GUID_LEN);
2586         else if (!wim->guid_set_explicitly)
2587                 randomize_byte_array(wim->hdr.guid, WIMLIB_GUID_LEN);
2588
2589         /* Clear references to resources that have not been written yet.  */
2590         zero_resource_entry(&wim->hdr.lookup_table_res_entry);
2591         zero_resource_entry(&wim->hdr.xml_res_entry);
2592         zero_resource_entry(&wim->hdr.boot_metadata_res_entry);
2593         zero_resource_entry(&wim->hdr.integrity);
2594
2595         /* Set image count and boot index correctly for single image writes.  */
2596         if (image != WIMLIB_ALL_IMAGES) {
2597                 wim->hdr.image_count = 1;
2598                 if (wim->hdr.boot_idx == image)
2599                         wim->hdr.boot_idx = 1;
2600                 else
2601                         wim->hdr.boot_idx = 0;
2602         }
2603
2604         /* Split WIMs can't be bootable.  */
2605         if (total_parts != 1)
2606                 wim->hdr.boot_idx = 0;
2607
2608         /* Initialize output file descriptor.  */
2609         if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) {
2610                 /* File descriptor was explicitly provided.  Return error if
2611                  * file descriptor is not seekable, unless writing a pipable WIM
2612                  * was requested.  */
2613                 wim->out_fd.fd = *(const int*)path_or_fd;
2614                 wim->out_fd.offset = 0;
2615                 if (!filedes_is_seekable(&wim->out_fd)) {
2616                         ret = WIMLIB_ERR_INVALID_PARAM;
2617                         if (!(write_flags & WIMLIB_WRITE_FLAG_PIPABLE))
2618                                 goto out_restore_hdr;
2619                         if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
2620                                 ERROR("Can't include integrity check when "
2621                                       "writing pipable WIM to pipe!");
2622                                 goto out_restore_hdr;
2623                         }
2624                 }
2625
2626         } else {
2627                 /* Filename of WIM to write was provided; open file descriptor
2628                  * to it.  */
2629                 ret = open_wim_writable(wim, (const tchar*)path_or_fd,
2630                                         O_TRUNC | O_CREAT | O_RDWR);
2631                 if (ret)
2632                         goto out_restore_hdr;
2633         }
2634
2635         /* Write initial header.  This is merely a "dummy" header since it
2636          * doesn't have all the information yet, so it will be overwritten later
2637          * (unless writing a pipable WIM).  */
2638         if (!(write_flags & WIMLIB_WRITE_FLAG_PIPABLE))
2639                 wim->hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS;
2640         ret = write_wim_header(&wim->hdr, &wim->out_fd);
2641         wim->hdr.flags &= ~WIM_HDR_FLAG_WRITE_IN_PROGRESS;
2642         if (ret)
2643                 goto out_restore_hdr;
2644
2645         if (stream_list_override) {
2646                 struct wim_lookup_table_entry *lte;
2647                 INIT_LIST_HEAD(&lt_stream_list_override);
2648                 list_for_each_entry(lte, stream_list_override,
2649                                     write_streams_list)
2650                 {
2651                         list_add_tail(&lte->lookup_table_list,
2652                                       &lt_stream_list_override);
2653                 }
2654         }
2655
2656         /* Write metadata resources and streams.  */
2657         if (!(write_flags & WIMLIB_WRITE_FLAG_PIPABLE)) {
2658                 /* Default case: create a normal (non-pipable) WIM.  */
2659                 ret = write_wim_streams(wim, image, write_flags, num_threads,
2660                                         progress_func, stream_list_override);
2661                 if (ret)
2662                         goto out_restore_hdr;
2663
2664                 ret = write_wim_metadata_resources(wim, image, write_flags,
2665                                                    progress_func);
2666                 if (ret)
2667                         goto out_restore_hdr;
2668         } else {
2669                 /* Non-default case: create pipable WIM.  */
2670                 ret = write_pipable_wim(wim, image, write_flags, num_threads,
2671                                         progress_func, stream_list_override);
2672                 if (ret)
2673                         goto out_restore_hdr;
2674                 write_flags |= WIMLIB_WRITE_FLAG_HEADER_AT_END;
2675         }
2676
2677         if (stream_list_override)
2678                 stream_list_override = &lt_stream_list_override;
2679
2680         /* Write lookup table, XML data, and (optional) integrity table.  */
2681         ret = finish_write(wim, image, write_flags, progress_func,
2682                            stream_list_override);
2683 out_restore_hdr:
2684         memcpy(&wim->hdr, &hdr_save, sizeof(struct wim_header));
2685         (void)close_wim_writable(wim, write_flags);
2686         return ret;
2687 }
2688
2689 /* Write a standalone WIM to a file or file descriptor.  */
2690 static int
2691 write_standalone_wim(WIMStruct *wim, const void *path_or_fd,
2692                      int image, int write_flags, unsigned num_threads,
2693                      wimlib_progress_func_t progress_func)
2694 {
2695         return write_wim_part(wim, path_or_fd, image, write_flags,
2696                               num_threads, progress_func, 1, 1, NULL, NULL);
2697 }
2698
2699 /* API function documented in wimlib.h  */
2700 WIMLIBAPI int
2701 wimlib_write(WIMStruct *wim, const tchar *path,
2702              int image, int write_flags, unsigned num_threads,
2703              wimlib_progress_func_t progress_func)
2704 {
2705         if (!path)
2706                 return WIMLIB_ERR_INVALID_PARAM;
2707
2708         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
2709
2710         return write_standalone_wim(wim, path, image, write_flags,
2711                                     num_threads, progress_func);
2712 }
2713
2714 /* API function documented in wimlib.h  */
2715 WIMLIBAPI int
2716 wimlib_write_to_fd(WIMStruct *wim, int fd,
2717                    int image, int write_flags, unsigned num_threads,
2718                    wimlib_progress_func_t progress_func)
2719 {
2720         if (fd < 0)
2721                 return WIMLIB_ERR_INVALID_PARAM;
2722
2723         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
2724         write_flags |= WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR;
2725
2726         return write_standalone_wim(wim, &fd, image, write_flags,
2727                                     num_threads, progress_func);
2728 }
2729
2730 static bool
2731 any_images_modified(WIMStruct *wim)
2732 {
2733         for (int i = 0; i < wim->hdr.image_count; i++)
2734                 if (wim->image_metadata[i]->modified)
2735                         return true;
2736         return false;
2737 }
2738
2739 static int
2740 check_resource_offset(struct wim_lookup_table_entry *lte, void *_wim)
2741 {
2742         const WIMStruct *wim = _wim;
2743         off_t end_offset = *(const off_t*)wim->private;
2744
2745         if (lte->resource_location == RESOURCE_IN_WIM && lte->wim == wim &&
2746             lte->resource_entry.offset + lte->resource_entry.size > end_offset)
2747                 return WIMLIB_ERR_RESOURCE_ORDER;
2748         return 0;
2749 }
2750
2751 /* Make sure no file or metadata resources are located after the XML data (or
2752  * integrity table if present)--- otherwise we can't safely overwrite the WIM in
2753  * place and we return WIMLIB_ERR_RESOURCE_ORDER.  */
2754 static int
2755 check_resource_offsets(WIMStruct *wim, off_t end_offset)
2756 {
2757         int ret;
2758         unsigned i;
2759
2760         wim->private = &end_offset;
2761         ret = for_lookup_table_entry(wim->lookup_table, check_resource_offset, wim);
2762         if (ret)
2763                 return ret;
2764
2765         for (i = 0; i < wim->hdr.image_count; i++) {
2766                 ret = check_resource_offset(wim->image_metadata[i]->metadata_lte, wim);
2767                 if (ret)
2768                         return ret;
2769         }
2770         return 0;
2771 }
2772
2773 /*
2774  * Overwrite a WIM, possibly appending streams to it.
2775  *
2776  * A WIM looks like (or is supposed to look like) the following:
2777  *
2778  *                   Header (212 bytes)
2779  *                   Streams and metadata resources (variable size)
2780  *                   Lookup table (variable size)
2781  *                   XML data (variable size)
2782  *                   Integrity table (optional) (variable size)
2783  *
2784  * If we are not adding any streams or metadata resources, the lookup table is
2785  * unchanged--- so we only need to overwrite the XML data, integrity table, and
2786  * header.  This operation is potentially unsafe if the program is abruptly
2787  * terminated while the XML data or integrity table are being overwritten, but
2788  * before the new header has been written.  To partially alleviate this problem,
2789  * a special flag (WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML) is passed to
2790  * finish_write() to cause a temporary WIM header to be written after the XML
2791  * data has been written.  This may prevent the WIM from becoming corrupted if
2792  * the program is terminated while the integrity table is being calculated (but
2793  * no guarantees, due to write re-ordering...).
2794  *
2795  * If we are adding new streams or images (metadata resources), the lookup table
2796  * needs to be changed, and those streams need to be written.  In this case, we
2797  * try to perform a safe update of the WIM file by writing the streams *after*
2798  * the end of the previous WIM, then writing the new lookup table, XML data, and
2799  * (optionally) integrity table following the new streams.  This will produce a
2800  * layout like the following:
2801  *
2802  *                   Header (212 bytes)
2803  *                   (OLD) Streams and metadata resources (variable size)
2804  *                   (OLD) Lookup table (variable size)
2805  *                   (OLD) XML data (variable size)
2806  *                   (OLD) Integrity table (optional) (variable size)
2807  *                   (NEW) Streams and metadata resources (variable size)
2808  *                   (NEW) Lookup table (variable size)
2809  *                   (NEW) XML data (variable size)
2810  *                   (NEW) Integrity table (optional) (variable size)
2811  *
2812  * At all points, the WIM is valid as nothing points to the new data yet.  Then,
2813  * the header is overwritten to point to the new lookup table, XML data, and
2814  * integrity table, to produce the following layout:
2815  *
2816  *                   Header (212 bytes)
2817  *                   Streams and metadata resources (variable size)
2818  *                   Nothing (variable size)
2819  *                   More Streams and metadata resources (variable size)
2820  *                   Lookup table (variable size)
2821  *                   XML data (variable size)
2822  *                   Integrity table (optional) (variable size)
2823  *
2824  * This method allows an image to be appended to a large WIM very quickly, and
2825  * is is crash-safe except in the case of write re-ordering, but the
2826  * disadvantage is that a small hole is left in the WIM where the old lookup
2827  * table, xml data, and integrity table were.  (These usually only take up a
2828  * small amount of space compared to the streams, however.)
2829  */
2830 static int
2831 overwrite_wim_inplace(WIMStruct *wim, int write_flags,
2832                       unsigned num_threads,
2833                       wimlib_progress_func_t progress_func)
2834 {
2835         int ret;
2836         struct list_head stream_list;
2837         off_t old_wim_end;
2838         u64 old_lookup_table_end, old_xml_begin, old_xml_end;
2839         struct wim_header hdr_save;
2840
2841         DEBUG("Overwriting `%"TS"' in-place", wim->filename);
2842
2843         /* Set default integrity flag.  */
2844         if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
2845                              WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY)))
2846                 if (wim_has_integrity_table(wim))
2847                         write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
2848
2849         /* Set additional flags for overwrite.  */
2850         write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE |
2851                        WIMLIB_WRITE_FLAG_STREAMS_OK;
2852
2853         /* Make sure that the integrity table (if present) is after the XML
2854          * data, and that there are no stream resources, metadata resources, or
2855          * lookup tables after the XML data.  Otherwise, these data would be
2856          * overwritten. */
2857         old_xml_begin = wim->hdr.xml_res_entry.offset;
2858         old_xml_end = old_xml_begin + wim->hdr.xml_res_entry.size;
2859         old_lookup_table_end = wim->hdr.lookup_table_res_entry.offset +
2860                                wim->hdr.lookup_table_res_entry.size;
2861         if (wim->hdr.integrity.offset != 0 && wim->hdr.integrity.offset < old_xml_end) {
2862                 WARNING("Didn't expect the integrity table to be before the XML data");
2863                 return WIMLIB_ERR_RESOURCE_ORDER;
2864         }
2865
2866         if (old_lookup_table_end > old_xml_begin) {
2867                 WARNING("Didn't expect the lookup table to be after the XML data");
2868                 return WIMLIB_ERR_RESOURCE_ORDER;
2869         }
2870
2871         /* Set @old_wim_end, which indicates the point beyond which we don't
2872          * allow any file and metadata resources to appear without returning
2873          * WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we would otherwise
2874          * overwrite these resources). */
2875         if (!wim->deletion_occurred && !any_images_modified(wim)) {
2876                 /* If no images have been modified and no images have been
2877                  * deleted, a new lookup table does not need to be written.  We
2878                  * shall write the new XML data and optional integrity table
2879                  * immediately after the lookup table.  Note that this may
2880                  * overwrite an existing integrity table. */
2881                 DEBUG("Skipping writing lookup table "
2882                       "(no images modified or deleted)");
2883                 old_wim_end = old_lookup_table_end;
2884                 write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE |
2885                                WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML;
2886         } else if (wim->hdr.integrity.offset) {
2887                 /* Old WIM has an integrity table; begin writing new streams
2888                  * after it. */
2889                 old_wim_end = wim->hdr.integrity.offset + wim->hdr.integrity.size;
2890         } else {
2891                 /* No existing integrity table; begin writing new streams after
2892                  * the old XML data. */
2893                 old_wim_end = old_xml_end;
2894         }
2895
2896         ret = check_resource_offsets(wim, old_wim_end);
2897         if (ret)
2898                 return ret;
2899
2900         ret = prepare_stream_list(wim, WIMLIB_ALL_IMAGES, write_flags,
2901                                   &stream_list);
2902         if (ret)
2903                 return ret;
2904
2905         ret = open_wim_writable(wim, wim->filename, O_RDWR);
2906         if (ret)
2907                 return ret;
2908
2909         ret = lock_wim(wim, wim->out_fd.fd);
2910         if (ret)
2911                 goto out_close_wim;
2912
2913         /* Save original header so it can be restored in case of error  */
2914         memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header));
2915
2916         /* Set WIM_HDR_FLAG_WRITE_IN_PROGRESS flag in header. */
2917         wim->hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS;
2918         ret = write_wim_header_flags(wim->hdr.flags, &wim->out_fd);
2919         if (ret) {
2920                 ERROR_WITH_ERRNO("Error updating WIM header flags");
2921                 goto out_restore_memory_hdr;
2922         }
2923
2924         if (filedes_seek(&wim->out_fd, old_wim_end) == -1) {
2925                 ERROR_WITH_ERRNO("Can't seek to end of WIM");
2926                 ret = WIMLIB_ERR_WRITE;
2927                 goto out_restore_physical_hdr;
2928         }
2929
2930         ret = write_stream_list(&stream_list,
2931                                 wim->lookup_table,
2932                                 &wim->out_fd,
2933                                 wim->compression_type,
2934                                 &wim->lzx_context,
2935                                 write_flags,
2936                                 num_threads,
2937                                 progress_func);
2938         if (ret)
2939                 goto out_truncate;
2940
2941         ret = write_wim_metadata_resources(wim, WIMLIB_ALL_IMAGES,
2942                                            write_flags, progress_func);
2943         if (ret)
2944                 goto out_truncate;
2945
2946         write_flags |= WIMLIB_WRITE_FLAG_REUSE_INTEGRITY_TABLE;
2947         ret = finish_write(wim, WIMLIB_ALL_IMAGES, write_flags,
2948                            progress_func, NULL);
2949         if (ret)
2950                 goto out_truncate;
2951
2952         goto out_unlock_wim;
2953
2954 out_truncate:
2955         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) {
2956                 WARNING("Truncating `%"TS"' to its original size (%"PRIu64" bytes)",
2957                         wim->filename, old_wim_end);
2958                 /* Return value of ftruncate() is ignored because this is
2959                  * already an error path.  */
2960                 (void)ftruncate(wim->out_fd.fd, old_wim_end);
2961         }
2962 out_restore_physical_hdr:
2963         (void)write_wim_header_flags(hdr_save.flags, &wim->out_fd);
2964 out_restore_memory_hdr:
2965         memcpy(&wim->hdr, &hdr_save, sizeof(struct wim_header));
2966 out_close_wim:
2967         (void)close_wim_writable(wim, write_flags);
2968 out_unlock_wim:
2969         wim->wim_locked = 0;
2970         return ret;
2971 }
2972
2973 static int
2974 overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags,
2975                           unsigned num_threads,
2976                           wimlib_progress_func_t progress_func)
2977 {
2978         size_t wim_name_len;
2979         int ret;
2980
2981         DEBUG("Overwriting `%"TS"' via a temporary file", wim->filename);
2982
2983         /* Write the WIM to a temporary file in the same directory as the
2984          * original WIM. */
2985         wim_name_len = tstrlen(wim->filename);
2986         tchar tmpfile[wim_name_len + 10];
2987         tmemcpy(tmpfile, wim->filename, wim_name_len);
2988         randomize_char_array_with_alnum(tmpfile + wim_name_len, 9);
2989         tmpfile[wim_name_len + 9] = T('\0');
2990
2991         ret = wimlib_write(wim, tmpfile, WIMLIB_ALL_IMAGES,
2992                            write_flags | WIMLIB_WRITE_FLAG_FSYNC,
2993                            num_threads, progress_func);
2994         if (ret) {
2995                 tunlink(tmpfile);
2996                 return ret;
2997         }
2998
2999         close_wim(wim);
3000
3001         /* Rename the new WIM file to the original WIM file.  Note: on Windows
3002          * this actually calls win32_rename_replacement(), not _wrename(), so
3003          * that removing the existing destination file can be handled.  */
3004         DEBUG("Renaming `%"TS"' to `%"TS"'", tmpfile, wim->filename);
3005         ret = trename(tmpfile, wim->filename);
3006         if (ret) {
3007                 ERROR_WITH_ERRNO("Failed to rename `%"TS"' to `%"TS"'",
3008                                  tmpfile, wim->filename);
3009         #ifdef __WIN32__
3010                 if (ret < 0)
3011         #endif
3012                 {
3013                         tunlink(tmpfile);
3014                 }
3015                 return WIMLIB_ERR_RENAME;
3016         }
3017
3018         if (progress_func) {
3019                 union wimlib_progress_info progress;
3020                 progress.rename.from = tmpfile;
3021                 progress.rename.to = wim->filename;
3022                 progress_func(WIMLIB_PROGRESS_MSG_RENAME, &progress);
3023         }
3024         return 0;
3025 }
3026
3027 /* API function documented in wimlib.h  */
3028 WIMLIBAPI int
3029 wimlib_overwrite(WIMStruct *wim, int write_flags,
3030                  unsigned num_threads,
3031                  wimlib_progress_func_t progress_func)
3032 {
3033         int ret;
3034         u32 orig_hdr_flags;
3035
3036         write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
3037
3038         if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR)
3039                 return WIMLIB_ERR_INVALID_PARAM;
3040
3041         if (!wim->filename)
3042                 return WIMLIB_ERR_NO_FILENAME;
3043
3044         orig_hdr_flags = wim->hdr.flags;
3045         if (write_flags & WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG)
3046                 wim->hdr.flags &= ~WIM_HDR_FLAG_READONLY;
3047         ret = can_modify_wim(wim);
3048         wim->hdr.flags = orig_hdr_flags;
3049         if (ret)
3050                 return ret;
3051
3052         if ((!wim->deletion_occurred || (write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE))
3053             && !(write_flags & (WIMLIB_WRITE_FLAG_REBUILD |
3054                                 WIMLIB_WRITE_FLAG_PIPABLE))
3055             && !(wim_is_pipable(wim)))
3056         {
3057                 ret = overwrite_wim_inplace(wim, write_flags, num_threads,
3058                                             progress_func);
3059                 if (ret != WIMLIB_ERR_RESOURCE_ORDER)
3060                         return ret;
3061                 WARNING("Falling back to re-building entire WIM");
3062         }
3063         return overwrite_wim_via_tmpfile(wim, write_flags, num_threads,
3064                                          progress_func);
3065 }