wimlib.net Git - wimlib/blob - src/resource.c

   1 /*
   2  * resource.c
   3  *
   4  * Read uncompressed and compressed metadata and file resources from a WIM file.
   5  */
   6
   7 /*
   8  * Copyright (C) 2012, 2013 Eric Biggers
   9  *
  10  * This file is part of wimlib, a library for working with WIM files.
  11  *
  12  * wimlib is free software; you can redistribute it and/or modify it under the
  13  * terms of the GNU General Public License as published by the Free Software
  14  * Foundation; either version 3 of the License, or (at your option) any later
  15  * version.
  16  *
  17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License along with
  22  * wimlib; if not, see http://www.gnu.org/licenses/.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 #  include "config.h"
  27 #endif
  28
  29 #include "wimlib.h"
  30 #include "wimlib/endianness.h"
  31 #include "wimlib/error.h"
  32 #include "wimlib/file_io.h"
  33 #include "wimlib/lookup_table.h"
  34 #include "wimlib/resource.h"
  35 #include "wimlib/sha1.h"
  36
  37 #ifdef __WIN32__
  38 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
  39 #  include "wimlib/win32.h"
  40 #endif
  41
  42 #ifdef WITH_NTFS_3G
  43 /* for read_ntfs_file_prefix() */
  44 #  include "wimlib/ntfs_3g.h"
  45 #endif
  46
  47 #ifdef HAVE_ALLOCA_H
  48 #  include <alloca.h>
  49 #endif
  50 #include <errno.h>
  51 #include <fcntl.h>
  52 #include <stdlib.h>
  53 #include <unistd.h>
  54
  55 /*
  56  *                         Compressed WIM resources
  57  *
  58  * A compressed resource in a WIM consists of a number of compressed chunks,
  59  * each of which decompresses to a fixed chunk size (given in the WIM header;
  60  * usually 32768) except possibly the last, which always decompresses to any
  61  * remaining bytes.  In addition, immediately before the chunks, a table (the
  62  * "chunk table") provides the offset, in bytes relative to the end of the chunk
  63  * table, of the start of each compressed chunk, except for the first chunk
  64  * which is omitted as it always has an offset of 0.  Therefore, a compressed
  65  * resource with N chunks will have a chunk table with N - 1 entries.
  66  *
  67  * Additional information:
  68  *
  69  * - Entries in the chunk table are 4 bytes each, except if the uncompressed
  70  *   size of the resource is greater than 4 GiB, in which case the entries in
  71  *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
  72  *   little-endian integers.
  73  *
  74  * - The chunk table is included in the compressed size of the resource provided
  75  *   in the corresponding entry in the WIM's stream lookup table.
  76  *
  77  * - The compressed size of a chunk is never greater than the uncompressed size.
  78  *   From the compressor's point of view, chunks that would have compressed to a
  79  *   size greater than or equal to their original size are in fact stored
  80  *   uncompressed.  From the decompresser's point of view, chunks with
  81  *   compressed size equal to their uncompressed size are in fact uncompressed.
  82  *
  83  * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
  84  * structure of compressed resources was modified to allow piped reading and
  85  * writing.  To make sequential writing possible, the chunk table is placed
  86  * after the chunks rather than before the chunks, and to make sequential
  87  * reading possible, each chunk is prefixed with a 4-byte header giving its
  88  * compressed size as a 32-bit, unsigned, little-endian integer.  Otherwise the
  89  * details are the same.
  90  */
  91
  92
  93 /* Decompress the specified chunk that uses the specified compression type
  94  * @ctype, part of a WIM with default chunk size @wim_chunk_size.  For LZX the
  95  * separate @wim_chunk_size is needed because it determines the window size used
  96  * for LZX compression.  */
  97 static int
  98 decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen,
  99            int ctype, u32 wim_chunk_size)
 100 {
 101         switch (ctype) {
 102         case WIMLIB_COMPRESSION_TYPE_LZX:
 103                 return wimlib_lzx_decompress2(cchunk, clen,
 104                                               uchunk, ulen, wim_chunk_size);
 105         case WIMLIB_COMPRESSION_TYPE_XPRESS:
 106                 return wimlib_xpress_decompress(cchunk, clen,
 107                                                 uchunk, ulen);
 108         case WIMLIB_COMPRESSION_TYPE_LZMS:
 109                 return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen);
 110         default:
 111                 wimlib_assert(0);
 112                 return -1;
 113         }
 114 }
 115
/* A contiguous span of bytes within the uncompressed contents of a resource.
 * read_compressed_wim_resource() takes an array of these and asserts that each
 * one is non-empty, lies within the resource's uncompressed size, and does not
 * start before the previous one ends.  */
struct data_range {
        /* Offset of the first byte of the span, in uncompressed bytes.  */
        u64 offset;
        /* Length of the span in bytes.  */
        u64 size;
};
 120
/* On-disk header of the alternate chunk table format, used for resources with
 * WIM_RESHDR_FLAG_CONCAT set.  read_compressed_wim_resource() reads this header
 * from the very start of the resource (rspec->offset_in_wim) when the input is
 * seekable.  */
struct alt_chunk_table_header_disk {
        /* Uncompressed size of the resource.  (Not consulted by the reader in
         * this file, which uses the size from the resource spec instead.)  */
        le64 res_usize;

        /* Number of bytes each compressed chunk decompresses into, except
         * possibly the last which decompresses into the remainder.  The reader
         * requires this to be a power of 2.  */
        le32 chunk_size;

        /* Purpose unknown; the reader in this file never interprets it.  */
        le32 unknown;

        /* This header is directly followed by a table of compressed sizes of
         * the chunks (one entry per chunk, read as 4-byte little-endian
         * integers).  */
} _packed_attribute;
 137
 138 /* Read data from a compressed WIM resource.  */
 139 static int
 140 read_compressed_wim_resource(const struct wim_resource_spec * const rspec,
 141                              const struct data_range * const ranges,
 142                              const size_t num_ranges,
 143                              const consume_data_callback_t cb,
 144                              void * const cb_ctx,
 145                              const bool raw_chunks_mode)
 146 {
 147         int ret;
 148         int errno_save;
 149
 150         u64 *chunk_offsets = NULL;
 151         u8 *ubuf = NULL;
 152         void *cbuf = NULL;
 153         bool chunk_offsets_malloced = false;
 154         bool ubuf_malloced = false;
 155         bool cbuf_malloced = false;
 156
 157         /* Sanity checks  */
 158         wimlib_assert(rspec != NULL);
 159         wimlib_assert(rspec->ctype != WIMLIB_COMPRESSION_TYPE_NONE);
 160         wimlib_assert(is_power_of_2(rspec->cchunk_size));
 161         wimlib_assert(cb != NULL);
 162         wimlib_assert(num_ranges != 0);
 163         for (size_t i = 0; i < num_ranges; i++) {
 164                 wimlib_assert(ranges[i].size != 0);
 165                 wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size);
 166                 wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size);
 167         }
 168         for (size_t i = 0; i < num_ranges - 1; i++)
 169                 wimlib_assert(ranges[i].offset + ranges[i].size <= ranges[i + 1].offset);
 170
 171         /* Get the offsets of the first and last bytes of the read.  */
 172         const u64 first_offset = ranges[0].offset;
 173         const u64 last_offset = ranges[num_ranges - 1].offset + ranges[num_ranges - 1].size - 1;
 174
 175         /* Get the file descriptor for the WIM.  */
 176         struct filedes * const in_fd = &rspec->wim->in_fd;
 177
 178         /* Determine if we're reading a pipable resource from a pipe or not.  */
 179         const bool is_pipe_read = !filedes_is_seekable(in_fd);
 180
 181         /* Determine if the chunk table is in an altenate format.  */
 182         const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_CONCAT) && !is_pipe_read;
 183
 184         /* Get the maximum size of uncompressed chunks in this resource, which
 185          * we require be a power of 2.  */
 186         u32 chunk_size;
 187         u64 cur_read_offset = rspec->offset_in_wim;
 188         if (alt_chunk_table) {
 189                 /* Alternate chunk table format.  */
 190                 struct alt_chunk_table_header_disk hdr;
 191
 192                 ret = full_pread(in_fd, &hdr, sizeof(hdr), cur_read_offset);
 193                 if (ret)
 194                         goto read_error;
 195                 cur_read_offset += sizeof(hdr);
 196
 197                 chunk_size = le32_to_cpu(hdr.chunk_size);
 198
 199                 if (!is_power_of_2(chunk_size)) {
 200                         ERROR("Invalid compressed resource: "
 201                               "expected power-of-2 chunk size (got %u)", chunk_size);
 202                         ret = WIMLIB_ERR_INVALID_CHUNK_SIZE;
 203                         goto out_free_memory;
 204                 }
 205         } else {
 206                 chunk_size = rspec->cchunk_size;
 207         }
 208         const u32 chunk_order = bsr32(chunk_size);
 209
 210         /* Calculate the total number of chunks the resource is divided into.  */
 211         const u64 num_chunks = (rspec->uncompressed_size + chunk_size - 1) >> chunk_order;
 212
 213         /* Calculate the 0-based indices of the first and last chunks containing
 214          * data that needs to be passed to the callback.  */
 215         const u64 first_needed_chunk = first_offset >> chunk_order;
 216         const u64 last_needed_chunk = last_offset >> chunk_order;
 217
 218         /* Calculate the 0-based index of the first chunk that actually needs to
 219          * be read.  This is normally first_needed_chunk, but for pipe reads we
 220          * must always start from the 0th chunk.  */
 221         const u64 read_start_chunk = (is_pipe_read ? 0 : first_needed_chunk);
 222
 223         /* Calculate the number of chunk offsets that are needed for the chunks
 224          * being read.  */
 225         const u64 num_needed_chunk_offsets =
 226                 last_needed_chunk - read_start_chunk + 1 +
 227                 (last_needed_chunk < num_chunks - 1);
 228
 229         /* Calculate the number of entries in the chunk table.  Normally, it's
 230          * one less than the number of chunks, since the first chunk has no
 231          * entry.  But in the alternate chunk table format, the chunk entries
 232          * contain chunk sizes, not offsets, and there is one per chunk.  */
 233         const u64 num_chunk_entries = (alt_chunk_table ? num_chunks : num_chunks - 1);
 234
 235         /* Set the size of each chunk table entry based on the resource's
 236          * uncompressed size.  XXX:  Does the alternate chunk table really
 237          * always have 4-byte entries?  */
 238         const u64 chunk_entry_size =
 239                 (rspec->uncompressed_size > (1ULL << 32) && !alt_chunk_table)
 240                         ? 8 : 4;
 241
 242         /* Calculate the size of the chunk table in bytes.  */
 243         const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
 244
 245         /* Includes header  */
 246         const u64 chunk_table_full_size =
 247                 (alt_chunk_table) ? chunk_table_size + sizeof(struct alt_chunk_table_header_disk)
 248                                   : chunk_table_size;
 249
 250         if (!is_pipe_read) {
 251                 /* Read the needed chunk table entries into memory and use them
 252                  * to initialize the chunk_offsets array.  */
 253
 254                 u64 first_chunk_entry_to_read;
 255                 u64 last_chunk_entry_to_read;
 256
 257                 if (alt_chunk_table) {
 258                         /* The alternate chunk table contains chunk sizes, not
 259                          * offsets, so we always must read all preceding entries
 260                          * in order to determine offsets.  */
 261                         first_chunk_entry_to_read = 0;
 262                         last_chunk_entry_to_read = last_needed_chunk;
 263                 } else {
 264                         /* Here we must account for the fact that the first
 265                          * chunk has no explicit chunk table entry.  */
 266
 267                         if (read_start_chunk == 0)
 268                                 first_chunk_entry_to_read = 0;
 269                         else
 270                                 first_chunk_entry_to_read = read_start_chunk - 1;
 271
 272                         if (last_needed_chunk == 0)
 273                                 last_chunk_entry_to_read = 0;
 274                         else
 275                                 last_chunk_entry_to_read = last_needed_chunk - 1;
 276
 277                         if (last_needed_chunk < num_chunks - 1)
 278                                 last_chunk_entry_to_read++;
 279                 }
 280
 281                 const u64 num_chunk_entries_to_read =
 282                         last_chunk_entry_to_read - first_chunk_entry_to_read + 1;
 283
 284                 const u64 chunk_offsets_alloc_size =
 285                         max(num_chunk_entries_to_read,
 286                             num_needed_chunk_offsets) * sizeof(chunk_offsets[0]);
 287
 288                 if ((size_t)chunk_offsets_alloc_size != chunk_offsets_alloc_size)
 289                         goto oom;
 290
 291                 if (chunk_offsets_alloc_size <= STACK_MAX) {
 292                         chunk_offsets = alloca(chunk_offsets_alloc_size);
 293                 } else {
 294                         chunk_offsets = MALLOC(chunk_offsets_alloc_size);
 295                         if (chunk_offsets == NULL)
 296                                 goto oom;
 297                         chunk_offsets_malloced = true;
 298                 }
 299
 300                 const size_t chunk_table_size_to_read =
 301                         num_chunk_entries_to_read * chunk_entry_size;
 302
 303                 const u64 file_offset_of_needed_chunk_entries =
 304                         cur_read_offset
 305                         + (first_chunk_entry_to_read * chunk_entry_size)
 306                         + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0);
 307
 308                 void * const chunk_table_data =
 309                         (u8*)chunk_offsets +
 310                         chunk_offsets_alloc_size -
 311                         chunk_table_size_to_read;
 312
 313                 ret = full_pread(in_fd, chunk_table_data, chunk_table_size,
 314                                  file_offset_of_needed_chunk_entries);
 315                 if (ret)
 316                         goto read_error;
 317
 318                 /* Now fill in chunk_offsets from the entries we have read in
 319                  * chunk_tab_data.  We break aliasing rules here to avoid having
 320                  * to allocate yet another array.  */
 321                 typedef le64 __attribute__((may_alias)) aliased_le64_t;
 322                 typedef le32 __attribute__((may_alias)) aliased_le32_t;
 323                 u64 * chunk_offsets_p = chunk_offsets;
 324
 325                 if (alt_chunk_table) {
 326                         u64 cur_offset = 0;
 327                         aliased_le32_t *raw_entries = chunk_table_data;
 328
 329                         for (size_t i = 0; i < num_chunk_entries_to_read; i++) {
 330                                 u32 entry = le32_to_cpu(raw_entries[i]);
 331                                 if (i >= read_start_chunk)
 332                                         *chunk_offsets_p++ = cur_offset;
 333                                 cur_offset += entry;
 334                         }
 335                 } else {
 336                         if (read_start_chunk == 0)
 337                                 *chunk_offsets_p++ = 0;
 338
 339                         if (chunk_entry_size == 4) {
 340                                 aliased_le32_t *raw_entries = chunk_table_data;
 341                                 for (size_t i = 0; i < num_chunk_entries_to_read; i++)
 342                                         *chunk_offsets_p++ = le32_to_cpu(raw_entries[i]);
 343                         } else {
 344                                 aliased_le64_t *raw_entries = chunk_table_data;
 345                                 for (size_t i = 0; i < num_chunk_entries_to_read; i++)
 346                                         *chunk_offsets_p++ = le64_to_cpu(raw_entries[i]);
 347                         }
 348                 }
 349
 350                 /* Set offset to beginning of first chunk to read.  */
 351                 cur_read_offset += chunk_offsets[0];
 352                 if (rspec->is_pipable)
 353                         cur_read_offset += read_start_chunk * sizeof(struct pwm_chunk_hdr);
 354                 else
 355                         cur_read_offset += chunk_table_size;
 356         }
 357
 358         /* Allocate buffer for holding the uncompressed data of each chunk.  */
 359         if (chunk_size <= STACK_MAX) {
 360                 ubuf = alloca(chunk_size);
 361         } else {
 362                 ubuf = MALLOC(chunk_size);
 363                 if (ubuf == NULL)
 364                         goto oom;
 365                 ubuf_malloced = true;
 366         }
 367
 368         /* Unless the raw compressed data was requested, allocate a temporary
 369          * buffer for reading compressed chunks, each of which can be at most
 370          * @chunk_size - 1 bytes.  This excludes compressed chunks that are a
 371          * full @chunk_size bytes, which are actually stored uncompressed.  */
 372         if (!raw_chunks_mode) {
 373                 if (chunk_size - 1 <= STACK_MAX) {
 374                         cbuf = alloca(chunk_size - 1);
 375                 } else {
 376                         cbuf = MALLOC(chunk_size - 1);
 377                         if (cbuf == NULL)
 378                                 goto oom;
 379                         cbuf_malloced = true;
 380                 }
 381         }
 382
 383         /* Read and process each needed chunk.  */
 384         const struct data_range *cur_range = ranges;
 385         const struct data_range * const end_range = &ranges[num_ranges];
 386         u64 cur_range_pos = cur_range->offset;
 387         u64 cur_range_end = cur_range->offset + cur_range->size;
 388
 389         for (u64 i = read_start_chunk; i <= last_needed_chunk; i++) {
 390
 391                 /* Calculate uncompressed size of next chunk.  */
 392                 u32 chunk_usize;
 393                 if ((i == num_chunks - 1) && (rspec->uncompressed_size & (chunk_size - 1)))
 394                         chunk_usize = (rspec->uncompressed_size & (chunk_size - 1));
 395                 else
 396                         chunk_usize = chunk_size;
 397
 398                 /* Calculate compressed size of next chunk.  */
 399                 u32 chunk_csize;
 400                 if (is_pipe_read) {
 401                         struct pwm_chunk_hdr chunk_hdr;
 402
 403                         ret = full_pread(in_fd, &chunk_hdr,
 404                                          sizeof(chunk_hdr), cur_read_offset);
 405                         if (ret)
 406                                 goto read_error;
 407                         chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
 408                 } else {
 409                         if (i == num_chunks - 1) {
 410                                 chunk_csize = rspec->size_in_wim -
 411                                               chunk_table_full_size -
 412                                               chunk_offsets[i - read_start_chunk];
 413                                 if (rspec->is_pipable)
 414                                         chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr);
 415                         } else {
 416                                 chunk_csize = chunk_offsets[i + 1 - read_start_chunk] -
 417                                               chunk_offsets[i - read_start_chunk];
 418                         }
 419                 }
 420                 if (chunk_csize == 0 || chunk_csize > chunk_usize) {
 421                         ERROR("Invalid chunk size in compressed resource!");
 422                         errno = EINVAL;
 423                         ret = WIMLIB_ERR_DECOMPRESSION;
 424                         goto out_free_memory;
 425                 }
 426                 if (rspec->is_pipable)
 427                         cur_read_offset += sizeof(struct pwm_chunk_hdr);
 428
 429                 /* Uncompressed offsets  */
 430                 const u64 chunk_start_offset = i << chunk_order;
 431                 const u64 chunk_end_offset = chunk_start_offset + chunk_usize;
 432
 433                 if (chunk_end_offset <= cur_range_pos) {
 434
 435                         /* The next range does not require data in this chunk,
 436                          * so skip it.  */
 437
 438                         cur_read_offset += chunk_csize;
 439                         if (is_pipe_read) {
 440                                 u8 dummy;
 441
 442                                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 443                                 if (ret)
 444                                         goto read_error;
 445                         }
 446                 } else {
 447
 448                         /* Read the chunk and feed data to the callback
 449                          * function.  */
 450                         u8 *cb_buf;
 451
 452                         ret = full_pread(in_fd,
 453                                          cbuf,
 454                                          chunk_csize,
 455                                          cur_read_offset);
 456                         if (ret)
 457                                 goto read_error;
 458
 459                         if (chunk_csize != chunk_usize && !raw_chunks_mode) {
 460                                 ret = decompress(cbuf,
 461                                                  chunk_csize,
 462                                                  ubuf,
 463                                                  chunk_usize,
 464                                                  rspec->ctype,
 465                                                  chunk_size);
 466                                 if (ret) {
 467                                         ERROR("Failed to decompress data!");
 468                                         ret = WIMLIB_ERR_DECOMPRESSION;
 469                                         errno = EINVAL;
 470                                         goto out_free_memory;
 471                                 }
 472                                 cb_buf = ubuf;
 473                         } else {
 474                                 cb_buf = cbuf;
 475                         }
 476                         cur_read_offset += chunk_csize;
 477
 478                         /* At least one range requires data in this chunk.
 479                          * However, the data fed to the callback function must
 480                          * not overlap range boundaries.  */
 481                         do {
 482                                 size_t start, end, size;
 483
 484                                 start = cur_range_pos - chunk_start_offset;
 485                                 end = min(cur_range_end, chunk_end_offset) - chunk_start_offset;
 486                                 size = end - start;
 487
 488                                 if (raw_chunks_mode)
 489                                         ret = (*cb)(&cb_buf[0], chunk_csize, cb_ctx);
 490                                 else
 491                                         ret = (*cb)(&cb_buf[start], size, cb_ctx);
 492
 493                                 if (ret)
 494                                         goto out_free_memory;
 495
 496                                 cur_range_pos += size;
 497                                 if (cur_range_pos == cur_range_end) {
 498                                         if (++cur_range == end_range) {
 499                                                 cur_range_pos = ~0ULL;
 500                                         } else {
 501                                                 cur_range_pos = cur_range->offset;
 502                                                 cur_range_end = cur_range->offset + cur_range->size;
 503                                         }
 504                                 }
 505                         } while (cur_range_pos < chunk_end_offset);
 506                 }
 507         }
 508
 509         if (is_pipe_read
 510             && last_offset == rspec->uncompressed_size - 1
 511             && chunk_table_size)
 512         {
 513                 u8 dummy;
 514                 /* Skip chunk table at end of pipable resource.  */
 515
 516                 cur_read_offset += chunk_table_size;
 517                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 518                 if (ret)
 519                         goto read_error;
 520         }
 521         ret = 0;
 522 out_free_memory:
 523         errno_save = errno;
 524         if (chunk_offsets_malloced)
 525                 FREE(chunk_offsets);
 526         if (ubuf_malloced)
 527                 FREE(ubuf);
 528         if (cbuf_malloced)
 529                 FREE(cbuf);
 530         errno = errno_save;
 531         return ret;
 532
 533 oom:
 534         ERROR("Not enough memory available to read size=%"PRIu64" bytes "
 535               "from compressed resource!", last_offset - first_offset + 1);
 536         errno = ENOMEM;
 537         ret = WIMLIB_ERR_NOMEM;
 538         goto out_free_memory;
 539
 540 read_error:
 541         ERROR_WITH_ERRNO("Error reading compressed file resource!");
 542         goto out_free_memory;
 543 }
 544
 545 /* Read raw data from a file descriptor at the specified offset.  */
 546 static int
 547 read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb,
 548                    u32 cb_chunk_size, void *ctx_or_buf, u64 offset)
 549 {
 550         int ret;
 551         u8 *tmp_buf;
 552         bool tmp_buf_malloced = false;
 553
 554         if (cb) {
 555                 /* Send data to callback function in chunks.  */
 556                 if (cb_chunk_size <= STACK_MAX) {
 557                         tmp_buf = alloca(cb_chunk_size);
 558                 } else {
 559                         tmp_buf = MALLOC(cb_chunk_size);
 560                         if (tmp_buf == NULL) {
 561                                 ret = WIMLIB_ERR_NOMEM;
 562                                 goto out;
 563                         }
 564                         tmp_buf_malloced = true;
 565                 }
 566
 567                 while (size) {
 568                         size_t bytes_to_read = min(cb_chunk_size, size);
 569                         ret = full_pread(in_fd, tmp_buf, bytes_to_read,
 570                                          offset);
 571                         if (ret)
 572                                 goto read_error;
 573                         ret = cb(tmp_buf, bytes_to_read, ctx_or_buf);
 574                         if (ret)
 575                                 goto out;
 576                         size -= bytes_to_read;
 577                         offset += bytes_to_read;
 578                 }
 579         } else {
 580                 /* Read data directly into buffer.  */
 581                 ret = full_pread(in_fd, ctx_or_buf, size, offset);
 582                 if (ret)
 583                         goto read_error;
 584         }
 585         ret = 0;
 586         goto out;
 587
 588 read_error:
 589         ERROR_WITH_ERRNO("Read error");
 590 out:
 591         if (tmp_buf_malloced)
 592                 FREE(tmp_buf);
 593         return ret;
 594 }
 595
 596 static int
 597 bufferer_cb(const void *chunk, size_t size, void *_ctx)
 598 {
 599         u8 **buf_p = _ctx;
 600
 601         *buf_p = mempcpy(*buf_p, chunk, size);
 602         return 0;
 603 }
 604
/* State for rechunker_cb(), which regroups incoming data into pieces of
 * cb_chunk_size bytes (shorter at the end of each range) before forwarding
 * them to another callback.  */
struct rechunker_context {
        /* Staging buffer that incoming bytes are copied into.  Presumably at
         * least cb_chunk_size bytes; allocated by the caller (not shown
         * here).  */
        u8 *buffer;
        /* Number of bytes currently held in buffer.  */
        u32 buffer_filled;
        /* Number of buffered bytes at which the buffer is flushed to cb.  */
        u32 cb_chunk_size;

        /* The ranges being read, and how many there are.  */
        const struct data_range *ranges;
        size_t num_ranges;
        /* Index into ranges of the range currently being filled; equals
         * num_ranges once every range has been completed.  */
        size_t cur_range;
        /* Bytes of the current range that have not yet been copied into
         * buffer.  */
        u64 range_bytes_remaining;

        /* Downstream callback that receives the regrouped data, and its
         * context pointer.  */
        consume_data_callback_t cb;
        void *cb_ctx;
};
 618
 619 static int
 620 rechunker_cb(const void *chunk, size_t size, void *_ctx)
 621 {
 622         struct rechunker_context *ctx = _ctx;
 623         const u8 *chunkptr = chunk;
 624         size_t bytes_to_copy;
 625         int ret;
 626
 627         wimlib_assert(ctx->cur_range != ctx->num_ranges);
 628
 629         while (size) {
 630                 bytes_to_copy = size;
 631
 632                 if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled)
 633                         bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled;
 634
 635                 if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled)
 636                         bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled;
 637
 638                 memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy);
 639
 640                 ctx->buffer_filled += bytes_to_copy;
 641                 chunkptr += bytes_to_copy;
 642                 size -= bytes_to_copy;
 643                 ctx->range_bytes_remaining -= bytes_to_copy;
 644
 645                 if (ctx->buffer_filled == ctx->cb_chunk_size ||
 646                     ctx->range_bytes_remaining == 0)
 647                 {
 648                         ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx);
 649                         if (ret)
 650                                 return ret;
 651                         ctx->buffer_filled = 0;
 652
 653                         if (ctx->range_bytes_remaining == 0 &&
 654                             ++ctx->cur_range != ctx->num_ranges)
 655                                 ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size;
 656                 }
 657         }
 658         return 0;
 659 }
 660
 661 /*
 662  * read_partial_wim_resource()-
 663  *
 664  * Read a range of data from an uncompressed or compressed resource in a WIM
 665  * file.  Data is written into a buffer or fed into a callback function, as
 666  * documented in read_stream_prefix().
 667  *
 668  * By default, this function provides the uncompressed data of the resource, and
 * @size and @offset are interpreted relative to the uncompressed contents of
 670  * the resource.  This behavior can be modified by either of the following
 671  * flags:
 672  *
 673  * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
 674  *      Read @size bytes at @offset of the raw contents of the compressed
 675  *      resource.  In the case of pipable resources, this excludes the stream
 676  *      header.  Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS.
 677  *
 678  * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
 679  *      Read the raw compressed chunks of the compressed resource.  @size must
 680  *      be the full uncompressed size, @offset must be 0, and @cb_chunk_size
 681  *      must be the resource chunk size.
 682  *
 683  * Return values:
 684  *      WIMLIB_ERR_SUCCESS (0)
 685  *      WIMLIB_ERR_READ                   (errno set)
 686  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
 687  *      WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
 688  *      WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
 689  *
 690  *      or other error code returned by the @cb function.
 691  */
 692 int
 693 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 694                           u64 size, consume_data_callback_t cb,
 695                           u32 cb_chunk_size, void *ctx_or_buf,
 696                           int flags, u64 offset)
 697 {
 698         const struct wim_resource_spec *rspec;
 699         struct filedes *in_fd;
 700
 701         /* Verify parameters.  */
 702         wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
 703         rspec = lte->rspec;
 704         in_fd = &rspec->wim->in_fd;
 705         if (cb)
 706                 wimlib_assert(is_power_of_2(cb_chunk_size));
 707         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 708                 /* Raw chunks mode is subject to the restrictions noted.  */
 709                 wimlib_assert(!lte_is_partial(lte));
 710                 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL));
 711                 wimlib_assert(cb_chunk_size == rspec->cchunk_size);
 712                 wimlib_assert(size == lte->size);
 713                 wimlib_assert(offset == 0);
 714         } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) {
 715                 /* Raw full mode:  read must not overrun end of store size.  */
 716                 wimlib_assert(!lte_is_partial(lte));
 717                 wimlib_assert(offset + size >= size &&
 718                               offset + size <= rspec->size_in_wim);
 719         } else {
 720                 /* Normal mode:  read must not overrun end of original size.  */
 721                 wimlib_assert(offset + size >= size &&
 722                               offset + size <= lte->size);
 723         }
 724
 725         DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] "
 726               "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" "
 727               "(readflags 0x%08x, resflags 0x%02x%s)",
 728               size, offset, lte->offset_in_res,
 729               rspec->size_in_wim,
 730               rspec->uncompressed_size,
 731               rspec->offset_in_wim,
 732               flags, lte->flags,
 733               (rspec->is_pipable ? ", pipable" : ""));
 734
 735         if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
 736             rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE)
 737         {
 738                 return read_raw_file_data(in_fd,
 739                                           size,
 740                                           cb,
 741                                           cb_chunk_size,
 742                                           ctx_or_buf,
 743                                           rspec->offset_in_wim + lte->offset_in_res + offset);
 744         } else {
 745                 bool raw_chunks;
 746                 struct data_range range;
 747                 consume_data_callback_t internal_cb;
 748                 void *internal_cb_ctx;
 749                 u8 *buf;
 750                 bool rechunker_buf_malloced = false;
 751                 struct rechunker_context *rechunker_ctx;
 752                 int ret;
 753
 754                 if (size == 0)
 755                         return 0;
 756
 757                 range.offset = lte->offset_in_res + offset;
 758                 range.size = size;
 759                 raw_chunks = !!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS);
 760
 761                 if (cb != NULL &&
 762                     cb_chunk_size == rspec->cchunk_size &&
 763                     !(rspec->flags & WIM_RESHDR_FLAG_CONCAT))
 764                 {
 765                         internal_cb = cb;
 766                         internal_cb_ctx = ctx_or_buf;
 767                 } else if (cb == NULL) {
 768                         buf = ctx_or_buf;
 769                         internal_cb = bufferer_cb;
 770                         internal_cb_ctx = &buf;
 771                 } else {
 772                         rechunker_ctx = alloca(sizeof(struct rechunker_context));
 773
 774                         if (cb_chunk_size <= STACK_MAX) {
 775                                 rechunker_ctx->buffer = alloca(cb_chunk_size);
 776                         } else {
 777                                 rechunker_ctx->buffer = MALLOC(cb_chunk_size);
 778                                 if (rechunker_ctx->buffer == NULL)
 779                                         return WIMLIB_ERR_NOMEM;
 780                                 rechunker_buf_malloced = true;
 781                         }
 782                         rechunker_ctx->buffer_filled = 0;
 783                         rechunker_ctx->cb_chunk_size = cb_chunk_size;
 784
 785                         rechunker_ctx->ranges = &range;
 786                         rechunker_ctx->num_ranges = 1;
 787                         rechunker_ctx->cur_range = 0;
 788                         rechunker_ctx->range_bytes_remaining = range.size;
 789
 790                         rechunker_ctx->cb = cb;
 791                         rechunker_ctx->cb_ctx = ctx_or_buf;
 792
 793                         internal_cb = rechunker_cb;
 794                         internal_cb_ctx = rechunker_ctx;
 795                 }
 796
 797                 ret = read_compressed_wim_resource(rspec, &range, 1,
 798                                                    internal_cb, internal_cb_ctx,
 799                                                    raw_chunks);
 800                 if (rechunker_buf_malloced)
 801                         FREE(rechunker_ctx->buffer);
 802
 803                 return ret;
 804         }
 805 }
 806
 807 int
 808 read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte,
 809                                  size_t size, u64 offset, void *buf)
 810 {
 811         return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
 812 }
 813
 814 static int
 815 read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 816                        consume_data_callback_t cb, u32 cb_chunk_size,
 817                        void *ctx_or_buf, int flags)
 818 {
 819         return read_partial_wim_resource(lte, size, cb, cb_chunk_size,
 820                                          ctx_or_buf, flags, 0);
 821 }
 822
 823 #ifndef __WIN32__
 824 /* This function handles reading stream data that is located in an external
 825  * file,  such as a file that has been added to the WIM image through execution
 826  * of a wimlib_add_command.
 827  *
 828  * This assumes the file can be accessed using the standard POSIX open(),
 829  * read(), and close().  On Windows this will not necessarily be the case (since
 830  * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be
 831  * encrypted), so Windows uses its own code for its equivalent case.
 832  */
 833 static int
 834 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 835                          consume_data_callback_t cb, u32 cb_chunk_size,
 836                          void *ctx_or_buf, int _ignored_flags)
 837 {
 838         int ret;
 839         int raw_fd;
 840         struct filedes fd;
 841
 842         wimlib_assert(size <= lte->size);
 843         DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk);
 844
 845         raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY);
 846         if (raw_fd < 0) {
 847                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk);
 848                 return WIMLIB_ERR_OPEN;
 849         }
 850         filedes_init(&fd, raw_fd);
 851         ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0);
 852         filedes_close(&fd);
 853         return ret;
 854 }
 855 #endif /* !__WIN32__ */
 856
 857 /* This function handles the trivial case of reading stream data that is, in
 858  * fact, already located in an in-memory buffer.  */
 859 static int
 860 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
 861                    u64 size, consume_data_callback_t cb,
 862                    u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags)
 863 {
 864         wimlib_assert(size <= lte->size);
 865
 866         if (cb) {
 867                 /* Feed the data into the callback function in
 868                  * appropriately-sized chunks.  */
 869                 int ret;
 870                 u32 chunk_size;
 871
 872                 for (u64 offset = 0; offset < size; offset += chunk_size) {
 873                         chunk_size = min(cb_chunk_size, size - offset);
 874                         ret = cb((const u8*)lte->attached_buffer + offset,
 875                                  chunk_size, ctx_or_buf);
 876                         if (ret)
 877                                 return ret;
 878                 }
 879         } else {
 880                 /* Copy the data directly into the specified buffer.  */
 881                 memcpy(ctx_or_buf, lte->attached_buffer, size);
 882         }
 883         return 0;
 884 }
 885
 /* Signature shared by the per-location readers that read_stream_prefix()
 * dispatches to.  The parameters are exactly those of read_stream_prefix():
 * the stream, the number of leading bytes to read, an optional data callback
 * with its chunk size, the callback context or destination buffer, and the
 * read flags.  */
typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
					    u64 size, consume_data_callback_t cb,
					    u32 cb_chunk_size, void *ctx_or_buf,
					    int flags);
 890
 891 /*
 892  * read_stream_prefix()-
 893  *
 894  * Reads the first @size bytes from a generic "stream", which may be located in
 895  * any one of several locations, such as in a WIM file (compressed or
 896  * uncompressed), in an external file, or directly in an in-memory buffer.
 897  *
 898  * This function feeds the data either to a callback function (@cb != NULL,
 899  * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL,
 900  * @ctx_or_buf specifies the buffer, which must have room for at least @size
 901  * bytes).
 902  *
 903  * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks
 904  * to feed the callback function.  @cb_chunk_size must be positive, and if the
 905  * stream is in a WIM file, must be a power of 2.  All chunks, except possibly
 906  * the last one, will be this size.  If (@cb == NULL), @cb_chunk_size is
 907  * ignored.
 908  *
 909  * If the stream is located in a WIM file, @flags can be set as documented in
 910  * read_partial_wim_resource().  Otherwise @flags are ignored.
 911  *
 912  * Returns 0 on success; nonzero on error.  A nonzero value will be returned if
 913  * the stream data cannot be successfully read (for a number of different
 914  * reasons, depending on the stream location), or if a callback function was
 915  * specified and it returned nonzero.
 916  */
 917 int
 918 read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 919                    consume_data_callback_t cb, u32 cb_chunk_size,
 920                    void *ctx_or_buf, int flags)
 921 {
 922         /* This function merely verifies several preconditions, then passes
 923          * control to an appropriate function for understanding each possible
 924          * stream location.  */
 925         static const read_stream_prefix_handler_t handlers[] = {
 926                 [RESOURCE_IN_WIM]             = read_wim_stream_prefix,
 927         #ifdef __WIN32__
 928                 [RESOURCE_IN_FILE_ON_DISK]    = read_win32_file_prefix,
 929         #else
 930                 [RESOURCE_IN_FILE_ON_DISK]    = read_file_on_disk_prefix,
 931         #endif
 932                 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
 933         #ifdef WITH_FUSE
 934                 [RESOURCE_IN_STAGING_FILE]    = read_file_on_disk_prefix,
 935         #endif
 936         #ifdef WITH_NTFS_3G
 937                 [RESOURCE_IN_NTFS_VOLUME]     = read_ntfs_file_prefix,
 938         #endif
 939         #ifdef __WIN32__
 940                 [RESOURCE_WIN32_ENCRYPTED]    = read_win32_encrypted_file_prefix,
 941         #endif
 942         };
 943         wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
 944                       && handlers[lte->resource_location] != NULL);
 945         wimlib_assert(cb == NULL || cb_chunk_size > 0);
 946         return handlers[lte->resource_location](lte, size, cb, cb_chunk_size,
 947                                                 ctx_or_buf, flags);
 948 }
 949
 950 /* Read the full uncompressed data of the specified stream into the specified
 951  * buffer, which must have space for at least lte->size bytes.  */
 952 int
 953 read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf)
 954 {
 955         return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0);
 956 }
 957
 958 /* Read the full uncompressed data of the specified stream.  A buffer sufficient
 959  * to hold the data is allocated and returned in @buf_ret.  */
 960 int
 961 read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte,
 962                                 void **buf_ret)
 963 {
 964         int ret;
 965         void *buf;
 966
 967         if ((size_t)lte->size != lte->size) {
 968                 ERROR("Can't read %"PRIu64" byte stream into "
 969                       "memory", lte->size);
 970                 return WIMLIB_ERR_NOMEM;
 971         }
 972
 973         buf = MALLOC(lte->size);
 974         if (buf == NULL)
 975                 return WIMLIB_ERR_NOMEM;
 976
 977         ret = read_full_stream_into_buf(lte, buf);
 978         if (ret) {
 979                 FREE(buf);
 980                 return ret;
 981         }
 982
 983         *buf_ret = buf;
 984         return 0;
 985 }
 986
 987 /* Retrieve the full uncompressed data of the specified WIM resource.  */
 988 static int
 989 wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret)
 990 {
 991         int ret;
 992         struct wim_lookup_table_entry *lte;
 993
 994         lte = new_lookup_table_entry();
 995         if (lte == NULL)
 996                 return WIMLIB_ERR_NOMEM;
 997
 998         lte->unhashed = 1;
 999         lte_bind_wim_resource_spec(lte, rspec);
1000         lte->flags = rspec->flags;
1001         lte->size = rspec->uncompressed_size;
1002         lte->offset_in_res = 0;
1003
1004         ret = read_full_stream_into_alloc_buf(lte, buf_ret);
1005
1006         lte_unbind_wim_resource_spec(lte);
1007         free_lookup_table_entry(lte);
1008         return ret;
1009 }
1010
1011 /* Retrieve the full uncompressed data of the specified WIM resource.  */
1012 int
1013 wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret)
1014 {
1015         DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", "
1016               "uncompressed_size=%"PRIu64,
1017               reshdr->offset_in_wim, reshdr->size_in_wim, reshdr->uncompressed_size);
1018
1019         struct wim_resource_spec rspec;
1020         wim_res_hdr_to_spec(reshdr, wim, &rspec);
1021         return wim_resource_spec_to_data(&rspec, buf_ret);
1022 }
1023
/* State used by read_stream_list_wrapper_cb() while the data of several
 * streams is being read back to back.  */
struct read_stream_list_ctx {
	/* Called before the first chunk of each stream is consumed.  */
	read_stream_list_begin_stream_t begin_stream;

	/* Called with every chunk of data.  */
	consume_data_callback_t consume_chunk;

	/* Called once all cur_stream->size bytes of a stream were consumed.  */
	read_stream_list_end_stream_t end_stream;

	/* Context arguments passed to the three callbacks above.  */
	void *begin_stream_ctx;
	void *consume_chunk_ctx;
	void *end_stream_ctx;

	/* Stream whose data is currently being received.  */
	struct wim_lookup_table_entry *cur_stream;

	/* Number of bytes of @cur_stream consumed so far.  */
	u64 cur_stream_offset;

	/* Last stream to process; the wrapper does not advance past it.  */
	struct wim_lookup_table_entry *final_stream;

	/* Byte offset of the `struct list_head' that links the streams
	 * together within each `struct wim_lookup_table_entry'.  */
	size_t list_head_offset;
};
1036
1037 static int
1038 read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx)
1039 {
1040         struct read_stream_list_ctx *ctx = _ctx;
1041         int ret;
1042
1043         if (ctx->cur_stream_offset == 0) {
1044                 /* Starting a new stream.  */
1045                 ret = (*ctx->begin_stream)(ctx->cur_stream, ctx->begin_stream_ctx);
1046                 if (ret)
1047                         return ret;
1048         }
1049
1050         ret = (*ctx->consume_chunk)(chunk, size, ctx->consume_chunk_ctx);
1051         if (ret)
1052                 return ret;
1053
1054         ctx->cur_stream_offset += size;
1055
1056         if (ctx->cur_stream_offset == ctx->cur_stream->size) {
1057                 /* Finished reading all the data for a stream; advance
1058                  * to the next one.  */
1059                 ret = (*ctx->end_stream)(ctx->cur_stream, ctx->end_stream_ctx);
1060                 if (ret)
1061                         return ret;
1062
1063                 if (ctx->cur_stream == ctx->final_stream)
1064                         return 0;
1065
1066                 struct list_head *cur = (struct list_head *)
1067                                 ((u8*)ctx->cur_stream + ctx->list_head_offset);
1068                 struct list_head *next = cur->next;
1069
1070                 ctx->cur_stream = (struct wim_lookup_table_entry *)
1071                                 ((u8*)next - ctx->list_head_offset);
1072
1073                 ctx->cur_stream_offset = 0;
1074         }
1075         return 0;
1076 }
1077
1078 /*
1079  * Read a list of streams, each of which may be in any supported location (e.g.
1080  * in a WIM or in an external file).  Unlike read_stream_prefix() or the
1081  * functions which call it, this function optimizes the case where multiple
1082  * streams are packed into a single compressed WIM resource and reads them all
1083  * consecutively, only decompressing the data one time.
1084  *
1085  * @stream_list
1086  *      List of streams (represented as `struct wim_lookup_table_entry's) to
1087  *      read.
1088  * @list_head_offset
1089  *      Offset of the `struct list_head' within each `struct
1090  *      wim_lookup_table_entry' that makes up the @stream_list.
1091  * @begin_stream
1092  *      Callback for starting to process a stream.
1093  * @consume_chunk
1094  *      Callback for receiving a chunk of stream data.
1095  * @end_stream
1096  *      Callback for finishing the processing of a stream.
1097  * @cb_chunk_size
1098  *      Size of chunks to provide to @consume_chunk.  For a given stream, all
1099  *      the chunks will be this size, except possibly the last which will be the
1100  *      remainder.
1101  * @cb_ctx
1102  *      Parameter to pass to the callback functions.
1103  *
1104  * Returns 0 on success; a nonzero error code on failure.  Failure can occur due
1105  * to an error reading the data or due to an error status being returned by any
1106  * of the callback functions.
1107  */
1108 int
1109 read_stream_list(struct list_head *stream_list,
1110                  size_t list_head_offset,
1111                  read_stream_list_begin_stream_t begin_stream,
1112                  consume_data_callback_t consume_chunk,
1113                  read_stream_list_end_stream_t end_stream,
1114                  u32 cb_chunk_size,
1115                  void *cb_ctx)
1116 {
1117         int ret;
1118         struct list_head *cur, *next;
1119         struct wim_lookup_table_entry *lte;
1120
1121         ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset);
1122         if (ret)
1123                 return ret;
1124
1125         for (cur = stream_list->next, next = cur->next;
1126              cur != stream_list;
1127              cur = next, next = cur->next)
1128         {
1129                 lte = (struct wim_lookup_table_entry*)((u8*)cur - list_head_offset);
1130
1131                 if (lte_is_partial(lte)) {
1132
1133                         struct wim_lookup_table_entry *lte_next, *lte_last;
1134                         struct list_head *next2;
1135                         size_t stream_count;
1136
1137                         /* The next stream is a proper sub-sequence of a WIM
1138                          * resource.  See if there are other streams in the same
1139                          * resource that need to be read.  Since
1140                          * sort_stream_list_by_sequential_order() sorted the
1141                          * streams by offset in the WIM, this can be determined
1142                          * by simply scanning forward in the list.  */
1143
1144                         lte_last = lte;
1145                         stream_count = 1;
1146                         for (next2 = next;
1147                              next2 != stream_list
1148                              && (lte_next = (struct wim_lookup_table_entry*)
1149                                                 ((u8*)next2 - list_head_offset),
1150                                  lte_next->resource_location == RESOURCE_IN_WIM
1151                                  && lte_next->rspec == lte->rspec);
1152                              next2 = next2->next)
1153                         {
1154                                 lte_last = lte_next;
1155                                 stream_count++;
1156                         }
1157                         if (stream_count > 1) {
1158                                 /* Reading multiple streams combined into a
1159                                  * single WIM resource.  They are in the stream
1160                                  * list, sorted by offset; @lte specifies the
1161                                  * first stream in the resource that needs to be
1162                                  * read and @lte_last specifies the last stream
1163                                  * in the resource that needs to be read.  */
1164
1165                                 next = next2;
1166
1167                                 struct data_range ranges[stream_count];
1168
1169                                 {
1170                                         struct list_head *next3;
1171                                         size_t i;
1172                                         struct wim_lookup_table_entry *lte_cur;
1173
1174                                         next3 = cur;
1175                                         for (i = 0; i < stream_count; i++) {
1176                                                 lte_cur = (struct wim_lookup_table_entry*)
1177                                                         ((u8*)next3 - list_head_offset);
1178                                                 ranges[i].offset = lte_cur->offset_in_res;
1179                                                 ranges[i].size = lte_cur->size;
1180                                                 next3 = next3->next;
1181                                         }
1182                                 }
1183
1184                                 struct rechunker_context rechunker_ctx = {
1185                                         .buffer = MALLOC(cb_chunk_size),
1186                                         .buffer_filled = 0,
1187                                         .cb_chunk_size = cb_chunk_size,
1188                                         .ranges = ranges,
1189                                         .num_ranges = stream_count,
1190                                         .cur_range = 0,
1191                                         .range_bytes_remaining = ranges[0].size,
1192                                         .cb = consume_chunk,
1193                                         .cb_ctx = cb_ctx,
1194                                 };
1195
1196                                 if (rechunker_ctx.buffer == NULL)
1197                                         return WIMLIB_ERR_NOMEM;
1198
1199                                 struct read_stream_list_ctx ctx = {
1200                                         .begin_stream           = begin_stream,
1201                                         .begin_stream_ctx       = cb_ctx,
1202                                         .consume_chunk          = rechunker_cb,
1203                                         .consume_chunk_ctx      = &rechunker_ctx,
1204                                         .end_stream             = end_stream,
1205                                         .end_stream_ctx         = cb_ctx,
1206                                         .cur_stream             = lte,
1207                                         .cur_stream_offset      = 0,
1208                                         .final_stream           = lte_last,
1209                                         .list_head_offset       = list_head_offset,
1210                                 };
1211
1212                                 ret = read_compressed_wim_resource(lte->rspec,
1213                                                                    ranges,
1214                                                                    stream_count,
1215                                                                    read_stream_list_wrapper_cb,
1216                                                                    &ctx,
1217                                                                    false);
1218                                 FREE(rechunker_ctx.buffer);
1219                                 if (ret)
1220                                         return ret;
1221                                 continue;
1222                         }
1223                 }
1224                 ret = (*begin_stream)(lte, cb_ctx);
1225                 if (ret)
1226                         return ret;
1227
1228                 ret = read_stream_prefix(lte, lte->size, consume_chunk,
1229                                          cb_chunk_size, cb_ctx, 0);
1230                 if (ret)
1231                         return ret;
1232
1233                 ret = (*end_stream)(lte, cb_ctx);
1234                 if (ret)
1235                         return ret;
1236         }
1237         return 0;
1238 }
1239
/* Context for extract_chunk_sha1_wrapper():  a running SHA1 computation plus
 * the real extraction callback and its argument.  */
struct extract_ctx {
	/* SHA1 state updated with every chunk before it is extracted.  */
	SHA_CTX sha_ctx;

	/* Callback that actually extracts each chunk.  */
	consume_data_callback_t extract_chunk;

	/* Context argument passed to @extract_chunk.  */
	void *extract_chunk_arg;
};
1245
1246 static int
1247 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx)
1248 {
1249         struct extract_ctx *ctx = _ctx;
1250
1251         sha1_update(&ctx->sha_ctx, chunk, chunk_size);
1252         return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
1253 }
1254
1255 /* Extracts the first @size bytes of a stream to somewhere.  In the process, the
1256  * SHA1 message digest of the uncompressed stream is checked if the full stream
1257  * is being extracted.
1258  *
1259  * @extract_chunk is a function that will be called to extract each chunk of the
1260  * stream.  */
1261 int
1262 extract_stream(const struct wim_lookup_table_entry *lte, u64 size,
1263                consume_data_callback_t extract_chunk, void *extract_chunk_arg)
1264 {
1265         int ret;
1266         if (size == lte->size) {
1267                 /* Do SHA1 */
1268                 struct extract_ctx ctx;
1269                 ctx.extract_chunk = extract_chunk;
1270                 ctx.extract_chunk_arg = extract_chunk_arg;
1271                 sha1_init(&ctx.sha_ctx);
1272                 ret = read_stream_prefix(lte, size,
1273                                          extract_chunk_sha1_wrapper,
1274                                          lte_cchunk_size(lte),
1275                                          &ctx, 0);
1276                 if (ret == 0) {
1277                         u8 hash[SHA1_HASH_SIZE];
1278                         sha1_final(hash, &ctx.sha_ctx);
1279                         if (!hashes_equal(hash, lte->hash)) {
1280                                 if (wimlib_print_errors) {
1281                                         ERROR("Invalid SHA1 message digest "
1282                                               "on the following WIM stream:");
1283                                         print_lookup_table_entry(lte, stderr);
1284                                         if (lte->resource_location == RESOURCE_IN_WIM)
1285                                                 ERROR("The WIM file appears to be corrupt!");
1286                                 }
1287                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1288                         }
1289                 }
1290         } else {
1291                 /* Don't do SHA1 */
1292                 ret = read_stream_prefix(lte, size, extract_chunk,
1293                                          lte_cchunk_size(lte),
1294                                          extract_chunk_arg, 0);
1295         }
1296         return ret;
1297 }
1298
/* Data callback that writes a chunk in full to the `struct filedes' passed as
 * @_fd_p.  If full_write() reports failure, an error message is logged and
 * its return value is passed back to the caller.  */
static int
extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
{
	struct filedes *out_fd = _fd_p;
	int status;

	status = full_write(out_fd, buf, len);
	if (status != 0)
		ERROR_WITH_ERRNO("Error writing to file descriptor");
	return status;
}
1308
1309 /* Extract the first @size bytes of the specified stream to the specified file
1310  * descriptor.  If @size is the full size of the stream, its SHA1 message digest
1311  * is also checked.  */
1312 int
1313 extract_stream_to_fd(const struct wim_lookup_table_entry *lte,
1314                      struct filedes *fd, u64 size)
1315 {
1316         return extract_stream(lte, size, extract_wim_chunk_to_fd, fd);
1317 }
1318
1319
1320 static int
1321 sha1_chunk(const void *buf, size_t len, void *ctx)
1322 {
1323         sha1_update(ctx, buf, len);
1324         return 0;
1325 }
1326
1327 /* Calculate the SHA1 message digest of a stream, storing it in @lte->hash.  */
1328 int
1329 sha1_stream(struct wim_lookup_table_entry *lte)
1330 {
1331         int ret;
1332         SHA_CTX sha_ctx;
1333
1334         sha1_init(&sha_ctx);
1335         ret = read_stream_prefix(lte, lte->size,
1336                                  sha1_chunk, lte_cchunk_size(lte),
1337                                  &sha_ctx, 0);
1338         if (ret == 0)
1339                 sha1_final(lte->hash, &sha_ctx);
1340
1341         return ret;
1342 }
1343
1344 /* Convert a WIM resource header to a stand-alone resource specification.  */
1345 void
1346 wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim,
1347                     struct wim_resource_spec *spec)
1348 {
1349         spec->wim = wim;
1350         spec->offset_in_wim = reshdr->offset_in_wim;
1351         spec->size_in_wim = reshdr->size_in_wim;
1352         spec->uncompressed_size = reshdr->uncompressed_size;
1353         INIT_LIST_HEAD(&spec->lte_list);
1354         spec->flags = reshdr->flags;
1355         spec->is_pipable = wim_is_pipable(wim);
1356         if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) {
1357                 spec->ctype = wim->compression_type;
1358                 spec->cchunk_size = wim->chunk_size;
1359         } else {
1360                 spec->ctype = WIMLIB_COMPRESSION_TYPE_NONE;
1361                 spec->cchunk_size = 0;
1362         }
1363 }
1364
1365 /* Convert a stand-alone resource specification to a WIM resource header.  */
1366 void
1367 wim_res_spec_to_hdr(const struct wim_resource_spec *rspec,
1368                     struct wim_reshdr *reshdr)
1369 {
1370         reshdr->offset_in_wim     = rspec->offset_in_wim;
1371         reshdr->size_in_wim       = rspec->size_in_wim;
1372         reshdr->flags             = rspec->flags;
1373         reshdr->uncompressed_size = rspec->uncompressed_size;
1374 }
1375
1376 /* Translates a WIM resource header from the on-disk format into an in-memory
1377  * format.  */
1378 int
1379 get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,
1380                struct wim_reshdr *reshdr)
1381 {
1382         reshdr->offset_in_wim = le64_to_cpu(disk_reshdr->offset_in_wim);
1383         reshdr->size_in_wim = (((u64)disk_reshdr->size_in_wim[0] <<  0) |
1384                               ((u64)disk_reshdr->size_in_wim[1] <<  8) |
1385                               ((u64)disk_reshdr->size_in_wim[2] << 16) |
1386                               ((u64)disk_reshdr->size_in_wim[3] << 24) |
1387                               ((u64)disk_reshdr->size_in_wim[4] << 32) |
1388                               ((u64)disk_reshdr->size_in_wim[5] << 40) |
1389                               ((u64)disk_reshdr->size_in_wim[6] << 48));
1390         reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size);
1391         reshdr->flags = disk_reshdr->flags;
1392
1393         /* Truncate numbers to 62 bits to avoid possible overflows.  */
1394         if (reshdr->offset_in_wim & 0xc000000000000000ULL)
1395                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1396
1397         if (reshdr->uncompressed_size & 0xc000000000000000ULL)
1398                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1399
1400         return 0;
1401 }
1402
1403 /* Translates a WIM resource header from an in-memory format into the on-disk
1404  * format.  */
1405 void
1406 put_wim_reshdr(const struct wim_reshdr *reshdr,
1407                struct wim_reshdr_disk *disk_reshdr)
1408 {
1409         disk_reshdr->size_in_wim[0] = reshdr->size_in_wim  >>  0;
1410         disk_reshdr->size_in_wim[1] = reshdr->size_in_wim  >>  8;
1411         disk_reshdr->size_in_wim[2] = reshdr->size_in_wim  >> 16;
1412         disk_reshdr->size_in_wim[3] = reshdr->size_in_wim  >> 24;
1413         disk_reshdr->size_in_wim[4] = reshdr->size_in_wim  >> 32;
1414         disk_reshdr->size_in_wim[5] = reshdr->size_in_wim  >> 40;
1415         disk_reshdr->size_in_wim[6] = reshdr->size_in_wim  >> 48;
1416         disk_reshdr->flags = reshdr->flags;
1417         disk_reshdr->offset_in_wim = cpu_to_le64(reshdr->offset_in_wim);
1418         disk_reshdr->uncompressed_size = cpu_to_le64(reshdr->uncompressed_size);
1419 }