wimlib.net Git - wimlib/blob - src/resource.c

   1 /*
   2  * resource.c
   3  *
   4  * Read uncompressed and compressed metadata and file resources from a WIM file.
   5  */
   6
   7 /*
   8  * Copyright (C) 2012, 2013 Eric Biggers
   9  *
  10  * This file is part of wimlib, a library for working with WIM files.
  11  *
  12  * wimlib is free software; you can redistribute it and/or modify it under the
  13  * terms of the GNU General Public License as published by the Free Software
  14  * Foundation; either version 3 of the License, or (at your option) any later
  15  * version.
  16  *
  17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License along with
  22  * wimlib; if not, see http://www.gnu.org/licenses/.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 #  include "config.h"
  27 #endif
  28
  29 #include "wimlib.h"
  30 #include "wimlib/endianness.h"
  31 #include "wimlib/error.h"
  32 #include "wimlib/file_io.h"
  33 #include "wimlib/lookup_table.h"
  34 #include "wimlib/lzms.h"
  35 #include "wimlib/resource.h"
  36 #include "wimlib/sha1.h"
  37
  38 #ifdef __WIN32__
  39 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
  40 #  include "wimlib/win32.h"
  41 #endif
  42
  43 #ifdef WITH_NTFS_3G
  44 /* for read_ntfs_file_prefix() */
  45 #  include "wimlib/ntfs_3g.h"
  46 #endif
  47
  48 #ifdef HAVE_ALLOCA_H
  49 #  include <alloca.h>
  50 #endif
  51 #include <errno.h>
  52 #include <fcntl.h>
  53 #include <stdlib.h>
  54 #include <unistd.h>
  55
  56 /*
  57  *                         Compressed WIM resources
  58  *
  59  * A compressed resource in a WIM consists of a number of compressed chunks,
  60  * each of which decompresses to a fixed chunk size (given in the WIM header;
  61  * usually 32768) except possibly the last, which always decompresses to any
  62  * remaining bytes.  In addition, immediately before the chunks, a table (the
  63  * "chunk table") provides the offset, in bytes relative to the end of the chunk
  64  * table, of the start of each compressed chunk, except for the first chunk
  65  * which is omitted as it always has an offset of 0.  Therefore, a compressed
  66  * resource with N chunks will have a chunk table with N - 1 entries.
  67  *
  68  * Additional information:
  69  *
  70  * - Entries in the chunk table are 4 bytes each, except if the uncompressed
  71  *   size of the resource is greater than 4 GiB, in which case the entries in
  72  *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
  73  *   little-endian integers.
  74  *
  75  * - The chunk table is included in the compressed size of the resource provided
  76  *   in the corresponding entry in the WIM's stream lookup table.
  77  *
  78  * - The compressed size of a chunk is never greater than the uncompressed size.
  79  *   From the compressor's point of view, chunks that would have compressed to a
  80  *   size greater than or equal to their original size are in fact stored
  81  *   uncompressed.  From the decompresser's point of view, chunks with
  82  *   compressed size equal to their uncompressed size are in fact uncompressed.
  83  *
  84  * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
  85  * structure of compressed resources was modified to allow piped reading and
  86  * writing.  To make sequential writing possible, the chunk table is placed
  87  * after the chunks rather than before the chunks, and to make sequential
  88  * reading possible, each chunk is prefixed with a 4-byte header giving its
  89  * compressed size as a 32-bit, unsigned, little-endian integer.  Otherwise the
  90  * details are the same.
  91  */
  92
  93
  94 /* Decompress the specified chunk that uses the specified compression type
  95  * @ctype, part of a WIM with default chunk size @wim_chunk_size.  For LZX the
  96  * separate @wim_chunk_size is needed because it determines the window size used
  97  * for LZX compression.  */
  98 static int
  99 decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen,
 100            int ctype, u32 wim_chunk_size)
 101 {
 102         switch (ctype) {
 103         case WIMLIB_COMPRESSION_TYPE_LZX:
 104                 return wimlib_lzx_decompress2(cchunk, clen,
 105                                               uchunk, ulen, wim_chunk_size);
 106         case WIMLIB_COMPRESSION_TYPE_XPRESS:
 107                 return wimlib_xpress_decompress(cchunk, clen,
 108                                                 uchunk, ulen);
 109         case WIMLIB_COMPRESSION_TYPE_LZMS:
 110                 return lzms_decompress(cchunk, clen,
 111                                        uchunk, ulen, wim_chunk_size);
 112         default:
 113                 wimlib_assert(0);
 114                 return -1;
 115         }
 116 }
 117
 118 /* Read data from a compressed WIM resource.  Assumes parameters were already
 119  * verified by read_partial_wim_resource().  */
 120 static int
 121 read_compressed_wim_resource(const struct wim_resource_spec * const rspec,
 122                              const u64 size, const consume_data_callback_t cb,
 123                              const u32 cb_chunk_size, void * const ctx_or_buf,
 124                              const int flags, const u64 offset)
 125 {
 126         int ret;
 127         int errno_save;
 128
 129         const u32 orig_chunk_size = rspec->cchunk_size;
 130         const u32 orig_chunk_order = bsr32(orig_chunk_size);
 131
 132         wimlib_assert(is_power_of_2(orig_chunk_size));
 133
 134         /* Handle the trivial case.  */
 135         if (size == 0)
 136                 return 0;
 137
 138         if (rspec->ctype == WIMLIB_COMPRESSION_TYPE_LZMS) {
 139                 /* TODO */
 140
 141                 unsigned clen = rspec->size_in_wim;
 142                 unsigned ulen = rspec->uncompressed_size;
 143
 144                 fprintf(stderr, "clen=%u, ulen=%u, offset=%lu\n", clen, ulen,
 145                         rspec->offset_in_wim);
 146
 147                 u8 *cbuf = MALLOC(clen);
 148                 u8 *ubuf = MALLOC(ulen);
 149
 150                 ret = full_pread(&rspec->wim->in_fd,
 151                                  cbuf, clen, rspec->offset_in_wim);
 152                 if (ret) {
 153                         ERROR_WITH_ERRNO("Can't read compressed data");
 154                         goto out_free_bufs;
 155                 }
 156
 157                 ret = lzms_decompress(cbuf, clen, ubuf, ulen,
 158                                       orig_chunk_size);
 159                 if (ret) {
 160                         ERROR("LZMS decompression error.");
 161                         errno = EINVAL;
 162                         ret = WIMLIB_ERR_DECOMPRESSION;
 163                         goto out_free_bufs;
 164                 }
 165                 if (cb) {
 166                         u32 chunk_size;
 167                         for (u64 i = offset; i < offset + size; i += chunk_size) {
 168                                 chunk_size = min(offset + size - i, cb_chunk_size);
 169                                 ret = cb(&ubuf[i], chunk_size, ctx_or_buf);
 170                                 if (ret)
 171                                         goto out_free_bufs;
 172                         }
 173                 } else {
 174                         memcpy(ctx_or_buf, &ubuf[offset], size);
 175                 }
 176                 ret = 0;
 177         out_free_bufs:
 178                 FREE(ubuf);
 179                 FREE(cbuf);
 180                 return ret;
 181         }
 182
 183         u64 *chunk_offsets = NULL;
 184         u8 *out_buf = NULL;
 185         u8 *tmp_buf = NULL;
 186         void *compressed_buf = NULL;
 187         bool chunk_offsets_malloced = false;
 188         bool out_buf_malloced = false;
 189         bool tmp_buf_malloced = false;
 190         bool compressed_buf_malloced = false;
 191
 192         /* Get the file descriptor for the WIM.  */
 193         struct filedes * const in_fd = &rspec->wim->in_fd;
 194
 195         /* Determine if we're reading a pipable resource from a pipe or not.  */
 196         const bool is_pipe_read = !filedes_is_seekable(in_fd);
 197
 198         /* Calculate the number of chunks the resource is divided into.  */
 199         const u64 num_chunks = (rspec->uncompressed_size + orig_chunk_size - 1) >> orig_chunk_order;
 200
 201         /* Calculate the 0-based index of the chunk at which the read starts.
 202          */
 203         const u64 start_chunk = offset >> orig_chunk_order;
 204
 205         /* For pipe reads, we always must start from the 0th chunk.  */
 206         const u64 actual_start_chunk = (is_pipe_read ? 0 : start_chunk);
 207
 208         /* Calculate the offset, within the start chunk, of the first byte of
 209          * the read.  */
 210         const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
 211
 212         /* Calculate the index of the chunk that contains the last byte of the
 213          * read.  */
 214         const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
 215
 216         /* Calculate the offset, within the end chunk, of the last byte of the
 217          * read.  */
 218         const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
 219
 220         /* Calculate the number of entries in the chunk table; it's one less
 221          * than the number of chunks, since the first chunk has no entry.  */
 222         const u64 num_chunk_entries = num_chunks - 1;
 223
 224         /* Set the size of each chunk table entry based on the resource's
 225          * uncompressed size.  */
 226         const u64 chunk_entry_size = (rspec->uncompressed_size > (1ULL << 32)) ? 8 : 4;
 227
 228         /* Calculate the size, in bytes, of the full chunk table.  */
 229         const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
 230
 231         /* Current offset to read from.  */
 232         u64 cur_read_offset = rspec->offset_in_wim;
 233         if (!is_pipe_read) {
 234                 /* Read the chunk table into memory.  */
 235
 236                 /* Calculate the number of chunk entries are actually needed to
 237                  * read the requested part of the resource.  Include an entry
 238                  * for the first chunk even though that doesn't exist in the
 239                  * on-disk table, but take into account that if the last chunk
 240                  * required for the read is not the last chunk of the resource,
 241                  * an extra chunk entry is needed so that the compressed size of
 242                  * the last chunk of the read can be determined.  */
 243                 const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
 244                                                     1 + (end_chunk != num_chunks - 1);
 245
 246                 /* Allocate a buffer to hold a subset of the chunk table.  It
 247                  * will only contain offsets for the chunks that are actually
 248                  * needed for this read.  For speed, allocate the buffer on the
 249                  * stack unless it's too large.  */
 250                 if ((size_t)(num_alloc_chunk_entries * sizeof(u64)) !=
 251                             (num_alloc_chunk_entries * sizeof(u64)))
 252                         goto oom;
 253
 254                 if (num_alloc_chunk_entries <= STACK_MAX / sizeof(u64)) {
 255                         chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
 256                 } else {
 257                         chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
 258                         if (chunk_offsets == NULL)
 259                                 goto oom;
 260                         chunk_offsets_malloced = true;
 261                 }
 262
 263                 /* Set the implicit offset of the first chunk if it's included
 264                  * in the needed chunks.  */
 265                 if (start_chunk == 0)
 266                         chunk_offsets[0] = 0;
 267
 268                 /* Calculate the index of the first needed entry in the chunk
 269                  * table.  */
 270                 const u64 start_table_idx = (start_chunk == 0) ?
 271                                 0 : start_chunk - 1;
 272
 273                 /* Calculate the number of entries that need to be read from the
 274                  * chunk table.  */
 275                 const u64 num_needed_chunk_entries = (start_chunk == 0) ?
 276                                 num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
 277
 278                 /* Calculate the number of bytes of data that need to be read
 279                  * from the chunk table.  */
 280                 const size_t chunk_table_needed_size =
 281                                 num_needed_chunk_entries * chunk_entry_size;
 282
 283                 /* Calculate the byte offset, in the WIM file, of the first
 284                  * chunk table entry to read.  Take into account that if the WIM
 285                  * file is in the special "pipable" format, then the chunk table
 286                  * is at the end of the resource, not the beginning.  */
 287                 const u64 file_offset_of_needed_chunk_entries =
 288                         rspec->offset_in_wim
 289                         + (start_table_idx * chunk_entry_size)
 290                         + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0);
 291
 292                 /* Read the needed chunk table entries into the end of the
 293                  * chunk_offsets buffer.  */
 294                 void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
 295                                               chunk_table_needed_size;
 296                 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
 297                                  file_offset_of_needed_chunk_entries);
 298                 if (ret)
 299                         goto read_error;
 300
 301                 /* Now fill in chunk_offsets from the entries we have read in
 302                  * chunk_tab_data.  Careful: chunk_offsets aliases
 303                  * chunk_tab_data, which breaks C's aliasing rules when we read
 304                  * 32-bit integers and store 64-bit integers.  But since the
 305                  * operations are safe as long as the compiler doesn't mess with
 306                  * their order, we use the gcc may_alias extension to tell the
 307                  * compiler that loads from the 32-bit integers may alias stores
 308                  * to the 64-bit integers.  */
 309                 {
 310                         typedef le64 __attribute__((may_alias)) aliased_le64_t;
 311                         typedef le32 __attribute__((may_alias)) aliased_le32_t;
 312                         u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
 313                         u64 i;
 314
 315                         if (chunk_entry_size == 4) {
 316                                 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
 317                                 for (i = 0; i < num_needed_chunk_entries; i++)
 318                                         chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
 319                         } else {
 320                                 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
 321                                 for (i = 0; i < num_needed_chunk_entries; i++)
 322                                         chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
 323                         }
 324                 }
 325
 326                 /* Set offset to beginning of first chunk to read.  */
 327                 cur_read_offset += chunk_offsets[0];
 328                 if (rspec->is_pipable)
 329                         cur_read_offset += start_chunk * sizeof(struct pwm_chunk_hdr);
 330                 else
 331                         cur_read_offset += chunk_table_size;
 332         }
 333
 334         /* If using a callback function, allocate a temporary buffer that will
 335          * hold data being passed to it.  If writing directly to a buffer
 336          * instead, arrange to write data directly into it.  */
 337         size_t out_buf_size;
 338         u8 *out_buf_end, *out_p;
 339         if (cb) {
 340                 out_buf_size = max(cb_chunk_size, orig_chunk_size);
 341                 if (out_buf_size <= STACK_MAX) {
 342                         out_buf = alloca(out_buf_size);
 343                 } else {
 344                         out_buf = MALLOC(out_buf_size);
 345                         if (out_buf == NULL)
 346                                 goto oom;
 347                         out_buf_malloced = true;
 348                 }
 349         } else {
 350                 out_buf_size = size;
 351                 out_buf = ctx_or_buf;
 352         }
 353         out_buf_end = out_buf + out_buf_size;
 354         out_p = out_buf;
 355
 356         /* Unless the raw compressed data was requested, allocate a temporary
 357          * buffer for reading compressed chunks, each of which can be at most
 358          * @orig_chunk_size - 1 bytes.  This excludes compressed chunks that are
 359          * a full @orig_chunk_size bytes, which are actually stored
 360          * uncompressed.  */
 361         if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
 362                 if (orig_chunk_size - 1 <= STACK_MAX) {
 363                         compressed_buf = alloca(orig_chunk_size - 1);
 364                 } else {
 365                         compressed_buf = MALLOC(orig_chunk_size - 1);
 366                         if (compressed_buf == NULL)
 367                                 goto oom;
 368                         compressed_buf_malloced = true;
 369                 }
 370         }
 371
 372         /* Allocate yet another temporary buffer, this one for decompressing
 373          * chunks for which only part of the data is needed.  */
 374         if (start_offset_in_chunk != 0 ||
 375             (end_offset_in_chunk != orig_chunk_size - 1 &&
 376              offset + size != rspec->uncompressed_size))
 377         {
 378                 if (orig_chunk_size <= STACK_MAX) {
 379                         tmp_buf = alloca(orig_chunk_size);
 380                 } else {
 381                         tmp_buf = MALLOC(orig_chunk_size);
 382                         if (tmp_buf == NULL)
 383                                 goto oom;
 384                         tmp_buf_malloced = true;
 385                 }
 386         }
 387
 388         /* Read, and possibly decompress, each needed chunk, either writing the
 389          * data directly into the @ctx_or_buf buffer or passing it to the @cb
 390          * callback function.  */
 391         for (u64 i = actual_start_chunk; i <= end_chunk; i++) {
 392
 393                 /* Calculate uncompressed size of next chunk.  */
 394                 u32 chunk_usize;
 395                 if ((i == num_chunks - 1) && (rspec->uncompressed_size & (orig_chunk_size - 1)))
 396                         chunk_usize = (rspec->uncompressed_size & (orig_chunk_size - 1));
 397                 else
 398                         chunk_usize = orig_chunk_size;
 399
 400                 /* Calculate compressed size of next chunk.  */
 401                 u32 chunk_csize;
 402                 if (is_pipe_read) {
 403                         struct pwm_chunk_hdr chunk_hdr;
 404
 405                         ret = full_pread(in_fd, &chunk_hdr,
 406                                          sizeof(chunk_hdr), cur_read_offset);
 407                         if (ret)
 408                                 goto read_error;
 409                         chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
 410                 } else {
 411                         if (i == num_chunks - 1) {
 412                                 chunk_csize = rspec->size_in_wim -
 413                                               chunk_table_size -
 414                                               chunk_offsets[i - start_chunk];
 415                                 if (rspec->is_pipable)
 416                                         chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr);
 417                         } else {
 418                                 chunk_csize = chunk_offsets[i + 1 - start_chunk] -
 419                                               chunk_offsets[i - start_chunk];
 420                         }
 421                 }
 422                 if (chunk_csize == 0 || chunk_csize > chunk_usize) {
 423                         ERROR("Invalid chunk size in compressed resource!");
 424                         errno = EINVAL;
 425                         ret = WIMLIB_ERR_DECOMPRESSION;
 426                         goto out_free_memory;
 427                 }
 428                 if (rspec->is_pipable)
 429                         cur_read_offset += sizeof(struct pwm_chunk_hdr);
 430
 431                 if (i >= start_chunk) {
 432                         /* Calculate how much of this chunk needs to be read.  */
 433                         u32 chunk_needed_size;
 434                         u32 start_offset = 0;
 435                         u32 end_offset = orig_chunk_size - 1;
 436
 437                         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 438                                 chunk_needed_size = chunk_csize;
 439                         } else {
 440                                 if (i == start_chunk)
 441                                         start_offset = start_offset_in_chunk;
 442
 443                                 if (i == end_chunk)
 444                                         end_offset = end_offset_in_chunk;
 445
 446                                 chunk_needed_size = end_offset + 1 - start_offset;
 447                         }
 448
 449                         if (chunk_csize == chunk_usize ||
 450                             (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
 451                         {
 452                                 /* Read the raw chunk data.  */
 453
 454                                 ret = full_pread(in_fd,
 455                                                  out_p,
 456                                                  chunk_needed_size,
 457                                                  cur_read_offset + start_offset);
 458                                 if (ret)
 459                                         goto read_error;
 460                         } else {
 461                                 /* Read and decompress the chunk.  */
 462
 463                                 u8 *target;
 464
 465                                 ret = full_pread(in_fd,
 466                                                  compressed_buf,
 467                                                  chunk_csize,
 468                                                  cur_read_offset);
 469                                 if (ret)
 470                                         goto read_error;
 471
 472                                 if (chunk_needed_size == chunk_usize)
 473                                         target = out_p;
 474                                 else
 475                                         target = tmp_buf;
 476
 477                                 ret = decompress(compressed_buf,
 478                                                  chunk_csize,
 479                                                  target,
 480                                                  chunk_usize,
 481                                                  rspec->ctype,
 482                                                  orig_chunk_size);
 483                                 if (ret) {
 484                                         ERROR("Failed to decompress data!");
 485                                         ret = WIMLIB_ERR_DECOMPRESSION;
 486                                         errno = EINVAL;
 487                                         goto out_free_memory;
 488                                 }
 489                                 if (chunk_needed_size != chunk_usize)
 490                                         memcpy(out_p, tmp_buf + start_offset,
 491                                                chunk_needed_size);
 492                         }
 493
 494                         out_p += chunk_needed_size;
 495
 496                         if (cb) {
 497                                 /* Feed the data to the callback function.  */
 498
 499                                 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 500                                         ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
 501                                         if (ret)
 502                                                 goto out_free_memory;
 503                                         out_p = out_buf;
 504                                 } else if (i == end_chunk || out_p == out_buf_end) {
 505                                         size_t bytes_sent;
 506                                         const u8 *p;
 507
 508                                         for (p = out_buf; p != out_p; p += bytes_sent) {
 509                                                 bytes_sent = min(cb_chunk_size, out_p - p);
 510                                                 ret = cb(p, bytes_sent, ctx_or_buf);
 511                                                 if (ret)
 512                                                         goto out_free_memory;
 513                                         }
 514                                         out_p = out_buf;
 515                                 }
 516                         }
 517                         cur_read_offset += chunk_csize;
 518                 } else {
 519                         u8 dummy;
 520
 521                         /* Skip data only.  */
 522                         cur_read_offset += chunk_csize;
 523                         ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 524                         if (ret)
 525                                 goto read_error;
 526                 }
 527         }
 528
 529         if (is_pipe_read
 530             && size == rspec->uncompressed_size
 531             && chunk_table_size)
 532         {
 533                 u8 dummy;
 534                 /* Skip chunk table at end of pipable resource.  */
 535
 536                 cur_read_offset += chunk_table_size;
 537                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 538                 if (ret)
 539                         goto read_error;
 540         }
 541         ret = 0;
 542 out_free_memory:
 543         errno_save = errno;
 544         if (chunk_offsets_malloced)
 545                 FREE(chunk_offsets);
 546         if (out_buf_malloced)
 547                 FREE(out_buf);
 548         if (compressed_buf_malloced)
 549                 FREE(compressed_buf);
 550         if (tmp_buf_malloced)
 551                 FREE(tmp_buf);
 552         errno = errno_save;
 553         return ret;
 554
 555 oom:
 556         ERROR("Not enough memory available to read size=%"PRIu64" bytes "
 557               "from compressed resource!", size);
 558         errno = ENOMEM;
 559         ret = WIMLIB_ERR_NOMEM;
 560         goto out_free_memory;
 561
 562 read_error:
 563         ERROR_WITH_ERRNO("Error reading compressed file resource!");
 564         goto out_free_memory;
 565 }
 566
 567 /* Read raw data from a file descriptor at the specified offset.  */
 568 static int
 569 read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb,
 570                    u32 cb_chunk_size, void *ctx_or_buf, u64 offset)
 571 {
 572         int ret;
 573         u8 *tmp_buf;
 574         bool tmp_buf_malloced = false;
 575
 576         if (cb) {
 577                 /* Send data to callback function in chunks.  */
 578                 if (cb_chunk_size <= STACK_MAX) {
 579                         tmp_buf = alloca(cb_chunk_size);
 580                 } else {
 581                         tmp_buf = MALLOC(cb_chunk_size);
 582                         if (tmp_buf == NULL) {
 583                                 ret = WIMLIB_ERR_NOMEM;
 584                                 goto out;
 585                         }
 586                         tmp_buf_malloced = true;
 587                 }
 588
 589                 while (size) {
 590                         size_t bytes_to_read = min(cb_chunk_size, size);
 591                         ret = full_pread(in_fd, tmp_buf, bytes_to_read,
 592                                          offset);
 593                         if (ret)
 594                                 goto read_error;
 595                         ret = cb(tmp_buf, bytes_to_read, ctx_or_buf);
 596                         if (ret)
 597                                 goto out;
 598                         size -= bytes_to_read;
 599                         offset += bytes_to_read;
 600                 }
 601         } else {
 602                 /* Read data directly into buffer.  */
 603                 ret = full_pread(in_fd, ctx_or_buf, size, offset);
 604                 if (ret)
 605                         goto read_error;
 606         }
 607         ret = 0;
 608         goto out;
 609
 610 read_error:
 611         ERROR_WITH_ERRNO("Read error");
 612 out:
 613         if (tmp_buf_malloced)
 614                 FREE(tmp_buf);
 615         return ret;
 616 }
 617
 618 /*
 619  * read_partial_wim_resource()-
 620  *
 621  * Read a range of data from an uncompressed or compressed resource in a WIM
 622  * file.  Data is written into a buffer or fed into a callback function, as
 623  * documented in read_stream_prefix().
 624  *
 625  * By default, this function provides the uncompressed data of the resource, and
 626  * @size and @offset are interpreted relative to the uncompressed contents of
 627  * the resource.  This behavior can be modified by either of the following
 628  * flags:
 629  *
 630  * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
 631  *      Read @size bytes at @offset of the raw contents of the compressed
 632  *      resource.  In the case of pipable resources, this excludes the stream
 633  *      header.  Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS.
 634  *
 635  * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
 636  *      Read the raw compressed chunks of the compressed resource.  @size must
 637  *      be the full uncompressed size, @offset must be 0, and @cb_chunk_size
 638  *      must be the resource chunk size.
 639  *
 640  * Return values:
 641  *      WIMLIB_ERR_SUCCESS (0)
 642  *      WIMLIB_ERR_READ                   (errno set)
 643  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
 644  *      WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
 645  *      WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
 646  *
 647  *      or other error code returned by the @cb function.
 648  */
 649 int
 650 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 651                           u64 size, consume_data_callback_t cb,
 652                           u32 cb_chunk_size, void *ctx_or_buf,
 653                           int flags, u64 offset)
 654 {
 655         const struct wim_resource_spec *rspec;
 656         struct filedes *in_fd;
 657
 658         /* Verify parameters.  */
 659         wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
 660         rspec = lte->rspec;
 661         in_fd = &rspec->wim->in_fd;
 662         if (cb)
 663                 wimlib_assert(is_power_of_2(cb_chunk_size));
 664         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 665                 /* Raw chunks mode is subject to the restrictions noted.  */
 666                 wimlib_assert(!lte_is_partial(lte));
 667                 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL));
 668                 wimlib_assert(cb_chunk_size == rspec->cchunk_size);
 669                 wimlib_assert(size == rspec->uncompressed_size);
 670                 wimlib_assert(offset == 0);
 671         } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) {
 672                 /* Raw full mode:  read must not overrun end of store size.  */
 673                 wimlib_assert(!lte_is_partial(lte));
 674                 wimlib_assert(offset + size >= size &&
 675                               offset + size <= rspec->size_in_wim);
 676         } else {
 677                 /* Normal mode:  read must not overrun end of original size.  */
 678                 wimlib_assert(offset + size >= size &&
 679                               lte->offset_in_res + offset + size <= rspec->uncompressed_size);
 680         }
 681
 682         DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] "
 683               "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" "
 684               "(readflags 0x%08x, resflags 0x%02x%s)",
 685               size, offset, lte->offset_in_res,
 686               rspec->size_in_wim,
 687               rspec->uncompressed_size,
 688               rspec->offset_in_wim,
 689               flags, lte->flags,
 690               (rspec->is_pipable ? ", pipable" : ""));
 691
 692         if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
 693             rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE)
 694         {
 695                 return read_raw_file_data(in_fd,
 696                                           size,
 697                                           cb,
 698                                           cb_chunk_size,
 699                                           ctx_or_buf,
 700                                           offset + rspec->offset_in_wim);
 701         } else {
 702                 return read_compressed_wim_resource(rspec, size, cb,
 703                                                     cb_chunk_size,
 704                                                     ctx_or_buf, flags, offset + lte->offset_in_res);
 705         }
 706 }
 707
 708 int
 709 read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte,
 710                                  size_t size, u64 offset, void *buf)
 711 {
 712         return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
 713 }
 714
 715 static int
 716 read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 717                        consume_data_callback_t cb, u32 cb_chunk_size,
 718                        void *ctx_or_buf, int flags)
 719 {
 720         return read_partial_wim_resource(lte, size, cb, cb_chunk_size,
 721                                          ctx_or_buf, flags, 0);
 722 }
 723
 724 #ifndef __WIN32__
 725 /* This function handles reading stream data that is located in an external
 726  * file,  such as a file that has been added to the WIM image through execution
 727  * of a wimlib_add_command.
 728  *
 729  * This assumes the file can be accessed using the standard POSIX open(),
 730  * read(), and close().  On Windows this will not necessarily be the case (since
 731  * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be
 732  * encrypted), so Windows uses its own code for its equivalent case.
 733  */
 734 static int
 735 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 736                          consume_data_callback_t cb, u32 cb_chunk_size,
 737                          void *ctx_or_buf, int _ignored_flags)
 738 {
 739         int ret;
 740         int raw_fd;
 741         struct filedes fd;
 742
 743         wimlib_assert(size <= lte->size);
 744         DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk);
 745
 746         raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY);
 747         if (raw_fd < 0) {
 748                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk);
 749                 return WIMLIB_ERR_OPEN;
 750         }
 751         filedes_init(&fd, raw_fd);
 752         ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0);
 753         filedes_close(&fd);
 754         return ret;
 755 }
 756 #endif /* !__WIN32__ */
 757
 758 /* This function handles the trivial case of reading stream data that is, in
 759  * fact, already located in an in-memory buffer.  */
 760 static int
 761 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
 762                    u64 size, consume_data_callback_t cb,
 763                    u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags)
 764 {
 765         wimlib_assert(size <= lte->size);
 766
 767         if (cb) {
 768                 /* Feed the data into the callback function in
 769                  * appropriately-sized chunks.  */
 770                 int ret;
 771                 u32 chunk_size;
 772
 773                 for (u64 offset = 0; offset < size; offset += chunk_size) {
 774                         chunk_size = min(cb_chunk_size, size - offset);
 775                         ret = cb((const u8*)lte->attached_buffer + offset,
 776                                  chunk_size, ctx_or_buf);
 777                         if (ret)
 778                                 return ret;
 779                 }
 780         } else {
 781                 /* Copy the data directly into the specified buffer.  */
 782                 memcpy(ctx_or_buf, lte->attached_buffer, size);
 783         }
 784         return 0;
 785 }
 786
 787 typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
 788                                             u64 size, consume_data_callback_t cb,
 789                                             u32 cb_chunk_size, void *ctx_or_buf,
 790                                             int flags);
 791
 792 /*
 793  * read_stream_prefix()-
 794  *
 795  * Reads the first @size bytes from a generic "stream", which may be located in
 796  * any one of several locations, such as in a WIM file (compressed or
 797  * uncompressed), in an external file, or directly in an in-memory buffer.
 798  *
 799  * This function feeds the data either to a callback function (@cb != NULL,
 800  * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL,
 801  * @ctx_or_buf specifies the buffer, which must have room for at least @size
 802  * bytes).
 803  *
 804  * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks
 805  * to feed the callback function.  @cb_chunk_size must be positive, and if the
 806  * stream is in a WIM file, must be a power of 2.  All chunks, except possibly
 807  * the last one, will be this size.  If (@cb == NULL), @cb_chunk_size is
 808  * ignored.
 809  *
 810  * If the stream is located in a WIM file, @flags can be set as documented in
 811  * read_partial_wim_resource().  Otherwise @flags are ignored.
 812  *
 813  * Returns 0 on success; nonzero on error.  A nonzero value will be returned if
 814  * the stream data cannot be successfully read (for a number of different
 815  * reasons, depending on the stream location), or if a callback function was
 816  * specified and it returned nonzero.
 817  */
 818 int
 819 read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
 820                    consume_data_callback_t cb, u32 cb_chunk_size,
 821                    void *ctx_or_buf, int flags)
 822 {
 823         /* This function merely verifies several preconditions, then passes
 824          * control to an appropriate function for understanding each possible
 825          * stream location.  */
 826         static const read_stream_prefix_handler_t handlers[] = {
 827                 [RESOURCE_IN_WIM]             = read_wim_stream_prefix,
 828         #ifdef __WIN32__
 829                 [RESOURCE_IN_FILE_ON_DISK]    = read_win32_file_prefix,
 830         #else
 831                 [RESOURCE_IN_FILE_ON_DISK]    = read_file_on_disk_prefix,
 832         #endif
 833                 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
 834         #ifdef WITH_FUSE
 835                 [RESOURCE_IN_STAGING_FILE]    = read_file_on_disk_prefix,
 836         #endif
 837         #ifdef WITH_NTFS_3G
 838                 [RESOURCE_IN_NTFS_VOLUME]     = read_ntfs_file_prefix,
 839         #endif
 840         #ifdef __WIN32__
 841                 [RESOURCE_WIN32_ENCRYPTED]    = read_win32_encrypted_file_prefix,
 842         #endif
 843         };
 844         wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
 845                       && handlers[lte->resource_location] != NULL);
 846         wimlib_assert(cb == NULL || cb_chunk_size > 0);
 847         return handlers[lte->resource_location](lte, size, cb, cb_chunk_size,
 848                                                 ctx_or_buf, flags);
 849 }
 850
 851 /* Read the full uncompressed data of the specified stream into the specified
 852  * buffer, which must have space for at least lte->size bytes.  */
 853 int
 854 read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf)
 855 {
 856         return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0);
 857 }
 858
 859 /* Read the full uncompressed data of the specified stream.  A buffer sufficient
 860  * to hold the data is allocated and returned in @buf_ret.  */
 861 int
 862 read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte,
 863                                 void **buf_ret)
 864 {
 865         int ret;
 866         void *buf;
 867
 868         if ((size_t)lte->size != lte->size) {
 869                 ERROR("Can't read %"PRIu64" byte stream into "
 870                       "memory", lte->size);
 871                 return WIMLIB_ERR_NOMEM;
 872         }
 873
 874         buf = MALLOC(lte->size);
 875         if (buf == NULL)
 876                 return WIMLIB_ERR_NOMEM;
 877
 878         ret = read_full_stream_into_buf(lte, buf);
 879         if (ret) {
 880                 FREE(buf);
 881                 return ret;
 882         }
 883
 884         *buf_ret = buf;
 885         return 0;
 886 }
 887
 888 /* Retrieve the full uncompressed data of the specified WIM resource.  */
 889 static int
 890 wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret)
 891 {
 892         int ret;
 893         struct wim_lookup_table_entry *lte;
 894
 895         lte = new_lookup_table_entry();
 896         if (lte == NULL)
 897                 return WIMLIB_ERR_NOMEM;
 898
 899         lte->unhashed = 1;
 900         lte_bind_wim_resource_spec(lte, rspec);
 901         lte->flags = rspec->flags;
 902         lte->size = rspec->uncompressed_size;
 903         lte->offset_in_res = 0;
 904
 905         ret = read_full_stream_into_alloc_buf(lte, buf_ret);
 906
 907         lte_unbind_wim_resource_spec(lte);
 908         free_lookup_table_entry(lte);
 909         return ret;
 910 }
 911
 912 /* Retrieve the full uncompressed data of the specified WIM resource.  */
 913 int
 914 wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret)
 915 {
 916         DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", "
 917               "uncompressed_size=%"PRIu64,
 918               reshdr->offset_in_wim, reshdr->size_in_wim, reshdr->uncompressed_size);
 919
 920         struct wim_resource_spec rspec;
 921         wim_res_hdr_to_spec(reshdr, wim, &rspec);
 922         return wim_resource_spec_to_data(&rspec, buf_ret);
 923 }
 924
 925 struct extract_ctx {
 926         SHA_CTX sha_ctx;
 927         consume_data_callback_t extract_chunk;
 928         void *extract_chunk_arg;
 929 };
 930
 931 static int
 932 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx)
 933 {
 934         struct extract_ctx *ctx = _ctx;
 935
 936         sha1_update(&ctx->sha_ctx, chunk, chunk_size);
 937         return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
 938 }
 939
 940 /* Extracts the first @size bytes of a stream to somewhere.  In the process, the
 941  * SHA1 message digest of the uncompressed stream is checked if the full stream
 942  * is being extracted.
 943  *
 944  * @extract_chunk is a function that will be called to extract each chunk of the
 945  * stream.  */
 946 int
 947 extract_stream(const struct wim_lookup_table_entry *lte, u64 size,
 948                consume_data_callback_t extract_chunk, void *extract_chunk_arg)
 949 {
 950         int ret;
 951         if (size == lte->size) {
 952                 /* Do SHA1 */
 953                 struct extract_ctx ctx;
 954                 ctx.extract_chunk = extract_chunk;
 955                 ctx.extract_chunk_arg = extract_chunk_arg;
 956                 sha1_init(&ctx.sha_ctx);
 957                 ret = read_stream_prefix(lte, size,
 958                                          extract_chunk_sha1_wrapper,
 959                                          lte_cchunk_size(lte),
 960                                          &ctx, 0);
 961                 if (ret == 0) {
 962                         u8 hash[SHA1_HASH_SIZE];
 963                         sha1_final(hash, &ctx.sha_ctx);
 964                         if (!hashes_equal(hash, lte->hash)) {
 965                                 if (wimlib_print_errors) {
 966                                         ERROR("Invalid SHA1 message digest "
 967                                               "on the following WIM stream:");
 968                                         print_lookup_table_entry(lte, stderr);
 969                                         if (lte->resource_location == RESOURCE_IN_WIM)
 970                                                 ERROR("The WIM file appears to be corrupt!");
 971                                 }
 972                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
 973                         }
 974                 }
 975         } else {
 976                 /* Don't do SHA1 */
 977                 ret = read_stream_prefix(lte, size, extract_chunk,
 978                                          lte_cchunk_size(lte),
 979                                          extract_chunk_arg, 0);
 980         }
 981         return ret;
 982 }
 983
 984 static int
 985 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
 986 {
 987         struct filedes *fd = _fd_p;
 988         int ret = full_write(fd, buf, len);
 989         if (ret)
 990                 ERROR_WITH_ERRNO("Error writing to file descriptor");
 991         return ret;
 992 }
 993
 994 /* Extract the first @size bytes of the specified stream to the specified file
 995  * descriptor.  If @size is the full size of the stream, its SHA1 message digest
 996  * is also checked.  */
 997 int
 998 extract_stream_to_fd(const struct wim_lookup_table_entry *lte,
 999                      struct filedes *fd, u64 size)
1000 {
1001         return extract_stream(lte, size, extract_wim_chunk_to_fd, fd);
1002 }
1003
1004
1005 static int
1006 sha1_chunk(const void *buf, size_t len, void *ctx)
1007 {
1008         sha1_update(ctx, buf, len);
1009         return 0;
1010 }
1011
1012 /* Calculate the SHA1 message digest of a stream, storing it in @lte->hash.  */
1013 int
1014 sha1_stream(struct wim_lookup_table_entry *lte)
1015 {
1016         int ret;
1017         SHA_CTX sha_ctx;
1018
1019         sha1_init(&sha_ctx);
1020         ret = read_stream_prefix(lte, lte->size,
1021                                  sha1_chunk, lte_cchunk_size(lte),
1022                                  &sha_ctx, 0);
1023         if (ret == 0)
1024                 sha1_final(lte->hash, &sha_ctx);
1025
1026         return ret;
1027 }
1028
1029 /* Convert a WIM resource header to a stand-alone resource specification.  */
1030 void
1031 wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim,
1032                     struct wim_resource_spec *spec)
1033 {
1034         spec->wim = wim;
1035         spec->offset_in_wim = reshdr->offset_in_wim;
1036         spec->size_in_wim = reshdr->size_in_wim;
1037         spec->uncompressed_size = reshdr->uncompressed_size;
1038         INIT_LIST_HEAD(&spec->lte_list);
1039         spec->flags = reshdr->flags;
1040         spec->is_pipable = wim_is_pipable(wim);
1041         if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) {
1042                 spec->ctype = wim->compression_type;
1043                 spec->cchunk_size = wim->chunk_size;
1044         } else {
1045                 spec->ctype = WIMLIB_COMPRESSION_TYPE_NONE;
1046                 spec->cchunk_size = 0;
1047         }
1048 }
1049
1050 /* Convert a stand-alone resource specification to a WIM resource header.  */
1051 void
1052 wim_res_spec_to_hdr(const struct wim_resource_spec *rspec,
1053                     struct wim_reshdr *reshdr)
1054 {
1055         reshdr->offset_in_wim     = rspec->offset_in_wim;
1056         reshdr->size_in_wim       = rspec->size_in_wim;
1057         reshdr->flags             = rspec->flags;
1058         reshdr->uncompressed_size = rspec->uncompressed_size;
1059 }
1060
1061 /* Translates a WIM resource header from the on-disk format into an in-memory
1062  * format.  */
1063 int
1064 get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,
1065                struct wim_reshdr *reshdr)
1066 {
1067         reshdr->offset_in_wim = le64_to_cpu(disk_reshdr->offset_in_wim);
1068         reshdr->size_in_wim = (((u64)disk_reshdr->size_in_wim[0] <<  0) |
1069                               ((u64)disk_reshdr->size_in_wim[1] <<  8) |
1070                               ((u64)disk_reshdr->size_in_wim[2] << 16) |
1071                               ((u64)disk_reshdr->size_in_wim[3] << 24) |
1072                               ((u64)disk_reshdr->size_in_wim[4] << 32) |
1073                               ((u64)disk_reshdr->size_in_wim[5] << 40) |
1074                               ((u64)disk_reshdr->size_in_wim[6] << 48));
1075         reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size);
1076         reshdr->flags = disk_reshdr->flags;
1077
1078         /* Truncate numbers to 62 bits to avoid possible overflows.  */
1079         if (reshdr->offset_in_wim & 0xc000000000000000ULL)
1080                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1081
1082         if (reshdr->uncompressed_size & 0xc000000000000000ULL)
1083                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1084
1085         return 0;
1086 }
1087
1088 /* Translates a WIM resource header from an in-memory format into the on-disk
1089  * format.  */
1090 void
1091 put_wim_reshdr(const struct wim_reshdr *reshdr,
1092                struct wim_reshdr_disk *disk_reshdr)
1093 {
1094         disk_reshdr->size_in_wim[0] = reshdr->size_in_wim  >>  0;
1095         disk_reshdr->size_in_wim[1] = reshdr->size_in_wim  >>  8;
1096         disk_reshdr->size_in_wim[2] = reshdr->size_in_wim  >> 16;
1097         disk_reshdr->size_in_wim[3] = reshdr->size_in_wim  >> 24;
1098         disk_reshdr->size_in_wim[4] = reshdr->size_in_wim  >> 32;
1099         disk_reshdr->size_in_wim[5] = reshdr->size_in_wim  >> 40;
1100         disk_reshdr->size_in_wim[6] = reshdr->size_in_wim  >> 48;
1101         disk_reshdr->flags = reshdr->flags;
1102         disk_reshdr->offset_in_wim = cpu_to_le64(reshdr->offset_in_wim);
1103         disk_reshdr->uncompressed_size = cpu_to_le64(reshdr->uncompressed_size);
1104 }