wimlib.net Git - wimlib/blob - src/resource.c

   1 /*
   2  * resource.c
   3  *
   4  * Read uncompressed and compressed metadata and file resources from a WIM file.
   5  */
   6
   7 /*
   8  * Copyright (C) 2012, 2013 Eric Biggers
   9  *
  10  * This file is part of wimlib, a library for working with WIM files.
  11  *
  12  * wimlib is free software; you can redistribute it and/or modify it under the
  13  * terms of the GNU General Public License as published by the Free Software
  14  * Foundation; either version 3 of the License, or (at your option) any later
  15  * version.
  16  *
  17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License along with
  22  * wimlib; if not, see http://www.gnu.org/licenses/.
  23  */
  24
  25 #ifdef HAVE_CONFIG_H
  26 #  include "config.h"
  27 #endif
  28
  29 #include "wimlib.h"
  30 #include "wimlib/endianness.h"
  31 #include "wimlib/error.h"
  32 #include "wimlib/file_io.h"
  33 #include "wimlib/lookup_table.h"
  34 #include "wimlib/resource.h"
  35 #include "wimlib/sha1.h"
  36
  37 #ifdef __WIN32__
  38 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
  39 #  include "wimlib/win32.h"
  40 #endif
  41
  42 #ifdef WITH_NTFS_3G
  43 /* for read_ntfs_file_prefix() */
  44 #  include "wimlib/ntfs_3g.h"
  45 #endif
  46
  47 #ifdef HAVE_ALLOCA_H
  48 #  include <alloca.h>
  49 #endif
  50 #include <errno.h>
  51 #include <fcntl.h>
  52 #include <stdlib.h>
  53 #include <unistd.h>
  54
  55 /*
  56  *                         Compressed WIM resources
  57  *
  58  * A compressed resource in a WIM consists of a number of compressed chunks,
  59  * each of which decompresses to a fixed chunk size (given in the WIM header;
  60  * usually 32768) except possibly the last, which always decompresses to any
  61  * remaining bytes.  In addition, immediately before the chunks, a table (the
  62  * "chunk table") provides the offset, in bytes relative to the end of the chunk
  63  * table, of the start of each compressed chunk, except for the first chunk
  64  * which is omitted as it always has an offset of 0.  Therefore, a compressed
  65  * resource with N chunks will have a chunk table with N - 1 entries.
  66  *
  67  * Additional information:
  68  *
  69  * - Entries in the chunk table are 4 bytes each, except if the uncompressed
  70  *   size of the resource is greater than 4 GiB, in which case the entries in
  71  *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
  72  *   little-endian integers.
  73  *
  74  * - The chunk table is included in the compressed size of the resource provided
  75  *   in the corresponding entry in the WIM's stream lookup table.
  76  *
  77  * - The compressed size of a chunk is never greater than the uncompressed size.
  78  *   From the compressor's point of view, chunks that would have compressed to a
  79  *   size greater than or equal to their original size are in fact stored
  80  *   uncompressed.  From the decompresser's point of view, chunks with
  81  *   compressed size equal to their uncompressed size are in fact uncompressed.
  82  *
  83  * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
  84  * structure of compressed resources was modified to allow piped reading and
  85  * writing.  To make sequential writing possible, the chunk table is placed
  86  * after the chunks rather than before the chunks, and to make sequential
  87  * reading possible, each chunk is prefixed with a 4-byte header giving its
  88  * compressed size as a 32-bit, unsigned, little-endian integer.  Otherwise the
  89  * details are the same.
  90  */
  91
  92
  93 /* Decompress the specified chunk that uses the specified compression type
  94  * @ctype, part of a WIM with default chunk size @wim_chunk_size.  For LZX the
  95  * separate @wim_chunk_size is needed because it determines the window size used
  96  * for LZX compression.  */
  97 static int
  98 decompress(const void *cchunk, unsigned clen,
  99            void *uchunk, unsigned ulen,
 100            int ctype, u32 wim_chunk_size)
 101 {
 102         switch (ctype) {
 103         case WIMLIB_COMPRESSION_TYPE_XPRESS:
 104                 return wimlib_xpress_decompress(cchunk,
 105                                                 clen,
 106                                                 uchunk,
 107                                                 ulen);
 108         case WIMLIB_COMPRESSION_TYPE_LZX:
 109                 return wimlib_lzx_decompress2(cchunk,
 110                                               clen,
 111                                               uchunk,
 112                                               ulen,
 113                                               wim_chunk_size);
 114         default:
 115                 wimlib_assert(0);
 116                 return -1;
 117         }
 118 }
 119
 120 /* Read data from a compressed WIM resource.  Assumes parameters were already
 121  * verified by read_partial_wim_resource().  */
 122 static int
 123 read_compressed_wim_resource(const struct wim_lookup_table_entry * const lte,
 124                              const u64 size, const consume_data_callback_t cb,
 125                              const u32 cb_chunk_size, void * const ctx_or_buf,
 126                              const int flags, const u64 offset)
 127 {
 128         int ret;
 129         int errno_save;
 130
 131         const u32 orig_chunk_size = wim_resource_chunk_size(lte);
 132         const u32 orig_chunk_order = bsr32(orig_chunk_size);
 133
 134         wimlib_assert(is_power_of_2(orig_chunk_size));
 135
 136         /* Handle the trivial case.  */
 137         if (size == 0)
 138                 return 0;
 139
 140         u64 *chunk_offsets = NULL;
 141         u8 *out_buf = NULL;
 142         u8 *tmp_buf = NULL;
 143         void *compressed_buf = NULL;
 144         bool chunk_offsets_malloced = false;
 145         bool out_buf_malloced = false;
 146         bool tmp_buf_malloced = false;
 147         bool compressed_buf_malloced = false;
 148
 149         /* Get the file descriptor for the WIM.  */
 150         struct filedes * const in_fd = &lte->wim->in_fd;
 151
 152         /* Determine if we're reading a pipable resource from a pipe or not.  */
 153         const bool is_pipe_read = !filedes_is_seekable(in_fd);
 154
 155         /* Calculate the number of chunks the resource is divided into.  */
 156         const u64 num_chunks = wim_resource_chunks(lte);
 157
 158         /* Calculate the 0-based index of the chunk at which the read starts.
 159          */
 160         const u64 start_chunk = offset >> orig_chunk_order;
 161
 162         /* For pipe reads, we always must start from the 0th chunk.  */
 163         const u64 actual_start_chunk = (is_pipe_read ? 0 : start_chunk);
 164
 165         /* Calculate the offset, within the start chunk, of the first byte of
 166          * the read.  */
 167         const u32 start_offset_in_chunk = offset & (orig_chunk_size - 1);
 168
 169         /* Calculate the index of the chunk that contains the last byte of the
 170          * read.  */
 171         const u64 end_chunk = (offset + size - 1) >> orig_chunk_order;
 172
 173         /* Calculate the offset, within the end chunk, of the last byte of the
 174          * read.  */
 175         const u32 end_offset_in_chunk = (offset + size - 1) & (orig_chunk_size - 1);
 176
 177         /* Calculate the number of entries in the chunk table; it's one less
 178          * than the number of chunks, since the first chunk has no entry.  */
 179         const u64 num_chunk_entries = num_chunks - 1;
 180
 181         /* Set the size of each chunk table entry based on the resource's
 182          * uncompressed size.  */
 183         const u64 chunk_entry_size = (wim_resource_size(lte) > (1ULL << 32)) ? 8 : 4;
 184
 185         /* Calculate the size, in bytes, of the full chunk table.  */
 186         const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
 187
 188         /* Current offset to read from.  */
 189         u64 cur_read_offset = lte->resource_entry.offset;
 190         if (!is_pipe_read) {
 191                 /* Read the chunk table into memory.  */
 192
 193                 /* Calculate the number of chunk entries are actually needed to
 194                  * read the requested part of the resource.  Include an entry
 195                  * for the first chunk even though that doesn't exist in the
 196                  * on-disk table, but take into account that if the last chunk
 197                  * required for the read is not the last chunk of the resource,
 198                  * an extra chunk entry is needed so that the compressed size of
 199                  * the last chunk of the read can be determined.  */
 200                 const u64 num_alloc_chunk_entries = end_chunk - start_chunk +
 201                                                     1 + (end_chunk != num_chunks - 1);
 202
 203                 /* Allocate a buffer to hold a subset of the chunk table.  It
 204                  * will only contain offsets for the chunks that are actually
 205                  * needed for this read.  For speed, allocate the buffer on the
 206                  * stack unless it's too large.  */
 207                 if ((size_t)(num_alloc_chunk_entries * sizeof(u64)) !=
 208                             (num_alloc_chunk_entries * sizeof(u64)))
 209                         goto oom;
 210
 211                 if (num_alloc_chunk_entries <= STACK_MAX / sizeof(u64)) {
 212                         chunk_offsets = alloca(num_alloc_chunk_entries * sizeof(u64));
 213                 } else {
 214                         chunk_offsets = MALLOC(num_alloc_chunk_entries * sizeof(u64));
 215                         if (chunk_offsets == NULL)
 216                                 goto oom;
 217                         chunk_offsets_malloced = true;
 218                 }
 219
 220                 /* Set the implicit offset of the first chunk if it's included
 221                  * in the needed chunks.  */
 222                 if (start_chunk == 0)
 223                         chunk_offsets[0] = 0;
 224
 225                 /* Calculate the index of the first needed entry in the chunk
 226                  * table.  */
 227                 const u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
 228
 229                 /* Calculate the number of entries that need to be read from the
 230                  * chunk table.  */
 231                 const u64 num_needed_chunk_entries = (start_chunk == 0) ?
 232                                         num_alloc_chunk_entries - 1 : num_alloc_chunk_entries;
 233
 234                 /* Calculate the number of bytes of data that need to be read
 235                  * from the chunk table.  */
 236                 const size_t chunk_table_needed_size =
 237                                         num_needed_chunk_entries * chunk_entry_size;
 238
 239                 /* Calculate the byte offset, in the WIM file, of the first
 240                  * chunk table entry to read.  Take into account that if the WIM
 241                  * file is in the special "pipable" format, then the chunk table
 242                  * is at the end of the resource, not the beginning.  */
 243                 const u64 file_offset_of_needed_chunk_entries =
 244                         lte->resource_entry.offset
 245                         + (start_table_idx * chunk_entry_size)
 246                         + (lte->is_pipable ? (lte->resource_entry.size - chunk_table_size) : 0);
 247
 248                 /* Read the needed chunk table entries into the end of the
 249                  * chunk_offsets buffer.  */
 250                 void * const chunk_tab_data = (u8*)&chunk_offsets[num_alloc_chunk_entries] -
 251                                               chunk_table_needed_size;
 252                 ret = full_pread(in_fd, chunk_tab_data, chunk_table_needed_size,
 253                                  file_offset_of_needed_chunk_entries);
 254                 if (ret)
 255                         goto read_error;
 256
 257                 /* Now fill in chunk_offsets from the entries we have read in
 258                  * chunk_tab_data.  Careful: chunk_offsets aliases
 259                  * chunk_tab_data, which breaks C's aliasing rules when we read
 260                  * 32-bit integers and store 64-bit integers.  But since the
 261                  * operations are safe as long as the compiler doesn't mess with
 262                  * their order, we use the gcc may_alias extension to tell the
 263                  * compiler that loads from the 32-bit integers may alias stores
 264                  * to the 64-bit integers.  */
 265                 {
 266                         typedef le64 __attribute__((may_alias)) aliased_le64_t;
 267                         typedef le32 __attribute__((may_alias)) aliased_le32_t;
 268                         u64 * const chunk_offsets_p = chunk_offsets + (start_chunk == 0);
 269                         u64 i;
 270
 271                         if (chunk_entry_size == 4) {
 272                                 aliased_le32_t *raw_entries = (aliased_le32_t*)chunk_tab_data;
 273                                 for (i = 0; i < num_needed_chunk_entries; i++)
 274                                         chunk_offsets_p[i] = le32_to_cpu(raw_entries[i]);
 275                         } else {
 276                                 aliased_le64_t *raw_entries = (aliased_le64_t*)chunk_tab_data;
 277                                 for (i = 0; i < num_needed_chunk_entries; i++)
 278                                         chunk_offsets_p[i] = le64_to_cpu(raw_entries[i]);
 279                         }
 280                 }
 281
 282                 /* Set offset to beginning of first chunk to read.  */
 283                 cur_read_offset += chunk_table_size + chunk_offsets[0];
 284         }
 285
 286         /* If using a callback function, allocate a temporary buffer that will
 287          * be used to pass data to it.  If writing directly to a buffer instead,
 288          * arrange to write data directly into it.  */
 289         size_t out_buf_size;
 290         u8 *out_buf_end, *out_p;
 291         if (cb) {
 292                 out_buf_size = max(cb_chunk_size, orig_chunk_size);
 293                 if (out_buf_size <= STACK_MAX) {
 294                         out_buf = alloca(out_buf_size);
 295                 } else {
 296                         out_buf = MALLOC(out_buf_size);
 297                         if (out_buf == NULL)
 298                                 goto oom;
 299                         out_buf_malloced = true;
 300                 }
 301         } else {
 302                 out_buf_size = size;
 303                 out_buf = ctx_or_buf;
 304         }
 305         out_buf_end = out_buf + out_buf_size;
 306         out_p = out_buf;
 307
 308         /* Unless the raw compressed data was requested, allocate a temporary
 309          * buffer for reading compressed chunks, each of which can be at most
 310          * @orig_chunk_size - 1 bytes.  This excludes compressed chunks that are
 311          * a full @orig_chunk_size bytes, which are actually stored
 312          * uncompressed.  */
 313         if (!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS)) {
 314                 if (orig_chunk_size - 1 <= STACK_MAX) {
 315                         compressed_buf = alloca(orig_chunk_size - 1);
 316                 } else {
 317                         compressed_buf = MALLOC(orig_chunk_size - 1);
 318                         if (compressed_buf == NULL)
 319                                 goto oom;
 320                         compressed_buf_malloced = true;
 321                 }
 322         }
 323
 324         /* Allocate yet another temporary buffer, this one for decompressing
 325          * chunks for which only part of the data is needed.  */
 326         if (start_offset_in_chunk != 0 ||
 327             (end_offset_in_chunk != orig_chunk_size - 1 &&
 328              offset + size != wim_resource_size(lte)))
 329         {
 330                 if (orig_chunk_size <= STACK_MAX) {
 331                         tmp_buf = alloca(orig_chunk_size);
 332                 } else {
 333                         tmp_buf = MALLOC(orig_chunk_size);
 334                         if (tmp_buf == NULL)
 335                                 goto oom;
 336                         tmp_buf_malloced = true;
 337                 }
 338         }
 339
 340         /* Read, and possibly decompress, each needed chunk, either writing the
 341          * data directly into the @ctx_or_buf buffer or passing it to the @cb
 342          * callback function.  */
 343         for (u64 i = actual_start_chunk; i <= end_chunk; i++) {
 344
 345                 /* Calculate uncompressed size of next chunk.  */
 346                 u32 chunk_usize;
 347                 if ((i == num_chunks - 1) && (wim_resource_size(lte) & (orig_chunk_size - 1)))
 348                         chunk_usize = (wim_resource_size(lte) & (orig_chunk_size - 1));
 349                 else
 350                         chunk_usize = orig_chunk_size;
 351
 352                 /* Calculate compressed size of next chunk.  */
 353                 u32 chunk_csize;
 354                 if (is_pipe_read) {
 355                         struct pwm_chunk_hdr chunk_hdr;
 356
 357                         ret = full_pread(in_fd, &chunk_hdr,
 358                                          sizeof(chunk_hdr), cur_read_offset);
 359                         if (ret)
 360                                 goto read_error;
 361                         chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
 362                 } else {
 363                         if (i == num_chunks - 1) {
 364                                 chunk_csize = lte->resource_entry.size -
 365                                               chunk_table_size -
 366                                               chunk_offsets[i - start_chunk];
 367                                 if (lte->is_pipable)
 368                                         chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr);
 369                         } else {
 370                                 chunk_csize = chunk_offsets[i + 1 - start_chunk] -
 371                                               chunk_offsets[i - start_chunk];
 372                         }
 373                 }
 374                 if (chunk_csize == 0 || chunk_csize > orig_chunk_size) {
 375                         ERROR("Invalid chunk size in compressed resource!");
 376                         errno = EINVAL;
 377                         ret = WIMLIB_ERR_INVALID_CHUNK_SIZE;
 378                         goto out_free_memory;
 379                 }
 380                 if (lte->is_pipable)
 381                         cur_read_offset += sizeof(struct pwm_chunk_hdr);
 382
 383                 if (i >= start_chunk) {
 384                         /* Calculate how much of this chunk needs to be read.  */
 385                         u32 chunk_needed_size;
 386                         u32 start_offset = 0;
 387                         u32 end_offset = orig_chunk_size - 1;
 388
 389                         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 390                                 chunk_needed_size = chunk_csize;
 391                         } else {
 392                                 if (i == start_chunk)
 393                                         start_offset = start_offset_in_chunk;
 394
 395                                 if (i == end_chunk)
 396                                         end_offset = end_offset_in_chunk;
 397
 398                                 chunk_needed_size = end_offset + 1 - start_offset;
 399                         }
 400
 401                         if (chunk_csize == chunk_usize ||
 402                             (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS))
 403                         {
 404                                 /* Read the raw chunk data.  */
 405
 406                                 ret = full_pread(in_fd,
 407                                                  out_p,
 408                                                  chunk_needed_size,
 409                                                  cur_read_offset + start_offset);
 410                                 if (ret)
 411                                         goto read_error;
 412                         } else {
 413                                 /* Read and decompress the chunk.  */
 414
 415                                 u8 *target;
 416
 417                                 ret = full_pread(in_fd,
 418                                                  compressed_buf,
 419                                                  chunk_csize,
 420                                                  cur_read_offset);
 421                                 if (ret)
 422                                         goto read_error;
 423
 424                                 if (chunk_needed_size == chunk_usize)
 425                                         target = out_p;
 426                                 else
 427                                         target = tmp_buf;
 428
 429                                 ret = decompress(compressed_buf,
 430                                                  chunk_csize,
 431                                                  target,
 432                                                  chunk_usize,
 433                                                  wim_resource_compression_type(lte),
 434                                                  orig_chunk_size);
 435                                 if (ret) {
 436                                         ERROR("Failed to decompress data!");
 437                                         ret = WIMLIB_ERR_DECOMPRESSION;
 438                                         errno = EINVAL;
 439                                         goto out_free_memory;
 440                                 }
 441                                 if (chunk_needed_size != chunk_usize)
 442                                         memcpy(out_p, tmp_buf + start_offset,
 443                                                chunk_needed_size);
 444                         }
 445
 446                         out_p += chunk_needed_size;
 447
 448                         if (cb) {
 449                                 /* Feed the data to the callback function.  */
 450
 451                                 if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 452                                         ret = cb(out_buf, out_p - out_buf, ctx_or_buf);
 453                                         if (ret)
 454                                                 goto out_free_memory;
 455                                         out_p = out_buf;
 456                                 } else if (i == end_chunk || out_p == out_buf_end) {
 457                                         size_t bytes_sent;
 458                                         const u8 *p;
 459
 460                                         for (p = out_buf; p != out_p; p += bytes_sent) {
 461                                                 bytes_sent = min(cb_chunk_size, out_p - p);
 462                                                 ret = cb(p, bytes_sent, ctx_or_buf);
 463                                                 if (ret)
 464                                                         goto out_free_memory;
 465                                         }
 466                                         out_p = out_buf;
 467                                 }
 468                         }
 469                         cur_read_offset += chunk_csize;
 470                 } else {
 471                         u8 dummy;
 472
 473                         /* Skip data only.  */
 474                         cur_read_offset += chunk_csize;
 475                         ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 476                         if (ret)
 477                                 goto read_error;
 478                 }
 479         }
 480
 481         if (is_pipe_read
 482             && size == lte->resource_entry.original_size
 483             && chunk_table_size)
 484         {
 485                 u8 dummy;
 486                 /* Skip chunk table at end of pipable resource.  */
 487
 488                 cur_read_offset += chunk_table_size;
 489                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
 490                 if (ret)
 491                         goto read_error;
 492         }
 493         ret = 0;
 494 out_free_memory:
 495         errno_save = errno;
 496         if (chunk_offsets_malloced)
 497                 FREE(chunk_offsets);
 498         if (out_buf_malloced)
 499                 FREE(out_buf);
 500         if (compressed_buf_malloced)
 501                 FREE(compressed_buf);
 502         if (tmp_buf_malloced)
 503                 FREE(tmp_buf);
 504         errno = errno_save;
 505         return ret;
 506
 507 oom:
 508         ERROR("Not enough memory available to read size=%"PRIu64" bytes "
 509               "from compressed resource!", size);
 510         errno = ENOMEM;
 511         ret = WIMLIB_ERR_NOMEM;
 512         goto out_free_memory;
 513
 514 read_error:
 515         ERROR_WITH_ERRNO("Error reading compressed file resource!");
 516         goto out_free_memory;
 517 }
 518
 519 /* Read raw data from a file descriptor at the specified offset.  */
 520 static int
 521 read_raw_file_data(struct filedes *in_fd,
 522                    u64 size,
 523                    consume_data_callback_t cb,
 524                    u32 cb_chunk_size,
 525                    void *ctx_or_buf,
 526                    u64 offset)
 527 {
 528         int ret;
 529         u8 *tmp_buf;
 530         bool tmp_buf_malloced = false;
 531
 532         if (cb) {
 533                 /* Send data to callback function in chunks.  */
 534                 if (cb_chunk_size <= STACK_MAX) {
 535                         tmp_buf = alloca(cb_chunk_size);
 536                 } else {
 537                         tmp_buf = MALLOC(cb_chunk_size);
 538                         if (tmp_buf == NULL) {
 539                                 ret = WIMLIB_ERR_NOMEM;
 540                                 goto out;
 541                         }
 542                         tmp_buf_malloced = true;
 543                 }
 544
 545                 while (size) {
 546                         size_t bytes_to_read = min(cb_chunk_size, size);
 547                         ret = full_pread(in_fd, tmp_buf, bytes_to_read,
 548                                          offset);
 549                         if (ret)
 550                                 goto read_error;
 551                         ret = cb(tmp_buf, bytes_to_read, ctx_or_buf);
 552                         if (ret)
 553                                 goto out;
 554                         size -= bytes_to_read;
 555                         offset += bytes_to_read;
 556                 }
 557         } else {
 558                 /* Read data directly into buffer.  */
 559                 ret = full_pread(in_fd, ctx_or_buf, size, offset);
 560                 if (ret)
 561                         goto read_error;
 562         }
 563         ret = 0;
 564         goto out;
 565
 566 read_error:
 567         ERROR_WITH_ERRNO("Read error");
 568 out:
 569         if (tmp_buf_malloced)
 570                 FREE(tmp_buf);
 571         return ret;
 572 }
 573
 574 /*
 575  * read_partial_wim_resource()-
 576  *
 577  * Read a range of data from a uncompressed or compressed resource in a WIM
 578  * file.  Data is written into a buffer or fed into a callback function, as
 579  * documented in read_resource_prefix().
 580  *
 581  * By default, this function provides the uncompressed data of the resource, and
 582  * @size and @offset and interpreted relative to the uncompressed contents of
 583  * the resource.  The behavior can be modified by any of the following flags:
 584  *
 585  * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
 586  *      Read @size bytes at @offset of the raw contents of the compressed
 587  *      resource.  In the case of pipable resources, this excludes the stream
 588  *      header.  Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS.
 589  *
 590  * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
 591  *      Read the raw compressed chunks of the compressed resource.  @size must
 592  *      be the full uncompressed size, @offset must be 0, and @cb_chunk_size
 593  *      must be the resource chunk size.
 594  *
 595  * Return values:
 596  *      WIMLIB_ERR_SUCCESS (0)
 597  *      WIMLIB_ERR_READ                   (errno set)
 598  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
 599  *      WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
 600  *      WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
 601  *      WIMLIB_ERR_INVALID_CHUNK_SIZE    (errno set to EINVAL)
 602  *
 603  *      or other error code returned by the @cb function.
 604  */
 605 int
 606 read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
 607                           u64 size, consume_data_callback_t cb,
 608                           u32 cb_chunk_size,
 609                           void *ctx_or_buf, int flags, u64 offset)
 610 {
 611         struct filedes *in_fd;
 612         int ret;
 613
 614         /* Verify parameters.  */
 615         wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
 616         in_fd = &lte->wim->in_fd;
 617         if (cb)
 618                 wimlib_assert(is_power_of_2(cb_chunk_size));
 619         if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
 620                 /* Raw chunks mode is subject to the restrictions noted.  */
 621                 wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL));
 622                 wimlib_assert(cb_chunk_size == wim_resource_chunk_size(lte));
 623                 wimlib_assert(size == lte->resource_entry.original_size);
 624                 wimlib_assert(offset == 0);
 625         } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) {
 626                 /* Raw full mode:  read must not overrun end of store size.  */
 627                 wimlib_assert(offset + size >= size &&
 628                               offset + size <= lte->resource_entry.size);
 629         } else {
 630                 /* Normal mode:  read must not overrun end of original size.  */
 631                 wimlib_assert(offset + size >= size &&
 632                               offset + size <= lte->resource_entry.original_size);
 633         }
 634
 635         DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64" "
 636               "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" "
 637               "(readflags 0x%08x, resflags 0x%02x%s)",
 638               size, offset,
 639               lte->resource_entry.size,
 640               lte->resource_entry.original_size,
 641               lte->resource_entry.offset,
 642               flags, lte->resource_entry.flags,
 643               (lte->is_pipable ? ", pipable" : ""));
 644
 645         if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
 646             !resource_is_compressed(&lte->resource_entry)) {
 647                 return read_raw_file_data(in_fd,
 648                                           size,
 649                                           cb,
 650                                           cb_chunk_size,
 651                                           ctx_or_buf,
 652                                           offset + lte->resource_entry.offset);
 653         } else {
 654                 return read_compressed_wim_resource(lte, size, cb,
 655                                                     cb_chunk_size,
 656                                                     ctx_or_buf, flags, offset);
 657         }
 658 }
 659
 660 int
 661 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
 662                                    size_t size, u64 offset, void *buf)
 663 {
 664         return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
 665 }
 666
 667 static int
 668 read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
 669                          u64 size,
 670                          consume_data_callback_t cb,
 671                          u32 cb_chunk_size,
 672                          void *ctx_or_buf,
 673                          int flags)
 674 {
 675         return read_partial_wim_resource(lte, size, cb, cb_chunk_size,
 676                                          ctx_or_buf, flags, 0);
 677 }
 678
 679 #ifndef __WIN32__
 680 /* This function handles reading resource data that is located in an external
 681  * file,  such as a file that has been added to the WIM image through execution
 682  * of a wimlib_add_command.
 683  *
 684  * This assumes the file can be accessed using the standard POSIX open(),
 685  * read(), and close().  On Windows this will not necessarily be the case (since
 686  * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be
 687  * encrypted), so Windows uses its own code for its equivalent case.
 688  */
 689 static int
 690 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
 691                          u64 size,
 692                          consume_data_callback_t cb,
 693                          u32 cb_chunk_size,
 694                          void *ctx_or_buf,
 695                          int _ignored_flags)
 696 {
 697         int ret;
 698         int raw_fd;
 699         struct filedes fd;
 700
 701         wimlib_assert(size <= wim_resource_size(lte));
 702         DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk);
 703
 704         raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY);
 705         if (raw_fd < 0) {
 706                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk);
 707                 return WIMLIB_ERR_OPEN;
 708         }
 709         filedes_init(&fd, raw_fd);
 710         ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0);
 711         filedes_close(&fd);
 712         return ret;
 713 }
 714 #endif /* !__WIN32__ */
 715
 716 /* This function handles the trivial case of reading resource data that is, in
 717  * fact, already located in an in-memory buffer.  */
 718 static int
 719 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
 720                    u64 size, consume_data_callback_t cb,
 721                    u32 cb_chunk_size,
 722                    void *ctx_or_buf, int _ignored_flags)
 723 {
 724         wimlib_assert(size <= wim_resource_size(lte));
 725
 726         if (cb) {
 727                 /* Feed the data into the callback function in
 728                  * appropriately-sized chunks.  */
 729                 int ret;
 730                 u32 chunk_size;
 731
 732                 for (u64 offset = 0; offset < size; offset += chunk_size) {
 733                         chunk_size = min(cb_chunk_size, size - offset);
 734                         ret = cb((const u8*)lte->attached_buffer + offset,
 735                                  chunk_size, ctx_or_buf);
 736                         if (ret)
 737                                 return ret;
 738                 }
 739         } else {
 740                 /* Copy the data directly into the specified buffer.  */
 741                 memcpy(ctx_or_buf, lte->attached_buffer, size);
 742         }
 743         return 0;
 744 }
 745
 746 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
 747                                               u64 size,
 748                                               consume_data_callback_t cb,
 749                                               u32 cb_chunk_size,
 750                                               void *ctx_or_buf,
 751                                               int flags);
 752
 753 /*
 754  * read_resource_prefix()-
 755  *
 756  * Reads the first @size bytes from a generic "resource", which may be located
 757  * in any one of several locations, such as in a WIM file (compressed or
 758  * uncompressed), in an external file, or directly in an in-memory buffer.
 759  *
 760  * This function feeds the data either to a callback function (@cb != NULL,
 761  * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL,
 762  * @ctx_or_buf specifies the buffer, which must have room for at least @size
 763  * bytes).
 764  *
 765  * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks
 766  * to feed the callback function.  @cb_chunk_size must be positive, and if the
 767  * resource is in a WIM file, must be a power of 2.  All chunks, except possibly
 768  * the last one, will be this size.  If (@cb == NULL), @cb_chunk_size is
 769  * ignored.
 770  *
 771  * If the resource is located in a WIM file, @flags can be set as documented in
 772  * read_partial_wim_resource().  Otherwise @flags are ignored.
 773  *
 774  * Returns 0 on success; nonzero on error.  A nonzero value will be returned if
 775  * the resource data cannot be successfully read (for a number of different
 776  * reasons, depending on the resource location), or if a callback function was
 777  * specified and it returned nonzero.
 778  */
 779 int
 780 read_resource_prefix(const struct wim_lookup_table_entry *lte,
 781                      u64 size, consume_data_callback_t cb, u32 cb_chunk_size,
 782                      void *ctx_or_buf, int flags)
 783 {
 784         /* This function merely verifies several preconditions, then passes
 785          * control to an appropriate function for understanding each possible
 786          * resource location.  */
 787         static const read_resource_prefix_handler_t handlers[] = {
 788                 [RESOURCE_IN_WIM]             = read_wim_resource_prefix,
 789         #ifdef __WIN32__
 790                 [RESOURCE_IN_FILE_ON_DISK]    = read_win32_file_prefix,
 791         #else
 792                 [RESOURCE_IN_FILE_ON_DISK]    = read_file_on_disk_prefix,
 793         #endif
 794                 [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
 795         #ifdef WITH_FUSE
 796                 [RESOURCE_IN_STAGING_FILE]    = read_file_on_disk_prefix,
 797         #endif
 798         #ifdef WITH_NTFS_3G
 799                 [RESOURCE_IN_NTFS_VOLUME]     = read_ntfs_file_prefix,
 800         #endif
 801         #ifdef __WIN32__
 802                 [RESOURCE_WIN32_ENCRYPTED]    = read_win32_encrypted_file_prefix,
 803         #endif
 804         };
 805         wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
 806                       && handlers[lte->resource_location] != NULL);
 807         wimlib_assert(cb == NULL || cb_chunk_size > 0);
 808         return handlers[lte->resource_location](lte, size, cb, cb_chunk_size,
 809                                                 ctx_or_buf, flags);
 810 }
 811
 812 /* Read the full uncompressed data of the specified resource into the specified
 813  * buffer, which must have space for at least lte->resource_entry.original_size
 814  * bytes.  */
 815 int
 816 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
 817                             void *buf)
 818 {
 819         return read_resource_prefix(lte, wim_resource_size(lte),
 820                                     NULL, 0, buf, 0);
 821 }
 822
 823 /* Read the full uncompressed data of the specified resource.  A buffer
 824  * sufficient to hold the data is allocated and returned in @buf_ret.  */
 825 int
 826 read_full_resource_into_alloc_buf(const struct wim_lookup_table_entry *lte,
 827                                   void **buf_ret)
 828 {
 829         int ret;
 830         void *buf;
 831
 832         if ((size_t)lte->resource_entry.original_size !=
 833             lte->resource_entry.original_size)
 834         {
 835                 ERROR("Can't read %"PRIu64" byte resource into "
 836                       "memory", lte->resource_entry.original_size);
 837                 return WIMLIB_ERR_NOMEM;
 838         }
 839
 840         buf = MALLOC(lte->resource_entry.original_size);
 841         if (buf == NULL)
 842                 return WIMLIB_ERR_NOMEM;
 843
 844         ret = read_full_resource_into_buf(lte, buf);
 845         if (ret) {
 846                 FREE(buf);
 847                 return ret;
 848         }
 849
 850         *buf_ret = buf;
 851         return 0;
 852 }
 853
 854 /* Retrieve the full uncompressed data of the specified WIM resource, provided
 855  * as a raw `struct resource_entry'.  */
 856 int
 857 res_entry_to_data(const struct resource_entry *res_entry,
 858                   WIMStruct *wim, void **buf_ret)
 859 {
 860         int ret;
 861         struct wim_lookup_table_entry *lte;
 862
 863         lte = new_lookup_table_entry();
 864         if (lte == NULL)
 865                 return WIMLIB_ERR_NOMEM;
 866
 867         copy_resource_entry(&lte->resource_entry, res_entry);
 868         lte->unhashed = 1;
 869         lte->part_number = wim->hdr.part_number;
 870         lte_init_wim(lte, wim);
 871
 872         ret = read_full_resource_into_alloc_buf(lte, buf_ret);
 873         free_lookup_table_entry(lte);
 874         return ret;
 875 }
 876
 877 struct extract_ctx {
 878         SHA_CTX sha_ctx;
 879         consume_data_callback_t extract_chunk;
 880         void *extract_chunk_arg;
 881 };
 882
 883 static int
 884 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
 885                            void *_ctx)
 886 {
 887         struct extract_ctx *ctx = _ctx;
 888
 889         sha1_update(&ctx->sha_ctx, chunk, chunk_size);
 890         return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
 891 }
 892
 893 /* Extracts the first @size bytes of a resource to somewhere.  In the process,
 894  * the SHA1 message digest of the uncompressed resource is checked if the full
 895  * resource is being extracted.
 896  *
 897  * @extract_chunk is a function that will be called to extract each chunk of the
 898  * resource.  */
 899 int
 900 extract_wim_resource(const struct wim_lookup_table_entry *lte,
 901                      u64 size,
 902                      consume_data_callback_t extract_chunk,
 903                      void *extract_chunk_arg)
 904 {
 905         int ret;
 906         if (size == wim_resource_size(lte)) {
 907                 /* Do SHA1 */
 908                 struct extract_ctx ctx;
 909                 ctx.extract_chunk = extract_chunk;
 910                 ctx.extract_chunk_arg = extract_chunk_arg;
 911                 sha1_init(&ctx.sha_ctx);
 912                 ret = read_resource_prefix(lte, size,
 913                                            extract_chunk_sha1_wrapper,
 914                                            wim_resource_chunk_size(lte),
 915                                            &ctx, 0);
 916                 if (ret == 0) {
 917                         u8 hash[SHA1_HASH_SIZE];
 918                         sha1_final(hash, &ctx.sha_ctx);
 919                         if (!hashes_equal(hash, lte->hash)) {
 920                                 if (wimlib_print_errors) {
 921                                         ERROR("Invalid SHA1 message digest "
 922                                               "on the following WIM resource:");
 923                                         print_lookup_table_entry(lte, stderr);
 924                                         if (lte->resource_location == RESOURCE_IN_WIM)
 925                                                 ERROR("The WIM file appears to be corrupt!");
 926                                 }
 927                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
 928                         }
 929                 }
 930         } else {
 931                 /* Don't do SHA1 */
 932                 ret = read_resource_prefix(lte, size, extract_chunk,
 933                                            wim_resource_chunk_size(lte),
 934                                            extract_chunk_arg, 0);
 935         }
 936         return ret;
 937 }
 938
 939 static int
 940 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
 941 {
 942         struct filedes *fd = _fd_p;
 943         int ret = full_write(fd, buf, len);
 944         if (ret)
 945                 ERROR_WITH_ERRNO("Error writing to file descriptor");
 946         return ret;
 947 }
 948
 949 /* Extract the first @size bytes of the specified resource to the specified file
 950  * descriptor.  If @size is the full size of the resource, its SHA1 message
 951  * digest is also checked.  */
 952 int
 953 extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
 954                            struct filedes *fd, u64 size)
 955 {
 956         return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, fd);
 957 }
 958
 959
 960 static int
 961 sha1_chunk(const void *buf, size_t len, void *ctx)
 962 {
 963         sha1_update(ctx, buf, len);
 964         return 0;
 965 }
 966
 967 /* Calculate the SHA1 message digest of a resource, storing it in @lte->hash.  */
 968 int
 969 sha1_resource(struct wim_lookup_table_entry *lte)
 970 {
 971         int ret;
 972         SHA_CTX sha_ctx;
 973
 974         sha1_init(&sha_ctx);
 975         ret = read_resource_prefix(lte, wim_resource_size(lte),
 976                                    sha1_chunk, wim_resource_chunk_size(lte),
 977                                    &sha_ctx, 0);
 978         if (ret == 0)
 979                 sha1_final(lte->hash, &sha_ctx);
 980
 981         return ret;
 982 }
 983
 984 /* Translates a WIM resource entry from the on-disk format into an in-memory
 985  * format.  */
 986 void
 987 get_resource_entry(const struct resource_entry_disk *disk_entry,
 988                    struct resource_entry *entry)
 989 {
 990         /* Note: disk_entry may not be 8 byte aligned--- in that case, the
 991          * offset and original_size members will be unaligned.  (This is okay
 992          * since `struct resource_entry_disk' is declared as packed.)  */
 993
 994         /* Read the size and flags into a bitfield portably... */
 995         entry->size = (((u64)disk_entry->size[0] <<  0) |
 996                        ((u64)disk_entry->size[1] <<  8) |
 997                        ((u64)disk_entry->size[2] << 16) |
 998                        ((u64)disk_entry->size[3] << 24) |
 999                        ((u64)disk_entry->size[4] << 32) |
1000                        ((u64)disk_entry->size[5] << 40) |
1001                        ((u64)disk_entry->size[6] << 48));
1002         entry->flags = disk_entry->flags;
1003         entry->offset = le64_to_cpu(disk_entry->offset);
1004         entry->original_size = le64_to_cpu(disk_entry->original_size);
1005
1006         /* offset and original_size are truncated to 62 bits to avoid possible
1007          * overflows, when converting to a signed 64-bit integer (off_t) or when
1008          * adding size or original_size.  This is okay since no one would ever
1009          * actually have a WIM bigger than 4611686018427387903 bytes... */
1010         if (entry->offset & 0xc000000000000000ULL) {
1011                 WARNING("Truncating offset in resource entry");
1012                 entry->offset &= 0x3fffffffffffffffULL;
1013         }
1014         if (entry->original_size & 0xc000000000000000ULL) {
1015                 WARNING("Truncating original_size in resource entry");
1016                 entry->original_size &= 0x3fffffffffffffffULL;
1017         }
1018 }
1019
1020 /* Translates a WIM resource entry from an in-memory format into the on-disk
1021  * format. */
1022 void
1023 put_resource_entry(const struct resource_entry *entry,
1024                    struct resource_entry_disk *disk_entry)
1025 {
1026         /* Note: disk_entry may not be 8 byte aligned--- in that case, the
1027          * offset and original_size members will be unaligned.  (This is okay
1028          * since `struct resource_entry_disk' is declared as packed.)  */
1029         u64 size = entry->size;
1030
1031         disk_entry->size[0] = size >>  0;
1032         disk_entry->size[1] = size >>  8;
1033         disk_entry->size[2] = size >> 16;
1034         disk_entry->size[3] = size >> 24;
1035         disk_entry->size[4] = size >> 32;
1036         disk_entry->size[5] = size >> 40;
1037         disk_entry->size[6] = size >> 48;
1038         disk_entry->flags = entry->flags;
1039         disk_entry->offset = cpu_to_le64(entry->offset);
1040         disk_entry->original_size = cpu_to_le64(entry->original_size);
1041 }