]> wimlib.net Git - wimlib/blob - src/resource.c
Add read_stream_list()
[wimlib] / src / resource.c
1 /*
2  * resource.c
3  *
4  * Read uncompressed and compressed metadata and file resources from a WIM file.
5  */
6
7 /*
8  * Copyright (C) 2012, 2013 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free Software
14  * Foundation; either version 3 of the License, or (at your option) any later
15  * version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License along with
22  * wimlib; if not, see http://www.gnu.org/licenses/.
23  */
24
25 #ifdef HAVE_CONFIG_H
26 #  include "config.h"
27 #endif
28
29 #include "wimlib.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/error.h"
32 #include "wimlib/file_io.h"
33 #include "wimlib/lookup_table.h"
34 #include "wimlib/resource.h"
35 #include "wimlib/sha1.h"
36
37 #ifdef __WIN32__
38 /* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
39 #  include "wimlib/win32.h"
40 #endif
41
42 #ifdef WITH_NTFS_3G
43 /* for read_ntfs_file_prefix() */
44 #  include "wimlib/ntfs_3g.h"
45 #endif
46
47 #ifdef HAVE_ALLOCA_H
48 #  include <alloca.h>
49 #endif
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <stdlib.h>
53 #include <unistd.h>
54
55 /*
56  *                         Compressed WIM resources
57  *
58  * A compressed resource in a WIM consists of a number of compressed chunks,
59  * each of which decompresses to a fixed chunk size (given in the WIM header;
60  * usually 32768) except possibly the last, which always decompresses to any
61  * remaining bytes.  In addition, immediately before the chunks, a table (the
62  * "chunk table") provides the offset, in bytes relative to the end of the chunk
63  * table, of the start of each compressed chunk, except for the first chunk
64  * which is omitted as it always has an offset of 0.  Therefore, a compressed
65  * resource with N chunks will have a chunk table with N - 1 entries.
66  *
67  * Additional information:
68  *
69  * - Entries in the chunk table are 4 bytes each, except if the uncompressed
70  *   size of the resource is greater than 4 GiB, in which case the entries in
71  *   the chunk table are 8 bytes each.  In either case, the entries are unsigned
72  *   little-endian integers.
73  *
74  * - The chunk table is included in the compressed size of the resource provided
75  *   in the corresponding entry in the WIM's stream lookup table.
76  *
77  * - The compressed size of a chunk is never greater than the uncompressed size.
78  *   From the compressor's point of view, chunks that would have compressed to a
79  *   size greater than or equal to their original size are in fact stored
80  *   uncompressed.  From the decompressor's point of view, chunks with
81  *   compressed size equal to their uncompressed size are in fact uncompressed.
82  *
83  * Furthermore, wimlib supports its own "pipable" WIM format, and for this the
84  * structure of compressed resources was modified to allow piped reading and
85  * writing.  To make sequential writing possible, the chunk table is placed
86  * after the chunks rather than before the chunks, and to make sequential
87  * reading possible, each chunk is prefixed with a 4-byte header giving its
88  * compressed size as a 32-bit, unsigned, little-endian integer.  Otherwise the
89  * details are the same.
90  */
91
92
93 /* Decompress the specified chunk that uses the specified compression type
94  * @ctype, part of a WIM with default chunk size @wim_chunk_size.  For LZX the
95  * separate @wim_chunk_size is needed because it determines the window size used
96  * for LZX compression.  */
97 static int
98 decompress(const void *cchunk, unsigned clen, void *uchunk, unsigned ulen,
99            int ctype, u32 wim_chunk_size)
100 {
101         switch (ctype) {
102         case WIMLIB_COMPRESSION_TYPE_LZX:
103                 return wimlib_lzx_decompress2(cchunk, clen,
104                                               uchunk, ulen, wim_chunk_size);
105         case WIMLIB_COMPRESSION_TYPE_XPRESS:
106                 return wimlib_xpress_decompress(cchunk, clen,
107                                                 uchunk, ulen);
108         case WIMLIB_COMPRESSION_TYPE_LZMS:
109                 return wimlib_lzms_decompress(cchunk, clen, uchunk, ulen);
110         default:
111                 wimlib_assert(0);
112                 return -1;
113         }
114 }
115
/* A contiguous range of data to read from a resource, expressed in terms of
 * the resource's uncompressed contents.  */
struct data_range {
	/* Byte offset of the start of the range within the uncompressed
	 * resource.  */
	u64 offset;

	/* Number of bytes in the range (never 0 for valid ranges).  */
	u64 size;
};
120
/* Alternate chunk table format for resources with WIM_RESHDR_FLAG_CONCAT set.
 * This header appears at the start of the resource, before the chunk table
 * itself.  */
struct alt_chunk_table_header_disk {
	/* Uncompressed size of the resource.  */
	le64 res_usize;

	/* Number of bytes each compressed chunk decompresses into, except
	 * possibly the last which decompresses into the remainder.  */
	le32 chunk_size;

	/* Purpose unknown — not interpreted anywhere in this file.  */
	le32 unknown;

	/* This header is directly followed by a table of compressed sizes of
	 * the chunks (sizes, not offsets — unlike the normal chunk table).  */
} _packed_attribute;
137
138 /* Read data from a compressed WIM resource.  */
139 static int
140 read_compressed_wim_resource(const struct wim_resource_spec * const rspec,
141                              const struct data_range * const ranges,
142                              const size_t num_ranges,
143                              const consume_data_callback_t cb,
144                              void * const cb_ctx,
145                              const bool raw_chunks_mode)
146 {
147         int ret;
148         int errno_save;
149
150         u64 *chunk_offsets = NULL;
151         u8 *ubuf = NULL;
152         void *cbuf = NULL;
153         bool chunk_offsets_malloced = false;
154         bool ubuf_malloced = false;
155         bool cbuf_malloced = false;
156
157         /* Sanity checks  */
158         wimlib_assert(rspec != NULL);
159         wimlib_assert(rspec->ctype != WIMLIB_COMPRESSION_TYPE_NONE);
160         wimlib_assert(is_power_of_2(rspec->cchunk_size));
161         wimlib_assert(cb != NULL);
162         wimlib_assert(num_ranges != 0);
163         for (size_t i = 0; i < num_ranges; i++) {
164                 wimlib_assert(ranges[i].size != 0);
165                 wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size);
166                 wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size);
167         }
168         for (size_t i = 0; i < num_ranges - 1; i++)
169                 wimlib_assert(ranges[i].offset + ranges[i].size <= ranges[i + 1].offset);
170
171         /* Get the offsets of the first and last bytes of the read.  */
172         const u64 first_offset = ranges[0].offset;
173         const u64 last_offset = ranges[num_ranges - 1].offset + ranges[num_ranges - 1].size - 1;
174
175         /* Get the file descriptor for the WIM.  */
176         struct filedes * const in_fd = &rspec->wim->in_fd;
177
178         /* Determine if we're reading a pipable resource from a pipe or not.  */
179         const bool is_pipe_read = !filedes_is_seekable(in_fd);
180
181         /* Determine if the chunk table is in an altenate format.  */
182         const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_CONCAT) && !is_pipe_read;
183
184         /* Get the maximum size of uncompressed chunks in this resource, which
185          * we require be a power of 2.  */
186         u32 chunk_size;
187         u64 cur_read_offset = rspec->offset_in_wim;
188         if (alt_chunk_table) {
189                 /* Alternate chunk table format.  */
190                 struct alt_chunk_table_header_disk hdr;
191
192                 ret = full_pread(in_fd, &hdr, sizeof(hdr), cur_read_offset);
193                 if (ret)
194                         goto read_error;
195                 cur_read_offset += sizeof(hdr);
196
197                 chunk_size = le32_to_cpu(hdr.chunk_size);
198
199                 if (!is_power_of_2(chunk_size)) {
200                         ERROR("Invalid compressed resource: "
201                               "expected power-of-2 chunk size (got %u)", chunk_size);
202                         ret = WIMLIB_ERR_INVALID_CHUNK_SIZE;
203                         goto out_free_memory;
204                 }
205         } else {
206                 chunk_size = rspec->cchunk_size;
207         }
208         const u32 chunk_order = bsr32(chunk_size);
209
210         /* Calculate the total number of chunks the resource is divided into.  */
211         const u64 num_chunks = (rspec->uncompressed_size + chunk_size - 1) >> chunk_order;
212
213         /* Calculate the 0-based indices of the first and last chunks containing
214          * data that needs to be passed to the callback.  */
215         const u64 first_needed_chunk = first_offset >> chunk_order;
216         const u64 last_needed_chunk = last_offset >> chunk_order;
217
218         /* Calculate the 0-based index of the first chunk that actually needs to
219          * be read.  This is normally first_needed_chunk, but for pipe reads we
220          * must always start from the 0th chunk.  */
221         const u64 read_start_chunk = (is_pipe_read ? 0 : first_needed_chunk);
222
223         /* Calculate the number of chunk offsets that are needed for the chunks
224          * being read.  */
225         const u64 num_needed_chunk_offsets =
226                 last_needed_chunk - read_start_chunk + 1 +
227                 (last_needed_chunk < num_chunks - 1);
228
229         /* Calculate the number of entries in the chunk table.  Normally, it's
230          * one less than the number of chunks, since the first chunk has no
231          * entry.  But in the alternate chunk table format, the chunk entries
232          * contain chunk sizes, not offsets, and there is one per chunk.  */
233         const u64 num_chunk_entries = (alt_chunk_table ? num_chunks : num_chunks - 1);
234
235         /* Set the size of each chunk table entry based on the resource's
236          * uncompressed size.  XXX:  Does the alternate chunk table really
237          * always have 4-byte entries?  */
238         const u64 chunk_entry_size =
239                 (rspec->uncompressed_size > (1ULL << 32) && !alt_chunk_table)
240                         ? 8 : 4;
241
242         /* Calculate the size of the chunk table in bytes.  */
243         const u64 chunk_table_size = num_chunk_entries * chunk_entry_size;
244
245         /* Includes header  */
246         const u64 chunk_table_full_size =
247                 (alt_chunk_table) ? chunk_table_size + sizeof(struct alt_chunk_table_header_disk)
248                                   : chunk_table_size;
249
250         if (!is_pipe_read) {
251                 /* Read the needed chunk table entries into memory and use them
252                  * to initialize the chunk_offsets array.  */
253
254                 u64 first_chunk_entry_to_read;
255                 u64 last_chunk_entry_to_read;
256
257                 if (alt_chunk_table) {
258                         /* The alternate chunk table contains chunk sizes, not
259                          * offsets, so we always must read all preceding entries
260                          * in order to determine offsets.  */
261                         first_chunk_entry_to_read = 0;
262                         last_chunk_entry_to_read = last_needed_chunk;
263                 } else {
264                         /* Here we must account for the fact that the first
265                          * chunk has no explicit chunk table entry.  */
266
267                         if (read_start_chunk == 0)
268                                 first_chunk_entry_to_read = 0;
269                         else
270                                 first_chunk_entry_to_read = read_start_chunk - 1;
271
272                         if (last_needed_chunk == 0)
273                                 last_chunk_entry_to_read = 0;
274                         else
275                                 last_chunk_entry_to_read = last_needed_chunk - 1;
276
277                         if (last_needed_chunk < num_chunks - 1)
278                                 last_chunk_entry_to_read++;
279                 }
280
281                 const u64 num_chunk_entries_to_read =
282                         last_chunk_entry_to_read - first_chunk_entry_to_read + 1;
283
284                 const u64 chunk_offsets_alloc_size =
285                         max(num_chunk_entries_to_read,
286                             num_needed_chunk_offsets) * sizeof(chunk_offsets[0]);
287
288                 if ((size_t)chunk_offsets_alloc_size != chunk_offsets_alloc_size)
289                         goto oom;
290
291                 if (chunk_offsets_alloc_size <= STACK_MAX) {
292                         chunk_offsets = alloca(chunk_offsets_alloc_size);
293                 } else {
294                         chunk_offsets = MALLOC(chunk_offsets_alloc_size);
295                         if (chunk_offsets == NULL)
296                                 goto oom;
297                         chunk_offsets_malloced = true;
298                 }
299
300                 const size_t chunk_table_size_to_read =
301                         num_chunk_entries_to_read * chunk_entry_size;
302
303                 const u64 file_offset_of_needed_chunk_entries =
304                         cur_read_offset
305                         + (first_chunk_entry_to_read * chunk_entry_size)
306                         + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0);
307
308                 void * const chunk_table_data =
309                         (u8*)chunk_offsets +
310                         chunk_offsets_alloc_size -
311                         chunk_table_size_to_read;
312
313                 ret = full_pread(in_fd, chunk_table_data, chunk_table_size,
314                                  file_offset_of_needed_chunk_entries);
315                 if (ret)
316                         goto read_error;
317
318                 /* Now fill in chunk_offsets from the entries we have read in
319                  * chunk_tab_data.  We break aliasing rules here to avoid having
320                  * to allocate yet another array.  */
321                 typedef le64 __attribute__((may_alias)) aliased_le64_t;
322                 typedef le32 __attribute__((may_alias)) aliased_le32_t;
323                 u64 * chunk_offsets_p = chunk_offsets;
324
325                 if (alt_chunk_table) {
326                         u64 cur_offset = 0;
327                         aliased_le32_t *raw_entries = chunk_table_data;
328
329                         for (size_t i = 0; i < num_chunk_entries_to_read; i++) {
330                                 u32 entry = le32_to_cpu(raw_entries[i]);
331                                 if (i >= read_start_chunk)
332                                         *chunk_offsets_p++ = cur_offset;
333                                 cur_offset += entry;
334                         }
335                 } else {
336                         if (read_start_chunk == 0)
337                                 *chunk_offsets_p++ = 0;
338
339                         if (chunk_entry_size == 4) {
340                                 aliased_le32_t *raw_entries = chunk_table_data;
341                                 for (size_t i = 0; i < num_chunk_entries_to_read; i++)
342                                         *chunk_offsets_p++ = le32_to_cpu(raw_entries[i]);
343                         } else {
344                                 aliased_le64_t *raw_entries = chunk_table_data;
345                                 for (size_t i = 0; i < num_chunk_entries_to_read; i++)
346                                         *chunk_offsets_p++ = le64_to_cpu(raw_entries[i]);
347                         }
348                 }
349
350                 /* Set offset to beginning of first chunk to read.  */
351                 cur_read_offset += chunk_offsets[0];
352                 if (rspec->is_pipable)
353                         cur_read_offset += read_start_chunk * sizeof(struct pwm_chunk_hdr);
354                 else
355                         cur_read_offset += chunk_table_size;
356         }
357
358         /* Allocate buffer for holding the uncompressed data of each chunk.  */
359         if (chunk_size <= STACK_MAX) {
360                 ubuf = alloca(chunk_size);
361         } else {
362                 ubuf = MALLOC(chunk_size);
363                 if (ubuf == NULL)
364                         goto oom;
365                 ubuf_malloced = true;
366         }
367
368         /* Unless the raw compressed data was requested, allocate a temporary
369          * buffer for reading compressed chunks, each of which can be at most
370          * @chunk_size - 1 bytes.  This excludes compressed chunks that are a
371          * full @chunk_size bytes, which are actually stored uncompressed.  */
372         if (!raw_chunks_mode) {
373                 if (chunk_size - 1 <= STACK_MAX) {
374                         cbuf = alloca(chunk_size - 1);
375                 } else {
376                         cbuf = MALLOC(chunk_size - 1);
377                         if (cbuf == NULL)
378                                 goto oom;
379                         cbuf_malloced = true;
380                 }
381         }
382
383         /* Read and process each needed chunk.  */
384         const struct data_range *cur_range = ranges;
385         const struct data_range * const end_range = &ranges[num_ranges];
386         u64 cur_range_pos = cur_range->offset;
387         u64 cur_range_end = cur_range->offset + cur_range->size;
388
389         for (u64 i = read_start_chunk; i <= last_needed_chunk; i++) {
390
391                 /* Calculate uncompressed size of next chunk.  */
392                 u32 chunk_usize;
393                 if ((i == num_chunks - 1) && (rspec->uncompressed_size & (chunk_size - 1)))
394                         chunk_usize = (rspec->uncompressed_size & (chunk_size - 1));
395                 else
396                         chunk_usize = chunk_size;
397
398                 /* Calculate compressed size of next chunk.  */
399                 u32 chunk_csize;
400                 if (is_pipe_read) {
401                         struct pwm_chunk_hdr chunk_hdr;
402
403                         ret = full_pread(in_fd, &chunk_hdr,
404                                          sizeof(chunk_hdr), cur_read_offset);
405                         if (ret)
406                                 goto read_error;
407                         chunk_csize = le32_to_cpu(chunk_hdr.compressed_size);
408                 } else {
409                         if (i == num_chunks - 1) {
410                                 chunk_csize = rspec->size_in_wim -
411                                               chunk_table_full_size -
412                                               chunk_offsets[i - read_start_chunk];
413                                 if (rspec->is_pipable)
414                                         chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr);
415                         } else {
416                                 chunk_csize = chunk_offsets[i + 1 - read_start_chunk] -
417                                               chunk_offsets[i - read_start_chunk];
418                         }
419                 }
420                 if (chunk_csize == 0 || chunk_csize > chunk_usize) {
421                         ERROR("Invalid chunk size in compressed resource!");
422                         errno = EINVAL;
423                         ret = WIMLIB_ERR_DECOMPRESSION;
424                         goto out_free_memory;
425                 }
426                 if (rspec->is_pipable)
427                         cur_read_offset += sizeof(struct pwm_chunk_hdr);
428
429                 /* Uncompressed offsets  */
430                 const u64 chunk_start_offset = i << chunk_order;
431                 const u64 chunk_end_offset = chunk_start_offset + chunk_usize;
432
433                 if (chunk_end_offset <= cur_range_pos) {
434
435                         /* The next range does not require data in this chunk,
436                          * so skip it.  */
437
438                         cur_read_offset += chunk_csize;
439                         if (is_pipe_read) {
440                                 u8 dummy;
441
442                                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
443                                 if (ret)
444                                         goto read_error;
445                         }
446                 } else {
447
448                         /* Read the chunk and feed data to the callback
449                          * function.  */
450                         u8 *cb_buf;
451
452                         ret = full_pread(in_fd,
453                                          cbuf,
454                                          chunk_csize,
455                                          cur_read_offset);
456                         if (ret)
457                                 goto read_error;
458
459                         if (chunk_csize != chunk_usize && !raw_chunks_mode) {
460                                 ret = decompress(cbuf,
461                                                  chunk_csize,
462                                                  ubuf,
463                                                  chunk_usize,
464                                                  rspec->ctype,
465                                                  chunk_size);
466                                 if (ret) {
467                                         ERROR("Failed to decompress data!");
468                                         ret = WIMLIB_ERR_DECOMPRESSION;
469                                         errno = EINVAL;
470                                         goto out_free_memory;
471                                 }
472                                 cb_buf = ubuf;
473                         } else {
474                                 cb_buf = cbuf;
475                         }
476                         cur_read_offset += chunk_csize;
477
478                         /* At least one range requires data in this chunk.
479                          * However, the data fed to the callback function must
480                          * not overlap range boundaries.  */
481                         do {
482                                 size_t start, end, size;
483
484                                 start = cur_range_pos - chunk_start_offset;
485                                 end = min(cur_range_end, chunk_end_offset) - chunk_start_offset;
486                                 size = end - start;
487
488                                 if (raw_chunks_mode)
489                                         ret = (*cb)(&cb_buf[0], chunk_csize, cb_ctx);
490                                 else
491                                         ret = (*cb)(&cb_buf[start], size, cb_ctx);
492
493                                 if (ret)
494                                         goto out_free_memory;
495
496                                 cur_range_pos += size;
497                                 if (cur_range_pos == cur_range_end) {
498                                         if (++cur_range == end_range) {
499                                                 cur_range_pos = ~0ULL;
500                                         } else {
501                                                 cur_range_pos = cur_range->offset;
502                                                 cur_range_end = cur_range->offset + cur_range->size;
503                                         }
504                                 }
505                         } while (cur_range_pos < chunk_end_offset);
506                 }
507         }
508
509         if (is_pipe_read
510             && last_offset == rspec->uncompressed_size - 1
511             && chunk_table_size)
512         {
513                 u8 dummy;
514                 /* Skip chunk table at end of pipable resource.  */
515
516                 cur_read_offset += chunk_table_size;
517                 ret = full_pread(in_fd, &dummy, 1, cur_read_offset - 1);
518                 if (ret)
519                         goto read_error;
520         }
521         ret = 0;
522 out_free_memory:
523         errno_save = errno;
524         if (chunk_offsets_malloced)
525                 FREE(chunk_offsets);
526         if (ubuf_malloced)
527                 FREE(ubuf);
528         if (cbuf_malloced)
529                 FREE(cbuf);
530         errno = errno_save;
531         return ret;
532
533 oom:
534         ERROR("Not enough memory available to read size=%"PRIu64" bytes "
535               "from compressed resource!", last_offset - first_offset + 1);
536         errno = ENOMEM;
537         ret = WIMLIB_ERR_NOMEM;
538         goto out_free_memory;
539
540 read_error:
541         ERROR_WITH_ERRNO("Error reading compressed file resource!");
542         goto out_free_memory;
543 }
544
545 /* Read raw data from a file descriptor at the specified offset.  */
546 static int
547 read_raw_file_data(struct filedes *in_fd, u64 size, consume_data_callback_t cb,
548                    u32 cb_chunk_size, void *ctx_or_buf, u64 offset)
549 {
550         int ret;
551         u8 *tmp_buf;
552         bool tmp_buf_malloced = false;
553
554         if (cb) {
555                 /* Send data to callback function in chunks.  */
556                 if (cb_chunk_size <= STACK_MAX) {
557                         tmp_buf = alloca(cb_chunk_size);
558                 } else {
559                         tmp_buf = MALLOC(cb_chunk_size);
560                         if (tmp_buf == NULL) {
561                                 ret = WIMLIB_ERR_NOMEM;
562                                 goto out;
563                         }
564                         tmp_buf_malloced = true;
565                 }
566
567                 while (size) {
568                         size_t bytes_to_read = min(cb_chunk_size, size);
569                         ret = full_pread(in_fd, tmp_buf, bytes_to_read,
570                                          offset);
571                         if (ret)
572                                 goto read_error;
573                         ret = cb(tmp_buf, bytes_to_read, ctx_or_buf);
574                         if (ret)
575                                 goto out;
576                         size -= bytes_to_read;
577                         offset += bytes_to_read;
578                 }
579         } else {
580                 /* Read data directly into buffer.  */
581                 ret = full_pread(in_fd, ctx_or_buf, size, offset);
582                 if (ret)
583                         goto read_error;
584         }
585         ret = 0;
586         goto out;
587
588 read_error:
589         ERROR_WITH_ERRNO("Read error");
590 out:
591         if (tmp_buf_malloced)
592                 FREE(tmp_buf);
593         return ret;
594 }
595
596 static int
597 bufferer_cb(const void *chunk, size_t size, void *_ctx)
598 {
599         u8 **buf_p = _ctx;
600
601         *buf_p = mempcpy(*buf_p, chunk, size);
602         return 0;
603 }
604
/* Context for rechunker_cb():  re-groups arbitrarily-sized incoming pieces of
 * data into chunks of @cb_chunk_size bytes (smaller at range boundaries)
 * before passing them on to @cb.  */
struct rechunker_context {
	/* Staging buffer — presumably allocated by the caller with at least
	 * @cb_chunk_size bytes; TODO confirm against setup code (not visible
	 * here).  */
	u8 *buffer;
	/* Number of valid bytes currently staged in @buffer.  */
	u32 buffer_filled;
	/* Target size of the chunks delivered to @cb.  */
	u32 cb_chunk_size;

	/* The ranges of data being read, and the index of the range currently
	 * being filled.  */
	const struct data_range *ranges;
	size_t num_ranges;
	size_t cur_range;
	/* Bytes of ranges[cur_range] not yet copied into @buffer — assumed
	 * initialized by the caller to the first range's size.  */
	u64 range_bytes_remaining;

	/* Downstream callback and its context.  */
	consume_data_callback_t cb;
	void *cb_ctx;
};
618
/* consume_data_callback_t that buffers incoming data and flushes it to
 * ctx->cb whenever the staging buffer reaches ctx->cb_chunk_size bytes or the
 * current data range is exhausted, so that flushed chunks never cross range
 * boundaries.  Returns 0, or the first nonzero value returned by ctx->cb.  */
static int
rechunker_cb(const void *chunk, size_t size, void *_ctx)
{
	struct rechunker_context *ctx = _ctx;
	const u8 *chunkptr = chunk;
	size_t bytes_to_copy;
	int ret;

	/* Caller must not feed data past the last range.  */
	wimlib_assert(ctx->cur_range != ctx->num_ranges);

	while (size) {
		bytes_to_copy = size;

		/* Cap at the space left in the staging buffer.  */
		if (bytes_to_copy > ctx->cb_chunk_size - ctx->buffer_filled)
			bytes_to_copy = ctx->cb_chunk_size - ctx->buffer_filled;

		/* Cap at the end of the current range.  NOTE(review):
		 * range_bytes_remaining is already decremented as bytes are
		 * buffered (below), so subtracting buffer_filled here again
		 * looks doubtful and this unsigned difference can wrap when
		 * buffer_filled exceeds range_bytes_remaining (making the cap
		 * a no-op) — verify the intended invariant.  */
		if (bytes_to_copy > ctx->range_bytes_remaining - ctx->buffer_filled)
			bytes_to_copy = ctx->range_bytes_remaining - ctx->buffer_filled;

		memcpy(&ctx->buffer[ctx->buffer_filled], chunkptr, bytes_to_copy);

		ctx->buffer_filled += bytes_to_copy;
		chunkptr += bytes_to_copy;
		size -= bytes_to_copy;
		ctx->range_bytes_remaining -= bytes_to_copy;

		/* Flush when the buffer is full or the range is finished.  */
		if (ctx->buffer_filled == ctx->cb_chunk_size ||
		    ctx->range_bytes_remaining == 0)
		{
			ret = (*ctx->cb)(ctx->buffer, ctx->buffer_filled, ctx->cb_ctx);
			if (ret)
				return ret;
			ctx->buffer_filled = 0;

			/* On range completion, advance to the next range (if
			 * any) and reset its remaining-byte count.  */
			if (ctx->range_bytes_remaining == 0 &&
			    ++ctx->cur_range != ctx->num_ranges)
				ctx->range_bytes_remaining = ctx->ranges[ctx->cur_range].size;
		}
	}
	return 0;
}
660
661 /*
662  * read_partial_wim_resource()-
663  *
664  * Read a range of data from an uncompressed or compressed resource in a WIM
665  * file.  Data is written into a buffer or fed into a callback function, as
666  * documented in read_stream_prefix().
667  *
668  * By default, this function provides the uncompressed data of the resource, and
669  * @size and @offset are interpreted relative to the uncompressed contents of
670  * the resource.  This behavior can be modified by either of the following
671  * flags:
672  *
673  * WIMLIB_READ_RESOURCE_FLAG_RAW_FULL:
674  *      Read @size bytes at @offset of the raw contents of the compressed
675  *      resource.  In the case of pipable resources, this excludes the stream
676  *      header.  Exclusive with WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS.
677  *
678  * WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS:
679  *      Read the raw compressed chunks of the compressed resource.  @size must
680  *      be the full uncompressed size, @offset must be 0, and @cb_chunk_size
681  *      must be the resource chunk size.
682  *
683  * Return values:
684  *      WIMLIB_ERR_SUCCESS (0)
685  *      WIMLIB_ERR_READ                   (errno set)
686  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
687  *      WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
688  *      WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
689  *
690  *      or other error code returned by the @cb function.
691  */
int
read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                          u64 size, consume_data_callback_t cb,
                          u32 cb_chunk_size, void *ctx_or_buf,
                          int flags, u64 offset)
{
        const struct wim_resource_spec *rspec;
        struct filedes *in_fd;

        /* Verify parameters.  */
        wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
        rspec = lte->rspec;
        in_fd = &rspec->wim->in_fd;
        if (cb)
                wimlib_assert(is_power_of_2(cb_chunk_size));
        if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS) {
                /* Raw chunks mode is subject to the restrictions noted.  */
                wimlib_assert(!lte_is_partial(lte));
                wimlib_assert(!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL));
                wimlib_assert(cb_chunk_size == rspec->cchunk_size);
                wimlib_assert(size == lte->size);
                wimlib_assert(offset == 0);
        } else if (flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) {
                /* Raw full mode:  read must not overrun end of store size.
                 * The `offset + size >= size' clause rejects u64 overflow of
                 * the sum.  */
                wimlib_assert(!lte_is_partial(lte));
                wimlib_assert(offset + size >= size &&
                              offset + size <= rspec->size_in_wim);
        } else {
                /* Normal mode:  read must not overrun end of original size.  */
                wimlib_assert(offset + size >= size &&
                              offset + size <= lte->size);
        }

        DEBUG("Reading WIM resource: %"PRIu64" @ +%"PRIu64"[+%"PRIu64"] "
              "from %"PRIu64"(%"PRIu64") @ +%"PRIu64" "
              "(readflags 0x%08x, resflags 0x%02x%s)",
              size, offset, lte->offset_in_res,
              rspec->size_in_wim,
              rspec->uncompressed_size,
              rspec->offset_in_wim,
              flags, lte->flags,
              (rspec->is_pipable ? ", pipable" : ""));

        if ((flags & WIMLIB_READ_RESOURCE_FLAG_RAW_FULL) ||
            rspec->ctype == WIMLIB_COMPRESSION_TYPE_NONE)
        {
                /* Raw full mode, or an uncompressed resource:  the requested
                 * bytes can be read straight out of the WIM file.  */
                return read_raw_file_data(in_fd,
                                          size,
                                          cb,
                                          cb_chunk_size,
                                          ctx_or_buf,
                                          rspec->offset_in_wim + lte->offset_in_res + offset);
        } else {
                /* Compressed resource:  decompress, then deliver the data via
                 * one of three internal callbacks (chosen below).  */
                bool raw_chunks;
                struct data_range range;
                consume_data_callback_t internal_cb;
                void *internal_cb_ctx;
                u8 *buf;
                bool rechunker_buf_malloced = false;
                struct rechunker_context *rechunker_ctx;
                int ret;

                if (size == 0)
                        return 0;

                range.offset = lte->offset_in_res + offset;
                range.size = size;
                raw_chunks = !!(flags & WIMLIB_READ_RESOURCE_FLAG_RAW_CHUNKS);

                if (cb != NULL &&
                    cb_chunk_size == rspec->cchunk_size &&
                    !(rspec->flags & WIM_RESHDR_FLAG_CONCAT))
                {
                        /* Caller's chunk size already matches the resource
                         * chunk size:  feed its callback directly.  */
                        internal_cb = cb;
                        internal_cb_ctx = ctx_or_buf;
                } else if (cb == NULL) {
                        /* No callback:  accumulate the data into the caller's
                         * buffer via bufferer_cb (which advances @buf).  */
                        buf = ctx_or_buf;
                        internal_cb = bufferer_cb;
                        internal_cb_ctx = &buf;
                } else {
                        /* Chunk sizes differ:  interpose rechunker_cb to
                         * regroup the decompressed data into
                         * @cb_chunk_size-byte pieces.  The context is
                         * alloca()ed, so it remains valid until this function
                         * returns.  */
                        rechunker_ctx = alloca(sizeof(struct rechunker_context));

                        if (cb_chunk_size <= STACK_MAX) {
                                rechunker_ctx->buffer = alloca(cb_chunk_size);
                        } else {
                                /* Too large for the stack; fall back to the
                                 * heap and remember to free it below.  */
                                rechunker_ctx->buffer = MALLOC(cb_chunk_size);
                                if (rechunker_ctx->buffer == NULL)
                                        return WIMLIB_ERR_NOMEM;
                                rechunker_buf_malloced = true;
                        }
                        rechunker_ctx->buffer_filled = 0;
                        rechunker_ctx->cb_chunk_size = cb_chunk_size;

                        rechunker_ctx->ranges = &range;
                        rechunker_ctx->num_ranges = 1;
                        rechunker_ctx->cur_range = 0;
                        rechunker_ctx->range_bytes_remaining = range.size;

                        rechunker_ctx->cb = cb;
                        rechunker_ctx->cb_ctx = ctx_or_buf;

                        internal_cb = rechunker_cb;
                        internal_cb_ctx = rechunker_ctx;
                }

                ret = read_compressed_wim_resource(rspec, &range, 1,
                                                   internal_cb, internal_cb_ctx,
                                                   raw_chunks);
                if (rechunker_buf_malloced)
                        FREE(rechunker_ctx->buffer);

                return ret;
        }
}
806
807 int
808 read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte,
809                                  size_t size, u64 offset, void *buf)
810 {
811         return read_partial_wim_resource(lte, size, NULL, 0, buf, 0, offset);
812 }
813
814 static int
815 read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
816                        consume_data_callback_t cb, u32 cb_chunk_size,
817                        void *ctx_or_buf, int flags)
818 {
819         return read_partial_wim_resource(lte, size, cb, cb_chunk_size,
820                                          ctx_or_buf, flags, 0);
821 }
822
823 #ifndef __WIN32__
824 /* This function handles reading stream data that is located in an external
825  * file,  such as a file that has been added to the WIM image through execution
826  * of a wimlib_add_command.
827  *
828  * This assumes the file can be accessed using the standard POSIX open(),
829  * read(), and close().  On Windows this will not necessarily be the case (since
830  * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be
831  * encrypted), so Windows uses its own code for its equivalent case.
832  */
833 static int
834 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size,
835                          consume_data_callback_t cb, u32 cb_chunk_size,
836                          void *ctx_or_buf, int _ignored_flags)
837 {
838         int ret;
839         int raw_fd;
840         struct filedes fd;
841
842         wimlib_assert(size <= lte->size);
843         DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk);
844
845         raw_fd = open(lte->file_on_disk, O_BINARY | O_RDONLY);
846         if (raw_fd < 0) {
847                 ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk);
848                 return WIMLIB_ERR_OPEN;
849         }
850         filedes_init(&fd, raw_fd);
851         ret = read_raw_file_data(&fd, size, cb, cb_chunk_size, ctx_or_buf, 0);
852         filedes_close(&fd);
853         return ret;
854 }
855 #endif /* !__WIN32__ */
856
857 /* This function handles the trivial case of reading stream data that is, in
858  * fact, already located in an in-memory buffer.  */
859 static int
860 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
861                    u64 size, consume_data_callback_t cb,
862                    u32 cb_chunk_size, void *ctx_or_buf, int _ignored_flags)
863 {
864         wimlib_assert(size <= lte->size);
865
866         if (cb) {
867                 /* Feed the data into the callback function in
868                  * appropriately-sized chunks.  */
869                 int ret;
870                 u32 chunk_size;
871
872                 for (u64 offset = 0; offset < size; offset += chunk_size) {
873                         chunk_size = min(cb_chunk_size, size - offset);
874                         ret = cb((const u8*)lte->attached_buffer + offset,
875                                  chunk_size, ctx_or_buf);
876                         if (ret)
877                                 return ret;
878                 }
879         } else {
880                 /* Copy the data directly into the specified buffer.  */
881                 memcpy(ctx_or_buf, lte->attached_buffer, size);
882         }
883         return 0;
884 }
885
/* Type of a handler that reads the first @size bytes of the stream described
 * by @lte from one particular kind of location, feeding the data to @cb (or
 * into @ctx_or_buf when @cb is NULL).  See read_stream_prefix().  */
typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
                                            u64 size, consume_data_callback_t cb,
                                            u32 cb_chunk_size, void *ctx_or_buf,
                                            int flags);
890
891 /*
892  * read_stream_prefix()-
893  *
894  * Reads the first @size bytes from a generic "stream", which may be located in
895  * any one of several locations, such as in a WIM file (compressed or
896  * uncompressed), in an external file, or directly in an in-memory buffer.
897  *
898  * This function feeds the data either to a callback function (@cb != NULL,
899  * passing it @ctx_or_buf), or write it directly into a buffer (@cb == NULL,
900  * @ctx_or_buf specifies the buffer, which must have room for at least @size
901  * bytes).
902  *
903  * When (@cb != NULL), @cb_chunk_size specifies the maximum size of data chunks
904  * to feed the callback function.  @cb_chunk_size must be positive, and if the
905  * stream is in a WIM file, must be a power of 2.  All chunks, except possibly
906  * the last one, will be this size.  If (@cb == NULL), @cb_chunk_size is
907  * ignored.
908  *
909  * If the stream is located in a WIM file, @flags can be set as documented in
910  * read_partial_wim_resource().  Otherwise @flags are ignored.
911  *
912  * Returns 0 on success; nonzero on error.  A nonzero value will be returned if
913  * the stream data cannot be successfully read (for a number of different
914  * reasons, depending on the stream location), or if a callback function was
915  * specified and it returned nonzero.
916  */
int
read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size,
                   consume_data_callback_t cb, u32 cb_chunk_size,
                   void *ctx_or_buf, int flags)
{
        /* This function merely verifies several preconditions, then passes
         * control to an appropriate function for understanding each possible
         * stream location.  */
        /* Dispatch table indexed by resource location; an entry is compiled in
         * only when the corresponding backend is available in this build.  */
        static const read_stream_prefix_handler_t handlers[] = {
                [RESOURCE_IN_WIM]             = read_wim_stream_prefix,
        #ifdef __WIN32__
                [RESOURCE_IN_FILE_ON_DISK]    = read_win32_file_prefix,
        #else
                [RESOURCE_IN_FILE_ON_DISK]    = read_file_on_disk_prefix,
        #endif
                [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
        #ifdef WITH_FUSE
                [RESOURCE_IN_STAGING_FILE]    = read_file_on_disk_prefix,
        #endif
        #ifdef WITH_NTFS_3G
                [RESOURCE_IN_NTFS_VOLUME]     = read_ntfs_file_prefix,
        #endif
        #ifdef __WIN32__
                [RESOURCE_WIN32_ENCRYPTED]    = read_win32_encrypted_file_prefix,
        #endif
        };
        /* An unknown location, or one whose backend was compiled out, is a
         * programming error.  */
        wimlib_assert(lte->resource_location < ARRAY_LEN(handlers)
                      && handlers[lte->resource_location] != NULL);
        wimlib_assert(cb == NULL || cb_chunk_size > 0);
        return handlers[lte->resource_location](lte, size, cb, cb_chunk_size,
                                                ctx_or_buf, flags);
}
949
950 /* Read the full uncompressed data of the specified stream into the specified
951  * buffer, which must have space for at least lte->size bytes.  */
952 int
953 read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf)
954 {
955         return read_stream_prefix(lte, lte->size, NULL, 0, buf, 0);
956 }
957
958 /* Read the full uncompressed data of the specified stream.  A buffer sufficient
959  * to hold the data is allocated and returned in @buf_ret.  */
960 int
961 read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte,
962                                 void **buf_ret)
963 {
964         int ret;
965         void *buf;
966
967         if ((size_t)lte->size != lte->size) {
968                 ERROR("Can't read %"PRIu64" byte stream into "
969                       "memory", lte->size);
970                 return WIMLIB_ERR_NOMEM;
971         }
972
973         buf = MALLOC(lte->size);
974         if (buf == NULL)
975                 return WIMLIB_ERR_NOMEM;
976
977         ret = read_full_stream_into_buf(lte, buf);
978         if (ret) {
979                 FREE(buf);
980                 return ret;
981         }
982
983         *buf_ret = buf;
984         return 0;
985 }
986
987 /* Retrieve the full uncompressed data of the specified WIM resource.  */
988 static int
989 wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret)
990 {
991         int ret;
992         struct wim_lookup_table_entry *lte;
993
994         lte = new_lookup_table_entry();
995         if (lte == NULL)
996                 return WIMLIB_ERR_NOMEM;
997
998         lte->unhashed = 1;
999         lte_bind_wim_resource_spec(lte, rspec);
1000         lte->flags = rspec->flags;
1001         lte->size = rspec->uncompressed_size;
1002         lte->offset_in_res = 0;
1003
1004         ret = read_full_stream_into_alloc_buf(lte, buf_ret);
1005
1006         lte_unbind_wim_resource_spec(lte);
1007         free_lookup_table_entry(lte);
1008         return ret;
1009 }
1010
1011 /* Retrieve the full uncompressed data of the specified WIM resource.  */
1012 int
1013 wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret)
1014 {
1015         DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", "
1016               "uncompressed_size=%"PRIu64,
1017               reshdr->offset_in_wim, reshdr->size_in_wim, reshdr->uncompressed_size);
1018
1019         struct wim_resource_spec rspec;
1020         wim_res_hdr_to_spec(reshdr, wim, &rspec);
1021         return wim_resource_spec_to_data(&rspec, buf_ret);
1022 }
1023
/* Context for read_stream_list_wrapper_cb():  tracks progress through a run of
 * streams packed into a single compressed WIM resource.  */
struct read_stream_list_ctx {
        /* Callbacks supplied to read_stream_list(), invoked respectively when
         * a stream starts, for each chunk of its data, and when it ends.  */
        read_stream_list_begin_stream_t begin_stream;
        consume_data_callback_t consume_chunk;
        read_stream_list_end_stream_t end_stream;
        /* Contexts passed back to the corresponding callbacks above.  */
        void *begin_stream_ctx;
        void *consume_chunk_ctx;
        void *end_stream_ctx;
        /* Stream currently being read, and how many of its bytes have been
         * consumed so far.  */
        struct wim_lookup_table_entry *cur_stream;
        u64 cur_stream_offset;
        /* Last stream of the run; after it completes, no further advance.  */
        struct wim_lookup_table_entry *final_stream;
        /* Offset of the `struct list_head' within each lookup table entry,
         * used to step to the next stream in the list.  */
        size_t list_head_offset;
};
1036
/* Chunk callback passed to read_compressed_wim_resource() by
 * read_stream_list() when several streams share one compressed resource:
 * demultiplexes the consecutive decompressed chunks back into per-stream
 * begin/consume/end callback invocations.  Assumes each chunk lies entirely
 * within a single stream (guaranteed by the ranges set up by the caller).  */
static int
read_stream_list_wrapper_cb(const void *chunk, size_t size, void *_ctx)
{
        struct read_stream_list_ctx *ctx = _ctx;
        int ret;

        if (ctx->cur_stream_offset == 0) {
                /* Starting a new stream.  */
                ret = (*ctx->begin_stream)(ctx->cur_stream, ctx->begin_stream_ctx);
                if (ret)
                        return ret;
        }

        ret = (*ctx->consume_chunk)(chunk, size, ctx->consume_chunk_ctx);
        if (ret)
                return ret;

        ctx->cur_stream_offset += size;

        if (ctx->cur_stream_offset == ctx->cur_stream->size) {
                /* Finished reading all the data for a stream; advance
                 * to the next one.  */
                ret = (*ctx->end_stream)(ctx->cur_stream, ctx->end_stream_ctx);
                if (ret)
                        return ret;

                /* The last stream of the run has no successor to advance to.  */
                if (ctx->cur_stream == ctx->final_stream)
                        return 0;

                /* container_of-style arithmetic:  locate the embedded
                 * `struct list_head' inside the current entry, follow its
                 * `next' link, then convert back to the enclosing entry.  */
                struct list_head *cur = (struct list_head *)
                                ((u8*)ctx->cur_stream + ctx->list_head_offset);
                struct list_head *next = cur->next;

                ctx->cur_stream = (struct wim_lookup_table_entry *)
                                ((u8*)next - ctx->list_head_offset);

                ctx->cur_stream_offset = 0;
        }
        return 0;
}
1077
1078 /*
1079  * Read a list of streams, each of which may be in any supported location (e.g.
1080  * in a WIM or in an external file).  Unlike read_stream_prefix() or the
1081  * functions which call it, this function optimizes the case where multiple
1082  * streams are packed into a single compressed WIM resource and reads them all
1083  * consecutively, only decompressing the data one time.
1084  *
1085  * @stream_list
1086  *      List of streams (represented as `struct wim_lookup_table_entry's) to
1087  *      read.
1088  * @list_head_offset
1089  *      Offset of the `struct list_head' within each `struct
1090  *      wim_lookup_table_entry' that makes up the @stream_list.
1091  * @begin_stream
1092  *      Callback for starting to process a stream.
1093  * @consume_chunk
1094  *      Callback for receiving a chunk of stream data.
1095  * @end_stream
1096  *      Callback for finishing the processing of a stream.
1097  * @cb_chunk_size
1098  *      Size of chunks to provide to @consume_chunk.  For a given stream, all
1099  *      the chunks will be this size, except possibly the last which will be the
1100  *      remainder.
1101  * @cb_ctx
1102  *      Parameter to pass to the callback functions.
1103  *
1104  * Returns 0 on success; a nonzero error code on failure.  Failure can occur due
1105  * to an error reading the data or due to an error status being returned by any
1106  * of the callback functions.
1107  */
int
read_stream_list(struct list_head *stream_list,
                 size_t list_head_offset,
                 read_stream_list_begin_stream_t begin_stream,
                 consume_data_callback_t consume_chunk,
                 read_stream_list_end_stream_t end_stream,
                 u32 cb_chunk_size,
                 void *cb_ctx)
{
        int ret;
        struct list_head *cur, *next;
        struct wim_lookup_table_entry *lte;

        /* Sort by location so that streams packed into the same WIM resource
         * become adjacent and can be detected by a forward scan.  */
        ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset);
        if (ret)
                return ret;

        /* @next is cached before each iteration because the multi-stream case
         * below advances @next past the streams it consumes.  */
        for (cur = stream_list->next, next = cur->next;
             cur != stream_list;
             cur = next, next = cur->next)
        {
                lte = (struct wim_lookup_table_entry*)((u8*)cur - list_head_offset);

                if (lte_is_partial(lte)) {

                        struct wim_lookup_table_entry *lte_next, *lte_last;
                        struct list_head *next2;
                        size_t stream_count;

                        /* The next stream is a proper sub-sequence of a WIM
                         * resource.  See if there are other streams in the same
                         * resource that need to be read.  Since
                         * sort_stream_list_by_sequential_order() sorted the
                         * streams by offset in the WIM, this can be determined
                         * by simply scanning forward in the list.  */

                        lte_last = lte;
                        stream_count = 1;
                        for (next2 = next;
                             next2 != stream_list
                             && (lte_next = (struct wim_lookup_table_entry*)
                                                ((u8*)next2 - list_head_offset),
                                 lte_next->resource_location == RESOURCE_IN_WIM
                                 && lte_next->rspec == lte->rspec);
                             next2 = next2->next)
                        {
                                lte_last = lte_next;
                                stream_count++;
                        }
                        if (stream_count > 1) {
                                /* Reading multiple streams combined into a
                                 * single WIM resource.  They are in the stream
                                 * list, sorted by offset; @lte specifies the
                                 * first stream in the resource that needs to be
                                 * read and @lte_last specifies the last stream
                                 * in the resource that needs to be read.  */

                                /* Skip the outer loop past the whole run.  */
                                next = next2;

                                /* NOTE(review): VLA sized by the number of
                                 * streams sharing the resource — presumably
                                 * small in practice, but worth confirming it
                                 * cannot grow large enough to overflow the
                                 * stack.  */
                                struct data_range ranges[stream_count];

                                /* Build one (offset, size) range per stream,
                                 * in list (i.e. offset) order.  */
                                {
                                        struct list_head *next3;
                                        size_t i;
                                        struct wim_lookup_table_entry *lte_cur;

                                        next3 = cur;
                                        for (i = 0; i < stream_count; i++) {
                                                lte_cur = (struct wim_lookup_table_entry*)
                                                        ((u8*)next3 - list_head_offset);
                                                ranges[i].offset = lte_cur->offset_in_res;
                                                ranges[i].size = lte_cur->size;
                                                next3 = next3->next;
                                        }
                                }

                                /* Re-chunks the decompressed data into
                                 * @cb_chunk_size-byte pieces before it reaches
                                 * the wrapper callback below.  */
                                struct rechunker_context rechunker_ctx = {
                                        .buffer = MALLOC(cb_chunk_size),
                                        .buffer_filled = 0,
                                        .cb_chunk_size = cb_chunk_size,
                                        .ranges = ranges,
                                        .num_ranges = stream_count,
                                        .cur_range = 0,
                                        .range_bytes_remaining = ranges[0].size,
                                        .cb = consume_chunk,
                                        .cb_ctx = cb_ctx,
                                };

                                if (rechunker_ctx.buffer == NULL)
                                        return WIMLIB_ERR_NOMEM;

                                /* Demultiplexes the chunk sequence back into
                                 * per-stream begin/consume/end calls.  */
                                struct read_stream_list_ctx ctx = {
                                        .begin_stream           = begin_stream,
                                        .begin_stream_ctx       = cb_ctx,
                                        .consume_chunk          = rechunker_cb,
                                        .consume_chunk_ctx      = &rechunker_ctx,
                                        .end_stream             = end_stream,
                                        .end_stream_ctx         = cb_ctx,
                                        .cur_stream             = lte,
                                        .cur_stream_offset      = 0,
                                        .final_stream           = lte_last,
                                        .list_head_offset       = list_head_offset,
                                };

                                ret = read_compressed_wim_resource(lte->rspec,
                                                                   ranges,
                                                                   stream_count,
                                                                   read_stream_list_wrapper_cb,
                                                                   &ctx,
                                                                   false);
                                FREE(rechunker_ctx.buffer);
                                if (ret)
                                        return ret;
                                continue;
                        }
                }
                /* Common case:  a stand-alone stream, read with the generic
                 * prefix reader and wrapped in begin/end notifications.  */
                ret = (*begin_stream)(lte, cb_ctx);
                if (ret)
                        return ret;

                ret = read_stream_prefix(lte, lte->size, consume_chunk,
                                         cb_chunk_size, cb_ctx, 0);
                if (ret)
                        return ret;

                ret = (*end_stream)(lte, cb_ctx);
                if (ret)
                        return ret;
        }
        return 0;
}
1239
/* Context for extract_chunk_sha1_wrapper():  the running SHA-1 state plus the
 * real extraction callback (and its argument) that each chunk is forwarded
 * to.  */
struct extract_ctx {
        SHA_CTX sha_ctx;
        consume_data_callback_t extract_chunk;
        void *extract_chunk_arg;
};
1245
1246 static int
1247 extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size, void *_ctx)
1248 {
1249         struct extract_ctx *ctx = _ctx;
1250
1251         sha1_update(&ctx->sha_ctx, chunk, chunk_size);
1252         return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
1253 }
1254
1255 /* Extracts the first @size bytes of a stream to somewhere.  In the process, the
1256  * SHA1 message digest of the uncompressed stream is checked if the full stream
1257  * is being extracted.
1258  *
1259  * @extract_chunk is a function that will be called to extract each chunk of the
1260  * stream.  */
1261 int
1262 extract_stream(const struct wim_lookup_table_entry *lte, u64 size,
1263                consume_data_callback_t extract_chunk, void *extract_chunk_arg)
1264 {
1265         int ret;
1266         if (size == lte->size) {
1267                 /* Do SHA1 */
1268                 struct extract_ctx ctx;
1269                 ctx.extract_chunk = extract_chunk;
1270                 ctx.extract_chunk_arg = extract_chunk_arg;
1271                 sha1_init(&ctx.sha_ctx);
1272                 ret = read_stream_prefix(lte, size,
1273                                          extract_chunk_sha1_wrapper,
1274                                          lte_cchunk_size(lte),
1275                                          &ctx, 0);
1276                 if (ret == 0) {
1277                         u8 hash[SHA1_HASH_SIZE];
1278                         sha1_final(hash, &ctx.sha_ctx);
1279                         if (!hashes_equal(hash, lte->hash)) {
1280                                 if (wimlib_print_errors) {
1281                                         ERROR("Invalid SHA1 message digest "
1282                                               "on the following WIM stream:");
1283                                         print_lookup_table_entry(lte, stderr);
1284                                         if (lte->resource_location == RESOURCE_IN_WIM)
1285                                                 ERROR("The WIM file appears to be corrupt!");
1286                                 }
1287                                 ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
1288                         }
1289                 }
1290         } else {
1291                 /* Don't do SHA1 */
1292                 ret = read_stream_prefix(lte, size, extract_chunk,
1293                                          lte_cchunk_size(lte),
1294                                          extract_chunk_arg, 0);
1295         }
1296         return ret;
1297 }
1298
1299 static int
1300 extract_wim_chunk_to_fd(const void *buf, size_t len, void *_fd_p)
1301 {
1302         struct filedes *fd = _fd_p;
1303         int ret = full_write(fd, buf, len);
1304         if (ret)
1305                 ERROR_WITH_ERRNO("Error writing to file descriptor");
1306         return ret;
1307 }
1308
1309 /* Extract the first @size bytes of the specified stream to the specified file
1310  * descriptor.  If @size is the full size of the stream, its SHA1 message digest
1311  * is also checked.  */
1312 int
1313 extract_stream_to_fd(const struct wim_lookup_table_entry *lte,
1314                      struct filedes *fd, u64 size)
1315 {
1316         return extract_stream(lte, size, extract_wim_chunk_to_fd, fd);
1317 }
1318
1319
1320 static int
1321 sha1_chunk(const void *buf, size_t len, void *ctx)
1322 {
1323         sha1_update(ctx, buf, len);
1324         return 0;
1325 }
1326
1327 /* Calculate the SHA1 message digest of a stream, storing it in @lte->hash.  */
1328 int
1329 sha1_stream(struct wim_lookup_table_entry *lte)
1330 {
1331         int ret;
1332         SHA_CTX sha_ctx;
1333
1334         sha1_init(&sha_ctx);
1335         ret = read_stream_prefix(lte, lte->size,
1336                                  sha1_chunk, lte_cchunk_size(lte),
1337                                  &sha_ctx, 0);
1338         if (ret == 0)
1339                 sha1_final(lte->hash, &sha_ctx);
1340
1341         return ret;
1342 }
1343
1344 /* Convert a WIM resource header to a stand-alone resource specification.  */
1345 void
1346 wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim,
1347                     struct wim_resource_spec *spec)
1348 {
1349         spec->wim = wim;
1350         spec->offset_in_wim = reshdr->offset_in_wim;
1351         spec->size_in_wim = reshdr->size_in_wim;
1352         spec->uncompressed_size = reshdr->uncompressed_size;
1353         INIT_LIST_HEAD(&spec->lte_list);
1354         spec->flags = reshdr->flags;
1355         spec->is_pipable = wim_is_pipable(wim);
1356         if (spec->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_CONCAT)) {
1357                 spec->ctype = wim->compression_type;
1358                 spec->cchunk_size = wim->chunk_size;
1359         } else {
1360                 spec->ctype = WIMLIB_COMPRESSION_TYPE_NONE;
1361                 spec->cchunk_size = 0;
1362         }
1363 }
1364
1365 /* Convert a stand-alone resource specification to a WIM resource header.  */
1366 void
1367 wim_res_spec_to_hdr(const struct wim_resource_spec *rspec,
1368                     struct wim_reshdr *reshdr)
1369 {
1370         reshdr->offset_in_wim     = rspec->offset_in_wim;
1371         reshdr->size_in_wim       = rspec->size_in_wim;
1372         reshdr->flags             = rspec->flags;
1373         reshdr->uncompressed_size = rspec->uncompressed_size;
1374 }
1375
1376 /* Translates a WIM resource header from the on-disk format into an in-memory
1377  * format.  */
1378 int
1379 get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,
1380                struct wim_reshdr *reshdr)
1381 {
1382         reshdr->offset_in_wim = le64_to_cpu(disk_reshdr->offset_in_wim);
1383         reshdr->size_in_wim = (((u64)disk_reshdr->size_in_wim[0] <<  0) |
1384                               ((u64)disk_reshdr->size_in_wim[1] <<  8) |
1385                               ((u64)disk_reshdr->size_in_wim[2] << 16) |
1386                               ((u64)disk_reshdr->size_in_wim[3] << 24) |
1387                               ((u64)disk_reshdr->size_in_wim[4] << 32) |
1388                               ((u64)disk_reshdr->size_in_wim[5] << 40) |
1389                               ((u64)disk_reshdr->size_in_wim[6] << 48));
1390         reshdr->uncompressed_size = le64_to_cpu(disk_reshdr->uncompressed_size);
1391         reshdr->flags = disk_reshdr->flags;
1392
1393         /* Truncate numbers to 62 bits to avoid possible overflows.  */
1394         if (reshdr->offset_in_wim & 0xc000000000000000ULL)
1395                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1396
1397         if (reshdr->uncompressed_size & 0xc000000000000000ULL)
1398                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1399
1400         return 0;
1401 }
1402
1403 /* Translates a WIM resource header from an in-memory format into the on-disk
1404  * format.  */
1405 void
1406 put_wim_reshdr(const struct wim_reshdr *reshdr,
1407                struct wim_reshdr_disk *disk_reshdr)
1408 {
1409         disk_reshdr->size_in_wim[0] = reshdr->size_in_wim  >>  0;
1410         disk_reshdr->size_in_wim[1] = reshdr->size_in_wim  >>  8;
1411         disk_reshdr->size_in_wim[2] = reshdr->size_in_wim  >> 16;
1412         disk_reshdr->size_in_wim[3] = reshdr->size_in_wim  >> 24;
1413         disk_reshdr->size_in_wim[4] = reshdr->size_in_wim  >> 32;
1414         disk_reshdr->size_in_wim[5] = reshdr->size_in_wim  >> 40;
1415         disk_reshdr->size_in_wim[6] = reshdr->size_in_wim  >> 48;
1416         disk_reshdr->flags = reshdr->flags;
1417         disk_reshdr->offset_in_wim = cpu_to_le64(reshdr->offset_in_wim);
1418         disk_reshdr->uncompressed_size = cpu_to_le64(reshdr->uncompressed_size);
1419 }