wimlib.net Git - wimlib/blob - src/resource.c

   1 /*
   2  * resource.c
   3  *
   4  * Read uncompressed and compressed metadata and file resources.
   5  */
   6
   7 /*
   8  * Copyright (C) 2012 Eric Biggers
   9  *
  10  * This file is part of wimlib, a library for working with WIM files.
  11  *
  12  * wimlib is free software; you can redistribute it and/or modify it under the
  13  * terms of the GNU General Public License as published by the Free Software
  14  * Foundation; either version 3 of the License, or (at your option) any later
  15  * version.
  16  *
  17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
  18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
  19  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU General Public License along with
  22  * wimlib; if not, see http://www.gnu.org/licenses/.
  23  */
  24
  25 #include "config.h"
  26
  27 #include <stdlib.h>
  28 #include <stdarg.h>
  29
  30 #include "dentry.h"
  31
  32 #ifdef WITH_NTFS_3G
  33 #include <time.h>
  34 #include <ntfs-3g/attrib.h>
  35 #include <ntfs-3g/inode.h>
  36 #include <ntfs-3g/dir.h>
  37 #endif
  38
  39 #include "wimlib_internal.h"
  40 #include "lookup_table.h"
  41 #include "io.h"
  42 #include "lzx.h"
  43 #include "xpress.h"
  44 #include "sha1.h"
  45 #include <unistd.h>
  46 #include <errno.h>
  47 #ifdef HAVE_ALLOCA_H
  48 #include <alloca.h>
  49 #endif
  50
  51
  52 /*
  53  * Reads all or part of a compressed resource into an in-memory buffer.
  54  *
  55  * @fp:                 The FILE* for the WIM file.
  56  * @resource_compressed_size:    The compressed size of the resource.
  57  * @resource_uncompressed_size:  The uncompressed size of the resource.
  58  * @resource_offset:             The offset of the start of the resource from
  59  *                                      the start of the stream @fp.
  60  * @resource_ctype:     The compression type of the resource.
  61  * @len:                The number of bytes of uncompressed data to read from
  62  *                              the resource.
  63  * @offset:             The offset of the bytes to read within the uncompressed
  64  *                              resource.
  65  * @contents_len:       An array into which the uncompressed data is written.
  66  *                              It must be at least @len bytes long.
  67  *
  68  * Returns zero on success, nonzero on failure.
  69  */
  70 static int read_compressed_resource(FILE *fp, u64 resource_compressed_size,
  71                                     u64 resource_uncompressed_size,
  72                                     u64 resource_offset, int resource_ctype,
  73                                     u64 len, u64 offset, u8  contents_ret[])
  74 {
  75
  76         DEBUG2("comp size = %"PRIu64", uncomp size = %"PRIu64", "
  77                "res offset = %"PRIu64"",
  78                resource_compressed_size,
  79                resource_uncompressed_size,
  80                resource_offset);
  81         DEBUG2("resource_ctype = %s, len = %"PRIu64", offset = %"PRIu64"",
  82                wimlib_get_compression_type_string(resource_ctype), len, offset);
  83         /* Trivial case */
  84         if (len == 0)
  85                 return 0;
  86
  87         int (*decompress)(const void *, uint, void *, uint);
  88         /* Set the appropriate decompress function. */
  89         if (resource_ctype == WIM_COMPRESSION_TYPE_LZX)
  90                 decompress = lzx_decompress;
  91         else
  92                 decompress = xpress_decompress;
  93
  94         /* The structure of a compressed resource consists of a table of chunk
  95          * offsets followed by the chunks themselves.  Each chunk consists of
  96          * compressed data, and there is one chunk for each WIM_CHUNK_SIZE =
  97          * 32768 bytes of the uncompressed file, with the last chunk having any
  98          * remaining bytes.
  99          *
 100          * The chunk offsets are measured relative to the end of the chunk
 101          * table.  The first chunk is omitted from the table in the WIM file
 102          * because its offset is implicitly given by the fact that it directly
 103          * follows the chunk table and therefore must have an offset of 0.
 104          */
 105
 106         /* Calculate how many chunks the resource conists of in its entirety. */
 107         u64 num_chunks = (resource_uncompressed_size + WIM_CHUNK_SIZE - 1) /
 108                                                                 WIM_CHUNK_SIZE;
 109         /* As mentioned, the first chunk has no entry in the chunk table. */
 110         u64 num_chunk_entries = num_chunks - 1;
 111
 112
 113         /* The index of the chunk that the read starts at. */
 114         u64 start_chunk = offset / WIM_CHUNK_SIZE;
 115         /* The byte offset at which the read starts, within the start chunk. */
 116         u64 start_chunk_offset = offset % WIM_CHUNK_SIZE;
 117
 118         /* The index of the chunk that contains the last byte of the read. */
 119         u64 end_chunk   = (offset + len - 1) / WIM_CHUNK_SIZE;
 120         /* The byte offset of the last byte of the read, within the end chunk */
 121         u64 end_chunk_offset = (offset + len - 1) % WIM_CHUNK_SIZE;
 122
 123         /* Number of chunks that are actually needed to read the requested part
 124          * of the file. */
 125         u64 num_needed_chunks = end_chunk - start_chunk + 1;
 126
 127         /* If the end chunk is not the last chunk, an extra chunk entry is
 128          * needed because we need to know the offset of the chunk after the last
 129          * chunk read to figure out the size of the last read chunk. */
 130         if (end_chunk != num_chunks - 1)
 131                 num_needed_chunks++;
 132
 133         /* Declare the chunk table.  It will only contain offsets for the chunks
 134          * that are actually needed for this read. */
 135         u64 chunk_offsets[num_needed_chunks];
 136
 137         /* Set the implicit offset of the first chunk if it is included in the
 138          * needed chunks.
 139          *
 140          * Note: M$'s documentation includes a picture that shows the first
 141          * chunk starting right after the chunk entry table, labeled as offset
 142          * 0x10.  However, in the actual file format, the offset is measured
 143          * from the end of the chunk entry table, so the first chunk has an
 144          * offset of 0. */
 145         if (start_chunk == 0)
 146                 chunk_offsets[0] = 0;
 147
 148         /* According to M$'s documentation, if the uncompressed size of
 149          * the file is greater than 4 GB, the chunk entries are 8-byte
 150          * integers.  Otherwise, they are 4-byte integers. */
 151         u64 chunk_entry_size = (resource_uncompressed_size >= (u64)1 << 32) ?
 152                                                                         8 : 4;
 153
 154         /* Size of the full chunk table in the WIM file. */
 155         u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
 156
 157         /* Read the needed chunk offsets from the table in the WIM file. */
 158
 159         /* Index, in the WIM file, of the first needed entry in the
 160          * chunk table. */
 161         u64 start_table_idx = (start_chunk == 0) ? 0 : start_chunk - 1;
 162
 163         /* Number of entries we need to actually read from the chunk
 164          * table (excludes the implicit first chunk). */
 165         u64 num_needed_chunk_entries = (start_chunk == 0) ?
 166                                 num_needed_chunks - 1 : num_needed_chunks;
 167
 168         /* Skip over unneeded chunk table entries. */
 169         u64 file_offset_of_needed_chunk_entries = resource_offset +
 170                                 start_table_idx * chunk_entry_size;
 171         if (fseeko(fp, file_offset_of_needed_chunk_entries, SEEK_SET) != 0) {
 172                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" to read "
 173                                  "chunk table of compressed resource",
 174                                  file_offset_of_needed_chunk_entries);
 175                 return WIMLIB_ERR_READ;
 176         }
 177
 178         /* Number of bytes we need to read from the chunk table. */
 179         size_t size = num_needed_chunk_entries * chunk_entry_size;
 180
 181         u8 chunk_tab_buf[size];
 182
 183         if (fread(chunk_tab_buf, 1, size, fp) != size)
 184                 goto err;
 185
 186         /* Now fill in chunk_offsets from the entries we have read in
 187          * chunk_tab_buf. */
 188
 189         u64 *chunk_tab_p = chunk_offsets;
 190         if (start_chunk == 0)
 191                 chunk_tab_p++;
 192
 193         if (chunk_entry_size == 4) {
 194                 u32 *entries = (u32*)chunk_tab_buf;
 195                 while (num_needed_chunk_entries--)
 196                         *chunk_tab_p++ = le32_to_cpu(*entries++);
 197         } else {
 198                 u64 *entries = (u64*)chunk_tab_buf;
 199                 while (num_needed_chunk_entries--)
 200                         *chunk_tab_p++ = le64_to_cpu(*entries++);
 201         }
 202
 203         /* Done with the chunk table now.  We must now seek to the first chunk
 204          * that is needed for the read. */
 205
 206         u64 file_offset_of_first_needed_chunk = resource_offset +
 207                                 chunk_table_size + chunk_offsets[0];
 208         if (fseeko(fp, file_offset_of_first_needed_chunk, SEEK_SET) != 0) {
 209                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" to read "
 210                                  "first chunk of compressed resource",
 211                                  file_offset_of_first_needed_chunk);
 212                 return WIMLIB_ERR_READ;
 213         }
 214
 215         /* Pointer to current position in the output buffer for uncompressed
 216          * data. */
 217         u8 *out_p = (u8*)contents_ret;
 218
 219         /* Buffer for compressed data.  While most compressed chunks will have a
 220          * size much less than WIM_CHUNK_SIZE, WIM_CHUNK_SIZE - 1 is the maximum
 221          * size in the worst-case.  This assumption is valid only if chunks that
 222          * happen to compress to more than the uncompressed size (i.e. a
 223          * sequence of random bytes) are always stored uncompressed. But this seems
 224          * to be the case in M$'s WIM files, even though it is undocumented. */
 225         u8 compressed_buf[WIM_CHUNK_SIZE - 1];
 226
 227
 228         /* Decompress all the chunks. */
 229         for (u64 i = start_chunk; i <= end_chunk; i++) {
 230
 231                 DEBUG2("Chunk %"PRIu64" (start %"PRIu64", end %"PRIu64").",
 232                        i, start_chunk, end_chunk);
 233
 234                 /* Calculate the sizes of the compressed chunk and of the
 235                  * uncompressed chunk. */
 236                 uint compressed_chunk_size, uncompressed_chunk_size;
 237                 if (i != num_chunks - 1) {
 238                         /* All the chunks except the last one in the resource
 239                          * expand to WIM_CHUNK_SIZE uncompressed, and the amount
 240                          * of compressed data for the chunk is given by the
 241                          * difference of offsets in the chunk offset table. */
 242                         compressed_chunk_size = chunk_offsets[i + 1 - start_chunk] -
 243                                                 chunk_offsets[i - start_chunk];
 244                         uncompressed_chunk_size = WIM_CHUNK_SIZE;
 245                 } else {
 246                         /* The last compressed chunk consists of the remaining
 247                          * bytes in the file resource, and the last uncompressed
 248                          * chunk has size equal to however many bytes are left-
 249                          * that is, the remainder of the uncompressed size when
 250                          * divided by WIM_CHUNK_SIZE.
 251                          *
 252                          * Note that the resource_compressed_size includes the
 253                          * chunk table, so the size of it must be subtracted. */
 254                         compressed_chunk_size = resource_compressed_size -
 255                                                 chunk_table_size -
 256                                                 chunk_offsets[i - start_chunk];
 257
 258                         uncompressed_chunk_size = resource_uncompressed_size %
 259                                                                 WIM_CHUNK_SIZE;
 260
 261                         /* If the remainder is 0, the last chunk actually
 262                          * uncompresses to a full WIM_CHUNK_SIZE bytes. */
 263                         if (uncompressed_chunk_size == 0)
 264                                 uncompressed_chunk_size = WIM_CHUNK_SIZE;
 265                 }
 266
 267                 DEBUG2("compressed_chunk_size = %u, "
 268                        "uncompressed_chunk_size = %u",
 269                        compressed_chunk_size, uncompressed_chunk_size);
 270
 271
 272                 /* Figure out how much of this chunk we actually need to read */
 273                 u64 start_offset;
 274                 if (i == start_chunk)
 275                         start_offset = start_chunk_offset;
 276                 else
 277                         start_offset = 0;
 278                 u64 end_offset;
 279                 if (i == end_chunk)
 280                         end_offset = end_chunk_offset;
 281                 else
 282                         end_offset = WIM_CHUNK_SIZE - 1;
 283
 284                 u64 partial_chunk_size = end_offset + 1 - start_offset;
 285                 bool is_partial_chunk = (partial_chunk_size !=
 286                                                 uncompressed_chunk_size);
 287
 288                 DEBUG2("start_offset = %u, end_offset = %u", start_offset,
 289                                         end_offset);
 290                 DEBUG2("partial_chunk_size = %u", partial_chunk_size);
 291
 292                 /* This is undocumented, but chunks can be uncompressed.  This
 293                  * appears to always be the case when the compressed chunk size
 294                  * is equal to the uncompressed chunk size. */
 295                 if (compressed_chunk_size == uncompressed_chunk_size) {
 296                         /* Probably an uncompressed chunk */
 297
 298                         if (start_offset != 0) {
 299                                 if (fseeko(fp, start_offset, SEEK_CUR) != 0) {
 300                                         ERROR_WITH_ERRNO("Uncompressed partial "
 301                                                          "chunk fseek() error");
 302                                         return WIMLIB_ERR_READ;
 303                                 }
 304                         }
 305                         if (fread(out_p, 1, partial_chunk_size, fp) !=
 306                                         partial_chunk_size)
 307                                 goto err;
 308                 } else {
 309                         /* Compressed chunk */
 310                         int ret;
 311
 312                         /* Read the compressed data into compressed_buf. */
 313                         if (fread(compressed_buf, 1, compressed_chunk_size,
 314                                                 fp) != compressed_chunk_size)
 315                                 goto err;
 316
 317                         /* For partial chunks we must buffer the uncompressed
 318                          * data because we don't need all of it. */
 319                         if (is_partial_chunk) {
 320                                 u8 uncompressed_buf[uncompressed_chunk_size];
 321
 322                                 ret = decompress(compressed_buf,
 323                                                 compressed_chunk_size,
 324                                                 uncompressed_buf,
 325                                                 uncompressed_chunk_size);
 326                                 if (ret != 0)
 327                                         return WIMLIB_ERR_DECOMPRESSION;
 328                                 memcpy(out_p, uncompressed_buf + start_offset,
 329                                                 partial_chunk_size);
 330                         } else {
 331                                 ret = decompress(compressed_buf,
 332                                                 compressed_chunk_size,
 333                                                 out_p,
 334                                                 uncompressed_chunk_size);
 335                                 if (ret != 0)
 336                                         return WIMLIB_ERR_DECOMPRESSION;
 337                         }
 338                 }
 339
 340                 /* Advance the pointer into the uncompressed output data by the
 341                  * number of uncompressed bytes that were written.  */
 342                 out_p += partial_chunk_size;
 343         }
 344
 345         return 0;
 346
 347 err:
 348         if (feof(fp))
 349                 ERROR("Unexpected EOF in compressed file resource");
 350         else
 351                 ERROR_WITH_ERRNO("Error reading compressed file resource");
 352         return WIMLIB_ERR_READ;
 353 }
 354
 355 /*
 356  * Reads uncompressed data from an open file stream.
 357  */
 358 int read_uncompressed_resource(FILE *fp, u64 offset, u64 len,
 359                                u8 contents_ret[])
 360 {
 361         if (fseeko(fp, offset, SEEK_SET) != 0) {
 362                 ERROR("Failed to seek to byte %"PRIu64" of input file "
 363                       "to read uncompressed resource (len = %"PRIu64")",
 364                       offset, len);
 365                 return WIMLIB_ERR_READ;
 366         }
 367         if (fread(contents_ret, 1, len, fp) != len) {
 368                 if (feof(fp)) {
 369                         ERROR("Unexpected EOF in uncompressed file resource");
 370                 } else {
 371                         ERROR("Failed to read %"PRIu64" bytes from "
 372                               "uncompressed resource at offset %"PRIu64,
 373                               len, offset);
 374                 }
 375                 return WIMLIB_ERR_READ;
 376         }
 377         return 0;
 378 }
 379
 380
 381
 382
 383 /* Reads the contents of a struct resource_entry, as represented in the on-disk
 384  * format, from the memory pointed to by @p, and fills in the fields of @entry.
 385  * A pointer to the byte after the memory read at @p is returned. */
 386 const u8 *get_resource_entry(const u8 *p, struct resource_entry *entry)
 387 {
 388         u64 size;
 389         u8 flags;
 390
 391         p = get_u56(p, &size);
 392         p = get_u8(p, &flags);
 393         entry->size = size;
 394         entry->flags = flags;
 395
 396         /* offset and original_size are truncated to 62 bits to avoid possible
 397          * overflows, when converting to a signed 64-bit integer (off_t) or when
 398          * adding size or original_size.  This is okay since no one would ever
 399          * actually have a WIM bigger than 4611686018427387903 bytes... */
 400         p = get_u64(p, &entry->offset);
 401         if (entry->offset & 0xc000000000000000ULL) {
 402                 WARNING("Truncating offset in resource entry");
 403                 entry->offset &= 0x3fffffffffffffffULL;
 404         }
 405         p = get_u64(p, &entry->original_size);
 406         if (entry->original_size & 0xc000000000000000ULL) {
 407                 WARNING("Truncating original_size in resource entry");
 408                 entry->original_size &= 0x3fffffffffffffffULL;
 409         }
 410         return p;
 411 }
 412
 413 /* Copies the struct resource_entry @entry to the memory pointed to by @p in the
 414  * on-disk format.  A pointer to the byte after the memory written at @p is
 415  * returned. */
 416 u8 *put_resource_entry(u8 *p, const struct resource_entry *entry)
 417 {
 418         p = put_u56(p, entry->size);
 419         p = put_u8(p, entry->flags);
 420         p = put_u64(p, entry->offset);
 421         p = put_u64(p, entry->original_size);
 422         return p;
 423 }
 424
 425 static FILE *wim_get_fp(WIMStruct *w)
 426 {
 427         pthread_mutex_lock(&w->fp_tab_mutex);
 428         FILE *fp;
 429
 430         wimlib_assert(w->filename != NULL);
 431
 432         for (size_t i = 0; i < w->num_allocated_fps; i++) {
 433                 if (w->fp_tab[i]) {
 434                         fp = w->fp_tab[i];
 435                         w->fp_tab[i] = NULL;
 436                         goto out;
 437                 }
 438         }
 439         DEBUG("Opening extra file descriptor to `%s'", w->filename);
 440         fp = fopen(w->filename, "rb");
 441         if (!fp)
 442                 ERROR_WITH_ERRNO("Failed to open `%s'", w->filename);
 443 out:
 444         pthread_mutex_unlock(&w->fp_tab_mutex);
 445         return fp;
 446 }
 447
 448 static int wim_release_fp(WIMStruct *w, FILE *fp)
 449 {
 450         int ret = 0;
 451         FILE **fp_tab;
 452
 453         pthread_mutex_lock(&w->fp_tab_mutex);
 454
 455         for (size_t i = 0; i < w->num_allocated_fps; i++) {
 456                 if (w->fp_tab[i] == NULL) {
 457                         w->fp_tab[i] = fp;
 458                         goto out;
 459                 }
 460         }
 461
 462         fp_tab = REALLOC(w->fp_tab, sizeof(FILE*) * (w->num_allocated_fps + 4));
 463         if (!fp_tab) {
 464                 ret = WIMLIB_ERR_NOMEM;
 465                 goto out;
 466         }
 467         w->fp_tab = fp_tab;
 468         memset(&w->fp_tab[w->num_allocated_fps], 0, 4 * sizeof(FILE*));
 469         w->fp_tab[w->num_allocated_fps] = fp;
 470         w->num_allocated_fps += 4;
 471 out:
 472         pthread_mutex_unlock(&w->fp_tab_mutex);
 473         return ret;
 474 }
 475
 476 /*
 477  * Reads some data from the resource corresponding to a WIM lookup table entry.
 478  *
 479  * @lte:        The WIM lookup table entry for the resource.
 480  * @buf:        Buffer into which to write the data.
 481  * @size:       Number of bytes to read.
 482  * @offset:     Offset at which to start reading the resource.
 483  *
 484  * Returns zero on success, nonzero on failure.
 485  */
 486 int read_wim_resource(const struct lookup_table_entry *lte, u8 buf[],
 487                       size_t size, u64 offset, int flags)
 488 {
 489         int ctype;
 490         int ret = 0;
 491         FILE *fp;
 492
 493         /* We shouldn't be allowing read over-runs in any part of the library.
 494          * */
 495         if (flags & WIMLIB_RESOURCE_FLAG_RAW)
 496                 wimlib_assert(offset + size <= lte->resource_entry.size);
 497         else
 498                 wimlib_assert(offset + size <= lte->resource_entry.original_size);
 499
 500         switch (lte->resource_location) {
 501         case RESOURCE_IN_WIM:
 502                 /* The resource is in a WIM file, and its WIMStruct is given by
 503                  * the lte->wim member.  The resource may be either compressed
 504                  * or uncompressed. */
 505                 wimlib_assert(lte->wim != NULL);
 506
 507                 if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED) {
 508                         fp = wim_get_fp(lte->wim);
 509                         if (!fp)
 510                                 return WIMLIB_ERR_OPEN;
 511                 } else {
 512                         wimlib_assert(lte->wim->fp != NULL);
 513                         fp = lte->wim->fp;
 514                 }
 515
 516                 ctype = wim_resource_compression_type(lte);
 517
 518                 wimlib_assert(ctype != WIM_COMPRESSION_TYPE_NONE ||
 519                               (lte->resource_entry.original_size ==
 520                                lte->resource_entry.size));
 521
 522                 if ((flags & WIMLIB_RESOURCE_FLAG_RAW)
 523                     || ctype == WIM_COMPRESSION_TYPE_NONE)
 524                         ret = read_uncompressed_resource(fp,
 525                                                          lte->resource_entry.offset + offset,
 526                                                          size, buf);
 527                 else
 528                         ret = read_compressed_resource(fp,
 529                                                        lte->resource_entry.size,
 530                                                        lte->resource_entry.original_size,
 531                                                        lte->resource_entry.offset,
 532                                                        ctype, size, offset, buf);
 533                 if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED) {
 534                         int ret2 = wim_release_fp(lte->wim, fp);
 535                         if (ret == 0)
 536                                 ret = ret2;
 537                 }
 538                 break;
 539         case RESOURCE_IN_STAGING_FILE:
 540         case RESOURCE_IN_FILE_ON_DISK:
 541                 /* The resource is in some file on the external filesystem and
 542                  * needs to be read uncompressed */
 543                 wimlib_assert(lte->file_on_disk);
 544                 wimlib_assert(&lte->file_on_disk == &lte->staging_file_name);
 545                 /* Use existing file pointer if available; otherwise open one
 546                  * temporarily */
 547                 if (lte->file_on_disk_fp) {
 548                         fp = lte->file_on_disk_fp;
 549                 } else {
 550                         fp = fopen(lte->file_on_disk, "rb");
 551                         if (!fp) {
 552                                 ERROR_WITH_ERRNO("Failed to open the file "
 553                                                  "`%s'", lte->file_on_disk);
 554                                 ret = WIMLIB_ERR_OPEN;
 555                                 break;
 556                         }
 557                 }
 558                 ret = read_uncompressed_resource(fp, offset, size, buf);
 559                 if (fp != lte->file_on_disk_fp)
 560                         fclose(fp);
 561                 break;
 562         case RESOURCE_IN_ATTACHED_BUFFER:
 563                 /* The resource is directly attached uncompressed in an
 564                  * in-memory buffer. */
 565                 wimlib_assert(lte->attached_buffer != NULL);
 566                 memcpy(buf, lte->attached_buffer + offset, size);
 567                 break;
 568 #ifdef WITH_NTFS_3G
 569         case RESOURCE_IN_NTFS_VOLUME:
 570                 wimlib_assert(lte->ntfs_loc != NULL);
 571                 wimlib_assert(lte->attr != NULL);
 572                 {
 573                         if (lte->ntfs_loc->is_reparse_point)
 574                                 offset += 8;
 575                         if (ntfs_attr_pread(lte->attr, offset, size, buf) != size) {
 576                                 ERROR_WITH_ERRNO("Error reading NTFS attribute "
 577                                                  "at `%s'",
 578                                                  lte->ntfs_loc->path_utf8);
 579                                 ret = WIMLIB_ERR_NTFS_3G;
 580                         }
 581                         break;
 582                 }
 583 #endif
 584         default:
 585                 wimlib_assert(0);
 586                 ret = -1;
 587                 break;
 588         }
 589         return ret;
 590 }
 591
 592 /*
 593  * Reads all the data from the resource corresponding to a WIM lookup table
 594  * entry.
 595  *
 596  * @lte:        The WIM lookup table entry for the resource.
 597  * @buf:        Buffer into which to write the data.  It must be at least
 598  *              wim_resource_size(lte) bytes long.
 599  *
 600  * Returns 0 on success; nonzero on failure.
 601  */
 602 int read_full_wim_resource(const struct lookup_table_entry *lte, u8 buf[],
 603                            int flags)
 604 {
 605         return read_wim_resource(lte, buf, wim_resource_size(lte), 0, flags);
 606 }
 607
 608 /* Chunk table that's located at the beginning of each compressed resource in
 609  * the WIM.  (This is not the on-disk format; the on-disk format just has an
 610  * array of offsets.) */
 611 struct chunk_table {
 612         off_t file_offset;
 613         u64 num_chunks;
 614         u64 original_resource_size;
 615         u64 bytes_per_chunk_entry;
 616         u64 table_disk_size;
 617         u64 cur_offset;
 618         u64 *cur_offset_p;
 619         u64 offsets[0];
 620 };
 621
 622 /*
 623  * Allocates and initializes a chunk table, and reserves space for it in the
 624  * output file.
 625  */
 626 static int
 627 begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
 628                              FILE *out_fp,
 629                              off_t file_offset,
 630                              struct chunk_table **chunk_tab_ret)
 631 {
 632         u64 size = wim_resource_size(lte);
 633         u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
 634         size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
 635         struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
 636         int ret;
 637
 638         if (!chunk_tab) {
 639                 ERROR("Failed to allocate chunk table for %"PRIu64" byte "
 640                       "resource", size);
 641                 ret = WIMLIB_ERR_NOMEM;
 642                 goto out;
 643         }
 644         chunk_tab->file_offset = file_offset;
 645         chunk_tab->num_chunks = num_chunks;
 646         chunk_tab->original_resource_size = size;
 647         chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
 648         chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
 649                                      (num_chunks - 1);
 650         chunk_tab->cur_offset = 0;
 651         chunk_tab->cur_offset_p = chunk_tab->offsets;
 652
 653         if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
 654                    chunk_tab->table_disk_size) {
 655                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 656                                  "file resource");
 657                 ret = WIMLIB_ERR_WRITE;
 658                 goto out;
 659         }
 660
 661         ret = 0;
 662 out:
 663         *chunk_tab_ret = chunk_tab;
 664         return ret;
 665 }
 666
 667 /*
 668  * Compresses a chunk of a WIM resource.
 669  *
 670  * @chunk:              Uncompressed data of the chunk.
 671  * @chunk_size:         Size of the uncompressed chunk in bytes.
 672  * @compressed_chunk:   Pointer to output buffer of size at least
 673  *                              (@chunk_size - 1) bytes.
 674  * @compressed_chunk_len_ret:   Pointer to an unsigned int into which the size
 675  *                                      of the compressed chunk will be
 676  *                                      returned.
 677  * @ctype:      Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
 678  *              or WIM_COMPRESSION_TYPE_XPRESS.
 679  *
 680  * Returns zero if compressed succeeded, and nonzero if the chunk could not be
 681  * compressed to any smaller than @chunk_size.  This function cannot fail for
 682  * any other reasons.
 683  */
 684 static int compress_chunk(const u8 chunk[], unsigned chunk_size,
 685                           u8 compressed_chunk[],
 686                           unsigned *compressed_chunk_len_ret,
 687                           int ctype)
 688 {
 689         int (*compress)(const void *, unsigned, void *, unsigned *);
 690         switch (ctype) {
 691         case WIM_COMPRESSION_TYPE_LZX:
 692                 compress = lzx_compress;
 693                 break;
 694         case WIM_COMPRESSION_TYPE_XPRESS:
 695                 compress = xpress_compress;
 696                 break;
 697         default:
 698                 wimlib_assert(0);
 699                 break;
 700         }
 701         return (*compress)(chunk, chunk_size, compressed_chunk,
 702                            compressed_chunk_len_ret);
 703 }
 704
 705 /*
 706  * Writes a chunk of a WIM resource to an output file.
 707  *
 708  * @chunk:        Uncompressed data of the chunk.
 709  * @chunk_size:   Size of the chunk (<= WIM_CHUNK_SIZE)
 710  * @out_fp:       FILE * to write tho chunk to.
 711  * @out_ctype:    Compression type to use when writing the chunk (ignored if no
 712  *                      chunk table provided)
 713  * @chunk_tab:    Pointer to chunk table being created.  It is updated with the
 714  *                      offset of the chunk we write.
 715  *
 716  * Returns 0 on success; nonzero on failure.
 717  */
 718 static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
 719                                     FILE *out_fp, int out_ctype,
 720                                     struct chunk_table *chunk_tab)
 721 {
 722         const u8 *out_chunk;
 723         unsigned out_chunk_size;
 724
 725         wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
 726
 727         if (!chunk_tab) {
 728                 out_chunk = chunk;
 729                 out_chunk_size = chunk_size;
 730         } else {
 731                 u8 *compressed_chunk = alloca(chunk_size);
 732                 int ret;
 733
 734                 ret = compress_chunk(chunk, chunk_size, compressed_chunk,
 735                                      &out_chunk_size, out_ctype);
 736                 if (ret == 0) {
 737                         out_chunk = compressed_chunk;
 738                 } else {
 739                         out_chunk = chunk;
 740                         out_chunk_size = chunk_size;
 741                 }
 742                 *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
 743                 chunk_tab->cur_offset += out_chunk_size;
 744         }
 745
 746         if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
 747                 ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
 748                 return WIMLIB_ERR_WRITE;
 749         }
 750         return 0;
 751 }
 752
 753 /*
 754  * Finishes a WIM chunk tale and writes it to the output file at the correct
 755  * offset.
 756  *
 757  * The final size of the full compressed resource is returned in the
 758  * @compressed_size_p.
 759  */
 760 static int
 761 finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
 762                               FILE *out_fp, u64 *compressed_size_p)
 763 {
 764         size_t bytes_written;
 765         if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
 766                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
 767                                  "WIM file", chunk_tab->file_offset);
 768                 return WIMLIB_ERR_WRITE;
 769         }
 770
 771         if (chunk_tab->bytes_per_chunk_entry == 8) {
 772                 array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
 773         } else {
 774                 for (u64 i = 0; i < chunk_tab->num_chunks; i++)
 775                         ((u32*)chunk_tab->offsets)[i] =
 776                                 cpu_to_le32(chunk_tab->offsets[i]);
 777         }
 778         bytes_written = fwrite((u8*)chunk_tab->offsets +
 779                                         chunk_tab->bytes_per_chunk_entry,
 780                                1, chunk_tab->table_disk_size, out_fp);
 781         if (bytes_written != chunk_tab->table_disk_size) {
 782                 ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
 783                                  "file resource");
 784                 return WIMLIB_ERR_WRITE;
 785         }
 786         if (fseeko(out_fp, 0, SEEK_END) != 0) {
 787                 ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
 788                 return WIMLIB_ERR_WRITE;
 789         }
 790         *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
 791         return 0;
 792 }
 793
 794 /*
 795  * Writes a WIM resource to a FILE * opened for writing.  The resource may be
 796  * written uncompressed or compressed depending on the @out_ctype parameter.
 797  *
 798  * If by chance the resource compresses to more than the original size (this may
 799  * happen with random data or files than are pre-compressed), the resource is
 800  * instead written uncompressed (and this is reflected in the @out_res_entry by
 801  * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
 802  *
 803  * @lte:        The lookup table entry for the WIM resource.
 804  * @out_fp:     The FILE * to write the resource to.
 805  * @out_ctype:  The compression type of the resource to write.  Note: if this is
 806  *                      the same as the compression type of the WIM resource we
 807  *                      need to read, we simply copy the data (i.e. we do not
 808  *                      uncompress it, then compress it again).
 809  * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
 810  *                  offset, original size, compressed size, and compression flag
 811  *                  of the output resource.
 812  *
 813  * Returns 0 on success; nonzero on failure.
 814  */
 815 static int write_wim_resource(struct lookup_table_entry *lte,
 816                               FILE *out_fp, int out_ctype,
 817                               struct resource_entry *out_res_entry,
 818                               int flags)
 819 {
 820         u64 bytes_remaining;
 821         u64 original_size;
 822         u64 old_compressed_size;
 823         u64 new_compressed_size;
 824         u64 offset;
 825         int ret;
 826         struct chunk_table *chunk_tab = NULL;
 827         bool raw;
 828         off_t file_offset;
 829 #ifdef WITH_NTFS_3G
 830         ntfs_inode *ni = NULL;
 831 #endif
 832
 833         wimlib_assert(lte);
 834
 835         /* Original size of the resource */
 836         original_size = wim_resource_size(lte);
 837
 838         /* Compressed size of the resource (as it exists now) */
 839         old_compressed_size = wim_resource_compressed_size(lte);
 840
 841         /* Current offset in output file */
 842         file_offset = ftello(out_fp);
 843         if (file_offset == -1) {
 844                 ERROR_WITH_ERRNO("Failed to get offset in output "
 845                                  "stream");
 846                 return WIMLIB_ERR_WRITE;
 847         }
 848
 849         /* Are the compression types the same?  If so, do a raw copy (copy
 850          * without decompressing and recompressing the data). */
 851         raw = (wim_resource_compression_type(lte) == out_ctype
 852                && out_ctype != WIM_COMPRESSION_TYPE_NONE);
 853
 854         if (raw) {
 855                 flags |= WIMLIB_RESOURCE_FLAG_RAW;
 856                 bytes_remaining = old_compressed_size;
 857         } else {
 858                 flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
 859                 bytes_remaining = original_size;
 860         }
 861
 862         /* Empty resource; nothing needs to be done, so just return success. */
 863         if (bytes_remaining == 0)
 864                 return 0;
 865
 866         /* Buffer for reading chunks for the resource */
 867         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
 868
 869         /* If we are writing a compressed resource and not doing a raw copy, we
 870          * need to initialize the chunk table */
 871         if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
 872                 ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
 873                                                    &chunk_tab);
 874                 if (ret != 0)
 875                         goto out;
 876         }
 877
 878         /* If the WIM resource is in an external file, open a FILE * to it so we
 879          * don't have to open a temporary one in read_wim_resource() for each
 880          * chunk. */
 881         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
 882              && !lte->file_on_disk_fp)
 883         {
 884                 wimlib_assert(lte->file_on_disk);
 885                 lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
 886                 if (!lte->file_on_disk_fp) {
 887                         ERROR_WITH_ERRNO("Failed to open the file `%s' for "
 888                                          "reading", lte->file_on_disk);
 889                         ret = WIMLIB_ERR_OPEN;
 890                         goto out;
 891                 }
 892         }
 893 #ifdef WITH_NTFS_3G
 894         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
 895                   && !lte->attr)
 896         {
 897                 struct ntfs_location *loc = lte->ntfs_loc;
 898                 wimlib_assert(loc);
 899                 ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
 900                 if (!ni) {
 901                         ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
 902                                          "volume", loc->path_utf8);
 903                         ret = WIMLIB_ERR_NTFS_3G;
 904                         goto out;
 905                 }
 906                 lte->attr = ntfs_attr_open(ni,
 907                                            loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
 908                                            (ntfschar*)loc->stream_name_utf16,
 909                                            loc->stream_name_utf16_num_chars);
 910                 if (!lte->attr) {
 911                         ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
 912                                          "NTFS volume", loc->path_utf8);
 913                         ret = WIMLIB_ERR_NTFS_3G;
 914                         goto out_fclose;
 915                 }
 916         }
 917 #endif
 918
 919         /* If we aren't doing a raw copy, we will compute the SHA1 message
 920          * digest of the resource as we read it, and verify it's the same as the
 921          * hash given in the lookup table entry once we've finished reading the
 922          * resource. */
 923         SHA_CTX ctx;
 924         if (!raw)
 925                 sha1_init(&ctx);
 926
 927         /* While there are still bytes remaining in the WIM resource, read a
 928          * chunk of the resource, update SHA1, then write that chunk using the
 929          * desired compression type. */
 930         offset = 0;
 931         do {
 932                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
 933                 ret = read_wim_resource(lte, buf, to_read, offset, flags);
 934                 if (ret != 0)
 935                         goto out_fclose;
 936                 if (!raw)
 937                         sha1_update(&ctx, buf, to_read);
 938                 ret = write_wim_resource_chunk(buf, to_read, out_fp,
 939                                                out_ctype, chunk_tab);
 940                 if (ret != 0)
 941                         goto out_fclose;
 942                 bytes_remaining -= to_read;
 943                 offset += to_read;
 944         } while (bytes_remaining);
 945
 946         /* Raw copy:  The new compressed size is the same as the old compressed
 947          * size
 948          *
 949          * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
 950          * original size
 951          *
 952          * Using a different compression type:  Call
 953          * finish_wim_resource_chunk_tab() and it will provide the new
 954          * compressed size.
 955          */
 956         if (raw) {
 957                 new_compressed_size = old_compressed_size;
 958         } else {
 959                 if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
 960                         new_compressed_size = original_size;
 961                 else {
 962                         ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
 963                                                             &new_compressed_size);
 964                         if (ret != 0)
 965                                 goto out_fclose;
 966                 }
 967         }
 968
 969         /* Verify SHA1 message digest of the resource, unless we are doing a raw
 970          * write (in which case we never even saw the uncompressed data).  Or,
 971          * if the hash we had before is all 0's, just re-set it to be the new
 972          * hash. */
 973         if (!raw) {
 974                 u8 md[SHA1_HASH_SIZE];
 975                 sha1_final(md, &ctx);
 976                 if (is_zero_hash(lte->hash)) {
 977                         copy_hash(lte->hash, md);
 978                 } else if (!hashes_equal(md, lte->hash)) {
 979                         ERROR("WIM resource has incorrect hash!");
 980                         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
 981                                 ERROR("We were reading it from `%s'; maybe it changed "
 982                                       "while we were reading it.",
 983                                       lte->file_on_disk);
 984                         }
 985                         ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
 986                         goto out_fclose;
 987                 }
 988         }
 989
 990         if (!raw && new_compressed_size >= original_size &&
 991             out_ctype != WIM_COMPRESSION_TYPE_NONE)
 992         {
 993                 /* Oops!  We compressed the resource to larger than the original
 994                  * size.  Write the resource uncompressed instead. */
 995                 if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
 996                         ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
 997                                          "of output WIM file", file_offset);
 998                         ret = WIMLIB_ERR_WRITE;
 999                         goto out_fclose;
1000                 }
1001                 ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
1002                                          out_res_entry, flags);
1003                 if (ret != 0)
1004                         goto out_fclose;
1005                 if (fflush(out_fp) != 0) {
1006                         ERROR_WITH_ERRNO("Failed to flush output WIM file");
1007                         ret = WIMLIB_ERR_WRITE;
1008                         goto out_fclose;
1009                 }
1010                 if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
1011                         ERROR_WITH_ERRNO("Failed to truncate output WIM file");
1012                         ret = WIMLIB_ERR_WRITE;
1013                         goto out_fclose;
1014                 }
1015         } else {
1016                 if (out_res_entry) {
1017                         out_res_entry->size          = new_compressed_size;
1018                         out_res_entry->original_size = original_size;
1019                         out_res_entry->offset        = file_offset;
1020                         out_res_entry->flags         = lte->resource_entry.flags
1021                                                         & ~WIM_RESHDR_FLAG_COMPRESSED;
1022                         if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
1023                                 out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
1024                 }
1025         }
1026         ret = 0;
1027 out_fclose:
1028         if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
1029             && lte->file_on_disk_fp) {
1030                 fclose(lte->file_on_disk_fp);
1031                 lte->file_on_disk_fp = NULL;
1032         }
1033 #ifdef WITH_NTFS_3G
1034         else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
1035                 if (lte->attr) {
1036                         ntfs_attr_close(lte->attr);
1037                         lte->attr = NULL;
1038                 }
1039                 if (ni)
1040                         ntfs_inode_close(ni);
1041         }
1042 #endif
1043 out:
1044         FREE(chunk_tab);
1045         return ret;
1046 }
1047
1048 /* Like write_wim_resource(), but the resource is specified by a buffer of
1049  * uncompressed data rather a lookup table entry; also writes the SHA1 hash of
1050  * the buffer to @hash.  */
1051 static int write_wim_resource_from_buffer(const u8 *buf, u64 buf_size,
1052                                           FILE *out_fp, int out_ctype,
1053                                           struct resource_entry *out_res_entry,
1054                                           u8 hash[SHA1_HASH_SIZE])
1055 {
1056         /* Set up a temporary lookup table entry to provide to
1057          * write_wim_resource(). */
1058         struct lookup_table_entry lte;
1059         int ret;
1060         lte.resource_entry.flags         = 0;
1061         lte.resource_entry.original_size = buf_size;
1062         lte.resource_entry.size          = buf_size;
1063         lte.resource_entry.offset        = 0;
1064         lte.resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
1065         lte.attached_buffer              = (u8*)buf;
1066
1067         zero_out_hash(lte.hash);
1068         ret = write_wim_resource(&lte, out_fp, out_ctype, out_res_entry, 0);
1069         if (ret != 0)
1070                 return ret;
1071         copy_hash(hash, lte.hash);
1072         return 0;
1073 }
1074
1075 /*
1076  * Extracts the first @size bytes of the WIM resource specified by @lte to the
1077  * open file descriptor @fd.
1078  *
1079  * Returns 0 on success; nonzero on failure.
1080  */
1081 int extract_wim_resource_to_fd(const struct lookup_table_entry *lte, int fd,
1082                                u64 size)
1083 {
1084         u64 bytes_remaining = size;
1085         u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
1086         u64 offset = 0;
1087         int ret = 0;
1088         u8 hash[SHA1_HASH_SIZE];
1089
1090         SHA_CTX ctx;
1091         sha1_init(&ctx);
1092
1093         while (bytes_remaining) {
1094                 u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
1095                 ret = read_wim_resource(lte, buf, to_read, offset, 0);
1096                 if (ret != 0)
1097                         break;
1098                 sha1_update(&ctx, buf, to_read);
1099                 if (full_write(fd, buf, to_read) < to_read) {
1100                         ERROR_WITH_ERRNO("Error extracting WIM resource");
1101                         return WIMLIB_ERR_WRITE;
1102                 }
1103                 bytes_remaining -= to_read;
1104                 offset += to_read;
1105         }
1106         sha1_final(hash, &ctx);
1107         if (!hashes_equal(hash, lte->hash)) {
1108                 ERROR("Invalid checksum on a WIM resource "
1109                       "(detected when extracting to external file)");
1110                 ERROR("The following WIM resource is invalid:");
1111                 print_lookup_table_entry(lte);
1112                 return WIMLIB_ERR_INVALID_RESOURCE_HASH;
1113         }
1114         return 0;
1115 }
1116
1117 /*
1118  * Extracts the WIM resource specified by @lte to the open file descriptor @fd.
1119  *
1120  * Returns 0 on success; nonzero on failure.
1121  */
1122 int extract_full_wim_resource_to_fd(const struct lookup_table_entry *lte, int fd)
1123 {
1124         return extract_wim_resource_to_fd(lte, fd, wim_resource_size(lte));
1125 }
1126
1127 /*
1128  * Copies the file resource specified by the lookup table entry @lte from the
1129  * input WIM to the output WIM that has its FILE * given by
1130  * ((WIMStruct*)wim)->out_fp.
1131  *
1132  * The output_resource_entry, out_refcnt, and part_number fields of @lte are
1133  * updated.
1134  *
1135  * Metadata resources are not copied (they are handled elsewhere for joining and
1136  * splitting).
1137  */
1138 int copy_resource(struct lookup_table_entry *lte, void *wim)
1139 {
1140         WIMStruct *w = wim;
1141         int ret;
1142
1143         if ((lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) &&
1144             !w->write_metadata)
1145                 return 0;
1146
1147         ret = write_wim_resource(lte, w->out_fp,
1148                                  wim_resource_compression_type(lte),
1149                                  &lte->output_resource_entry, 0);
1150         if (ret != 0)
1151                 return ret;
1152         lte->out_refcnt = lte->refcnt;
1153         lte->part_number = w->hdr.part_number;
1154         return 0;
1155 }
1156
1157 /*
1158  * Writes a dentry's resources, including the main file resource as well as all
1159  * alternate data streams, to the output file.
1160  *
1161  * @dentry:  The dentry for the file.
1162  * @wim_p:   A pointer to the WIMStruct containing @dentry.
1163  *
1164  * @return zero on success, nonzero on failure.
1165  */
1166 int write_dentry_resources(struct dentry *dentry, void *wim_p)
1167 {
1168         WIMStruct *w = wim_p;
1169         int ret = 0;
1170         struct lookup_table_entry *lte;
1171         int ctype = wimlib_get_compression_type(w);
1172
1173         if (w->write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
1174                 wimlib_assert(dentry->full_path_utf8);
1175                 printf("Writing streams for `%s'\n", dentry->full_path_utf8);
1176         }
1177
1178         for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
1179                 lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
1180                 if (lte && ++lte->out_refcnt == 1) {
1181                         ret = write_wim_resource(lte, w->out_fp, ctype,
1182                                                  &lte->output_resource_entry, 0);
1183                         if (ret != 0)
1184                                 break;
1185                 }
1186         }
1187         return ret;
1188 }
1189
1190 /*
1191  * Reads the metadata metadata resource from the WIM file.  The metadata
1192  * resource consists of the security data, followed by the directory entry for
1193  * the root directory, followed by all the other directory entries in the
1194  * filesystem.  The subdir_offset field of each directory entry gives the start
1195  * of its child entries from the beginning of the metadata resource.  An
1196  * end-of-directory is signaled by a directory entry of length '0', really of
1197  * length 8, because that's how long the 'length' field is.
1198  *
1199  * @fp:         The FILE* for the input WIM file.
1200  * @wim_ctype:  The compression type of the WIM file.
1201  * @imd:        Pointer to the image metadata structure.  Its `metadata_lte'
1202  *              member specifies the lookup table entry for the metadata
1203  *              resource.  The rest of the image metadata entry will be filled
1204  *              in by this function.
1205  *
1206  * @return:     Zero on success, nonzero on failure.
1207  */
1208 int read_metadata_resource(WIMStruct *w, struct image_metadata *imd)
1209 {
1210         u8 *buf;
1211         u32 dentry_offset;
1212         int ret;
1213         struct dentry *dentry;
1214         struct inode_table inode_tab;
1215         const struct lookup_table_entry *metadata_lte;
1216         u64 metadata_len;
1217         u64 metadata_offset;
1218         struct hlist_head inode_list;
1219
1220         metadata_lte = imd->metadata_lte;
1221         metadata_len = wim_resource_size(metadata_lte);
1222         metadata_offset = metadata_lte->resource_entry.offset;
1223
1224         DEBUG("Reading metadata resource: length = %"PRIu64", "
1225               "offset = %"PRIu64"", metadata_len, metadata_offset);
1226
1227         /* There is no way the metadata resource could possibly be less than (8
1228          * + WIM_DENTRY_DISK_SIZE) bytes, where the 8 is for security data (with
1229          * no security descriptors) and WIM_DENTRY_DISK_SIZE is for the root
1230          * dentry. */
1231         if (metadata_len < 8 + WIM_DENTRY_DISK_SIZE) {
1232                 ERROR("Expected at least %u bytes for the metadata resource",
1233                       8 + WIM_DENTRY_DISK_SIZE);
1234                 return WIMLIB_ERR_INVALID_RESOURCE_SIZE;
1235         }
1236
1237         if (sizeof(size_t) < 8 && metadata_len > 0xffffffff) {
1238                 ERROR("Metadata resource is too large (%"PRIu64" bytes",
1239                       metadata_len);
1240                 return WIMLIB_ERR_INVALID_RESOURCE_SIZE;
1241         }
1242
1243         /* Allocate memory for the uncompressed metadata resource. */
1244         buf = MALLOC(metadata_len);
1245
1246         if (!buf) {
1247                 ERROR("Failed to allocate %"PRIu64" bytes for uncompressed "
1248                       "metadata resource", metadata_len);
1249                 return WIMLIB_ERR_NOMEM;
1250         }
1251
1252         /* Read the metadata resource into memory.  (It may be compressed.) */
1253         ret = read_full_wim_resource(metadata_lte, buf, 0);
1254         if (ret != 0)
1255                 goto out_free_buf;
1256
1257         DEBUG("Finished reading metadata resource into memory.");
1258
1259         /* The root directory entry starts after security data, aligned on an
1260          * 8-byte boundary within the metadata resource.
1261          *
1262          * The security data starts with a 4-byte integer giving its total
1263          * length, so if we round that up to an 8-byte boundary that gives us
1264          * the offset of the root dentry.
1265          *
1266          * Here we read the security data into a wim_security_data structure,
1267          * and if successful, go ahead and calculate the offset in the metadata
1268          * resource of the root dentry. */
1269
1270         wimlib_assert(imd->security_data == NULL);
1271         ret = read_security_data(buf, metadata_len, &imd->security_data);
1272         if (ret != 0)
1273                 goto out_free_buf;
1274
1275         dentry_offset = (imd->security_data->total_length + 7) & ~7;
1276
1277         if (dentry_offset == 0) {
1278                 ERROR("Integer overflow while reading metadata resource");
1279                 ret = WIMLIB_ERR_INVALID_SECURITY_DATA;
1280                 goto out_free_security_data;
1281         }
1282
1283         /* Allocate memory for the root dentry and read it into memory */
1284         dentry = MALLOC(sizeof(struct dentry));
1285         if (!dentry) {
1286                 ERROR("Failed to allocate %zu bytes for root dentry",
1287                       sizeof(struct dentry));
1288                 ret = WIMLIB_ERR_NOMEM;
1289                 goto out_free_security_data;
1290         }
1291
1292         ret = read_dentry(buf, metadata_len, dentry_offset, dentry);
1293
1294         /* This is the root dentry, so set its parent to itself. */
1295         dentry->parent = dentry;
1296
1297         if (ret != 0)
1298                 goto out_free_dentry_tree;
1299         inode_add_dentry(dentry, dentry->d_inode);
1300
1301         /* Now read the entire directory entry tree into memory. */
1302         DEBUG("Reading dentry tree");
1303         ret = read_dentry_tree(buf, metadata_len, dentry);
1304         if (ret != 0)
1305                 goto out_free_dentry_tree;
1306
1307         /* Calculate the full paths in the dentry tree. */
1308         DEBUG("Calculating dentry full paths");
1309         ret = for_dentry_in_tree(dentry, calculate_dentry_full_path, NULL);
1310         if (ret != 0)
1311                 goto out_free_dentry_tree;
1312
1313         /* Build hash table that maps hard link group IDs to dentry sets */
1314         DEBUG("Building link group table");
1315         ret = init_inode_table(&inode_tab, 9001);
1316         if (ret != 0)
1317                 goto out_free_dentry_tree;
1318
1319         for_dentry_in_tree(dentry, inode_table_insert, &inode_tab);
1320
1321         DEBUG("Fixing inconsistencies in the hard link groups");
1322         ret = fix_inodes(&inode_tab, &inode_list);
1323         destroy_inode_table(&inode_tab);
1324         if (ret != 0)
1325                 goto out_free_dentry_tree;
1326
1327         DEBUG("Running miscellaneous verifications on the dentry tree");
1328         for_lookup_table_entry(w->lookup_table, lte_zero_real_refcnt, NULL);
1329         ret = for_dentry_in_tree(dentry, verify_dentry, w);
1330         if (ret != 0)
1331                 goto out_free_dentry_tree;
1332
1333         DEBUG("Done reading image metadata");
1334
1335         imd->root_dentry = dentry;
1336         imd->inode_list  = inode_list;
1337         goto out_free_buf;
1338 out_free_dentry_tree:
1339         free_dentry_tree(dentry, NULL);
1340 out_free_security_data:
1341         free_security_data(imd->security_data);
1342         imd->security_data = NULL;
1343 out_free_buf:
1344         FREE(buf);
1345         return ret;
1346 }
1347
1348 /* Write the metadata resource for the current WIM image. */
1349 int write_metadata_resource(WIMStruct *w)
1350 {
1351         u8 *buf;
1352         u8 *p;
1353         int ret;
1354         u64 subdir_offset;
1355         struct dentry *root;
1356         struct lookup_table_entry *lte;
1357         u64 metadata_original_size;
1358         const struct wim_security_data *sd;
1359
1360         DEBUG("Writing metadata resource for image %d", w->current_image);
1361
1362         root = wim_root_dentry(w);
1363         sd = wim_security_data(w);
1364
1365         /* We do not allow the security data pointer to be NULL, although it may
1366          * point to an empty security data with no entries. */
1367         wimlib_assert(root != NULL);
1368         wimlib_assert(sd != NULL);
1369
1370         /* Offset of first child of the root dentry.  It's equal to:
1371          * - The total length of the security data, rounded to the next 8-byte
1372          *   boundary,
1373          * - plus the total length of the root dentry,
1374          * - plus 8 bytes for an end-of-directory entry following the root
1375          *   dentry (shouldn't really be needed, but just in case...)
1376          */
1377         subdir_offset = ((sd->total_length + 7) & ~7) +
1378                         dentry_correct_total_length(root) + 8;
1379
1380         /* Calculate the subdirectory offsets for the entire dentry tree. */
1381         calculate_subdir_offsets(root, &subdir_offset);
1382
1383         /* Total length of the metadata resource (uncompressed) */
1384         metadata_original_size = subdir_offset;
1385
1386         /* Allocate a buffer to contain the uncompressed metadata resource */
1387         buf = MALLOC(metadata_original_size);
1388         if (!buf) {
1389                 ERROR("Failed to allocate %"PRIu64" bytes for "
1390                       "metadata resource", metadata_original_size);
1391                 return WIMLIB_ERR_NOMEM;
1392         }
1393
1394         /* Write the security data into the resource buffer */
1395         p = write_security_data(sd, buf);
1396
1397         /* Write the dentry tree into the resource buffer */
1398         p = write_dentry_tree(root, p);
1399
1400         /* We MUST have exactly filled the buffer; otherwise we calculated its
1401          * size incorrectly or wrote the data incorrectly. */
1402         wimlib_assert(p - buf == metadata_original_size);
1403
1404         /* Get the lookup table entry for the metadata resource so we can update
1405          * it. */
1406         lte = wim_metadata_lookup_table_entry(w);
1407
1408         wimlib_assert(lte != NULL);
1409
1410         /* Write the metadata resource to the output WIM using the proper
1411          * compression type.  The lookup table entry for the metadata resource
1412          * is updated. */
1413         ret = write_wim_resource_from_buffer(buf, metadata_original_size,
1414                                              w->out_fp,
1415                                              wimlib_get_compression_type(w),
1416                                              &lte->output_resource_entry,
1417                                              lte->hash);
1418         if (ret != 0)
1419                 goto out;
1420
1421         /* It's very likely the SHA1 message digest of the metadata resource
1422          * changed, so re-insert the lookup table entry into the lookup table.
1423          *
1424          * We do not check for other lookup table entries having the same SHA1
1425          * message digest.  It's possible for 2 absolutely identical images to
1426          * be added, therefore causing 2 identical metadata resources to be in
1427          * the WIM.  However, in this case, it's expected for 2 separate lookup
1428          * table entries to be created, even though this doesn't make a whole
1429          * lot of sense since they will share the same SHA1 message digest.
1430          * */
1431         lookup_table_unlink(w->lookup_table, lte);
1432         lookup_table_insert(w->lookup_table, lte);
1433
1434         wimlib_assert(lte->out_refcnt == 0);
1435         lte->out_refcnt = 1;
1436
1437         /* Make sure that the resource entry is written marked with the metadata
1438          * flag. */
1439         lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA;
1440 out:
1441         /* All the data has been written to the new WIM; no need for the buffer
1442          * anymore */
1443         FREE(buf);
1444         return ret;
1445 }