wimlib.net Git - wimlib/blobdiff - src/resource.c
WIM chunk size: Calculate with existing macros when possible
[wimlib] / src / resource.c
index 73f860eb4d4bd559de6fc6be70dc4cc5585729a2..29bfcd7fd0a962fb7c29d6f7621251876bc43e5d 100644 (file)
  * wimlib; if not, see http://www.gnu.org/licenses/.
  */
 
-#include "wimlib_internal.h"
-#include "dentry.h"
-#include "lookup_table.h"
-#include "buffer_io.h"
-#include "sha1.h"
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "wimlib.h"
+#include "wimlib/dentry.h"
+#include "wimlib/endianness.h"
+#include "wimlib/error.h"
+#include "wimlib/file_io.h"
+#include "wimlib/lookup_table.h"
+#include "wimlib/resource.h"
+#include "wimlib/sha1.h"
 
 #ifdef __WIN32__
-#  include "win32.h"
+/* for read_win32_file_prefix(), read_win32_encrypted_file_prefix() */
+#  include "wimlib/win32.h"
 #endif
 
-#include <errno.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
+#ifdef WITH_NTFS_3G
+/* for read_ntfs_file_prefix() */
+#  include "wimlib/ntfs_3g.h"
+#endif
 
 #ifdef HAVE_ALLOCA_H
 #  include <alloca.h>
 #endif
-
-/* Write @n bytes from @buf to the file descriptor @fd, retrying on internupt
- * and on short writes.
- *
- * Returns short count and set errno on failure. */
-static ssize_t
-full_write(int fd, const void *buf, size_t n)
-{
-       const void *p = buf;
-       ssize_t ret;
-       ssize_t total = 0;
-
-       while (total != n) {
-               ret = write(fd, p, n);
-               if (ret < 0) {
-                       if (errno == EINTR)
-                               continue;
-                       else
-                               break;
-               }
-               total += ret;
-               p += ret;
-       }
-       return total;
-}
-
-/* Read @n bytes from the file descriptor @fd to the buffer @buf, retrying on
- * internupt and on short reads.
- *
- * Returns short count and set errno on failure. */
-static size_t
-full_read(int fd, void *buf, size_t n)
-{
-       size_t bytes_remaining = n;
-       while (bytes_remaining) {
-               ssize_t bytes_read = read(fd, buf, bytes_remaining);
-               if (bytes_read < 0) {
-                       if (errno == EINTR)
-                               continue;
-                       break;
-               }
-               bytes_remaining -= bytes_read;
-               buf += bytes_read;
-       }
-       return n - bytes_remaining;
-}
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <unistd.h>
 
 /*
  * Reads all or part of a compressed WIM resource.
@@ -94,7 +60,7 @@ full_read(int fd, void *buf, size_t n)
  * Returns zero on success, nonzero on failure.
  */
 static int
-read_compressed_resource(FILE *fp,
+read_compressed_resource(int in_fd,
                         u64 resource_compressed_size,
                         u64 resource_uncompressed_size,
                         u64 resource_offset,
@@ -131,8 +97,8 @@ read_compressed_resource(FILE *fp,
 
        /* Calculate how many chunks the resource consists of in its entirety.
         * */
-       u64 num_chunks = (resource_uncompressed_size + WIM_CHUNK_SIZE - 1) /
-                                                               WIM_CHUNK_SIZE;
+       u64 num_chunks = DIV_ROUND_UP(resource_uncompressed_size, WIM_CHUNK_SIZE);
+
        /* As mentioned, the first chunk has no entry in the chunk table. */
        u64 num_chunk_entries = num_chunks - 1;
 
@@ -157,11 +123,20 @@ read_compressed_resource(FILE *fp,
        if (end_chunk != num_chunks - 1)
                num_needed_chunks++;
 
+       /* According to M$'s documentation, if the uncompressed size of
+        * the file is greater than 4 GB, the chunk entries are 8-byte
+        * integers.  Otherwise, they are 4-byte integers. */
+       u64 chunk_entry_size = (resource_uncompressed_size >
+                               (u64)1 << 32) ?  8 : 4;
+
+       /* Size of the full chunk table in the WIM file. */
+       u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
+
        /* Allocate the chunk table.  It will only contain offsets for the
         * chunks that are actually needed for this read. */
        u64 *chunk_offsets;
        bool chunk_offsets_malloced;
-       if (num_needed_chunks < 1000) {
+       if (num_needed_chunks < 1024) {
                chunk_offsets = alloca(num_needed_chunks * sizeof(u64));
                chunk_offsets_malloced = false;
        } else {
@@ -185,14 +160,6 @@ read_compressed_resource(FILE *fp,
        if (start_chunk == 0)
                chunk_offsets[0] = 0;
 
-       /* According to M$'s documentation, if the uncompressed size of
-        * the file is greater than 4 GB, the chunk entries are 8-byte
-        * integers.  Otherwise, they are 4-byte integers. */
-       u64 chunk_entry_size = (resource_uncompressed_size >= (u64)1 << 32) ?
-                                                                       8 : 4;
-
-       /* Size of the full chunk table in the WIM file. */
-       u64 chunk_table_size = chunk_entry_size * num_chunk_entries;
 
        /* Read the needed chunk offsets from the table in the WIM file. */
 
@@ -208,17 +175,34 @@ read_compressed_resource(FILE *fp,
        /* Skip over unneeded chunk table entries. */
        u64 file_offset_of_needed_chunk_entries = resource_offset +
                                start_table_idx * chunk_entry_size;
-       if (fseeko(fp, file_offset_of_needed_chunk_entries, SEEK_SET))
-               goto read_error;
+
+       /* Allocate a buffer into which to read the raw chunk entries. */
+       void *chunk_tab_buf;
+       bool chunk_tab_buf_malloced = false;
 
        /* Number of bytes we need to read from the chunk table. */
        size_t size = num_needed_chunk_entries * chunk_entry_size;
+       if ((u64)size != num_needed_chunk_entries * chunk_entry_size) {
+               ERROR("Compressed read request too large to fit into memory!");
+               ret = WIMLIB_ERR_NOMEM;
+               goto out;
+       }
 
-       /* Read the raw data into the end of the chunk_offsets array to
-        * avoid allocating another array. */
-       void *chunk_tab_buf = (void*)&chunk_offsets[num_needed_chunks] - size;
+       if (size < 4096) {
+               chunk_tab_buf = alloca(size);
+       } else {
+               chunk_tab_buf = malloc(size);
+               if (!chunk_tab_buf) {
+                       ERROR("Failed to allocate chunk table buffer of "
+                             "size %zu bytes", size);
+                       ret = WIMLIB_ERR_NOMEM;
+                       goto out;
+               }
+               chunk_tab_buf_malloced = true;
+       }
 
-       if (fread(chunk_tab_buf, 1, size, fp) != size)
+       if (full_pread(in_fd, chunk_tab_buf, size,
+                      file_offset_of_needed_chunk_entries) != size)
                goto read_error;
 
        /* Now fill in chunk_offsets from the entries we have read in
@@ -229,22 +213,19 @@ read_compressed_resource(FILE *fp,
                chunk_tab_p++;
 
        if (chunk_entry_size == 4) {
-               u32 *entries = (u32*)chunk_tab_buf;
+               le32 *entries = (le32*)chunk_tab_buf;
                while (num_needed_chunk_entries--)
                        *chunk_tab_p++ = le32_to_cpu(*entries++);
        } else {
-               u64 *entries = (u64*)chunk_tab_buf;
+               le64 *entries = (le64*)chunk_tab_buf;
                while (num_needed_chunk_entries--)
                        *chunk_tab_p++ = le64_to_cpu(*entries++);
        }
 
-       /* Done with the chunk table now.  We must now seek to the first chunk
-        * that is needed for the read. */
+       /* Done reading the chunk table now.  Now calculate the file offset for
+        * the first byte of compressed data we need to read. */
 
-       u64 file_offset_of_first_needed_chunk = resource_offset +
-                               chunk_table_size + chunk_offsets[0];
-       if (fseeko(fp, file_offset_of_first_needed_chunk, SEEK_SET))
-               goto read_error;
+       u64 cur_read_offset = resource_offset + chunk_table_size + chunk_offsets[0];
 
        /* Pointer to current position in the output buffer for uncompressed
         * data.  Alternatively, if using a callback function, we repeatedly
@@ -320,19 +301,24 @@ read_compressed_resource(FILE *fp,
                 * is equal to the uncompressed chunk size. */
                if (compressed_chunk_size == uncompressed_chunk_size) {
                        /* Uncompressed chunk */
-                       if (start_offset != 0)
-                               if (fseeko(fp, start_offset, SEEK_CUR))
-                                       goto read_error;
-                       if (fread(cb ? out_p + start_offset : out_p,
-                                 1, partial_chunk_size, fp) != partial_chunk_size)
+                       if (full_pread(in_fd,
+                                      cb ? out_p + start_offset : out_p,
+                                      partial_chunk_size,
+                                      cur_read_offset + start_offset) != partial_chunk_size)
+                       {
                                goto read_error;
+                       }
                } else {
                        /* Compressed chunk */
 
                        /* Read the compressed data into compressed_buf. */
-                       if (fread(compressed_buf, 1, compressed_chunk_size,
-                                               fp) != compressed_chunk_size)
+                       if (full_pread(in_fd,
+                                      compressed_buf,
+                                      compressed_chunk_size,
+                                      cur_read_offset) != compressed_chunk_size)
+                       {
                                goto read_error;
+                       }
 
                        /* For partial chunks and when writing directly to a
                         * buffer, we must buffer the uncompressed data because
@@ -374,150 +360,80 @@ read_compressed_resource(FILE *fp,
                         * written.  */
                        out_p += partial_chunk_size;
                }
+               cur_read_offset += compressed_chunk_size;
        }
 
        ret = 0;
 out:
        if (chunk_offsets_malloced)
                FREE(chunk_offsets);
+       if (chunk_tab_buf_malloced)
+               FREE(chunk_tab_buf);
        return ret;
 
 read_error:
-       if (feof(fp))
-               ERROR("Unexpected EOF in compressed file resource");
-       else
-               ERROR_WITH_ERRNO("Error reading compressed file resource");
+       ERROR_WITH_ERRNO("Error reading compressed file resource");
        ret = WIMLIB_ERR_READ;
        goto out;
 }
 
-/*
- * Reads uncompressed data from an open file stream.
- */
-int
-read_uncompressed_resource(FILE *fp, u64 offset, u64 len, void *contents_ret)
+/* Translates a WIM resource entry from the on-disk format to an in-memory
+ * format. */
+void
+get_resource_entry(const struct resource_entry_disk *disk_entry,
+                  struct resource_entry *entry)
 {
-       if (fseeko(fp, offset, SEEK_SET) != 0) {
-               ERROR("Failed to seek to byte %"PRIu64" of input file "
-                     "to read uncompressed resource (len = %"PRIu64")",
-                     offset, len);
-               return WIMLIB_ERR_READ;
-       }
-       if (fread(contents_ret, 1, len, fp) != len) {
-               if (feof(fp)) {
-                       ERROR("Unexpected EOF in uncompressed file resource");
-               } else {
-                       ERROR("Failed to read %"PRIu64" bytes from "
-                             "uncompressed resource at offset %"PRIu64,
-                             len, offset);
-               }
-               return WIMLIB_ERR_READ;
-       }
-       return 0;
-}
-
-/* Reads the contents of a struct resource_entry, as represented in the on-disk
- * format, from the memory pointed to by @p, and fills in the fields of @entry.
- * A pointer to the byte after the memory read at @p is returned. */
-const void *
-get_resource_entry(const void *p, struct resource_entry *entry)
-{
-       u64 size;
-       u8 flags;
-
-       p = get_u56(p, &size);
-       p = get_u8(p, &flags);
-       entry->size = size;
-       entry->flags = flags;
+       /* Note: disk_entry may not be 8 byte aligned--- in that case, the
+        * offset and original_size members will be unaligned.  (This should be
+        * okay since `struct resource_entry_disk' is declared as packed.) */
+
+       /* Read the size and flags into a bitfield portably... */
+       entry->size = (((u64)disk_entry->size[0] <<  0) |
+                      ((u64)disk_entry->size[1] <<  8) |
+                      ((u64)disk_entry->size[2] << 16) |
+                      ((u64)disk_entry->size[3] << 24) |
+                      ((u64)disk_entry->size[4] << 32) |
+                      ((u64)disk_entry->size[5] << 40) |
+                      ((u64)disk_entry->size[6] << 48));
+       entry->flags = disk_entry->flags;
+       entry->offset = le64_to_cpu(disk_entry->offset);
+       entry->original_size = le64_to_cpu(disk_entry->original_size);
 
        /* offset and original_size are truncated to 62 bits to avoid possible
         * overflows, when converting to a signed 64-bit integer (off_t) or when
         * adding size or original_size.  This is okay since no one would ever
         * actually have a WIM bigger than 4611686018427387903 bytes... */
-       p = get_u64(p, &entry->offset);
        if (entry->offset & 0xc000000000000000ULL) {
                WARNING("Truncating offset in resource entry");
                entry->offset &= 0x3fffffffffffffffULL;
        }
-       p = get_u64(p, &entry->original_size);
        if (entry->original_size & 0xc000000000000000ULL) {
                WARNING("Truncating original_size in resource entry");
                entry->original_size &= 0x3fffffffffffffffULL;
        }
-       return p;
-}
-
-/* Copies the struct resource_entry @entry to the memory pointed to by @p in the
- * on-disk format.  A pointer to the byte after the memory written at @p is
- * returned. */
-void *
-put_resource_entry(void *p, const struct resource_entry *entry)
-{
-       p = put_u56(p, entry->size);
-       p = put_u8(p, entry->flags);
-       p = put_u64(p, entry->offset);
-       p = put_u64(p, entry->original_size);
-       return p;
 }
 
-static FILE *
-wim_get_fp(WIMStruct *w)
+/* Translates a WIM resource entry from an in-memory format into the on-disk
+ * format. */
+void
+put_resource_entry(const struct resource_entry *entry,
+                  struct resource_entry_disk *disk_entry)
 {
-#ifdef WITH_FUSE
-       pthread_mutex_lock(&w->fp_tab_mutex);
-       FILE *fp;
-
-       wimlib_assert(w->filename != NULL);
-
-       for (size_t i = 0; i < w->num_allocated_fps; i++) {
-               if (w->fp_tab[i]) {
-                       fp = w->fp_tab[i];
-                       w->fp_tab[i] = NULL;
-                       goto out_unlock;
-               }
-       }
-       DEBUG("Opening extra file descriptor to `%"TS"'", w->filename);
-       fp = tfopen(w->filename, T("rb"));
-       if (!fp)
-               ERROR_WITH_ERRNO("Failed to open `%"TS"'", w->filename);
-out_unlock:
-       pthread_mutex_unlock(&w->fp_tab_mutex);
-#else /* WITH_FUSE */
-       fp = w->fp;
-#endif /* !WITH_FUSE */
-       return fp;
-}
-
-static int
-wim_release_fp(WIMStruct *w, FILE *fp)
-{
-       int ret = 0;
-#ifdef WITH_FUSE
-       FILE **fp_tab;
-
-       pthread_mutex_lock(&w->fp_tab_mutex);
-
-       for (size_t i = 0; i < w->num_allocated_fps; i++) {
-               if (w->fp_tab[i] == NULL) {
-                       w->fp_tab[i] = fp;
-                       goto out_unlock;
-               }
-       }
-
-       fp_tab = REALLOC(w->fp_tab, sizeof(FILE*) * (w->num_allocated_fps + 4));
-       if (!fp_tab) {
-               ret = WIMLIB_ERR_NOMEM;
-               fclose(fp);
-               goto out_unlock;
-       }
-       w->fp_tab = fp_tab;
-       memset(&w->fp_tab[w->num_allocated_fps], 0, 4 * sizeof(FILE*));
-       w->fp_tab[w->num_allocated_fps] = fp;
-       w->num_allocated_fps += 4;
-out_unlock:
-       pthread_mutex_unlock(&w->fp_tab_mutex);
-#endif /* WITH_FUSE */
-       return ret;
+       /* Note: disk_entry may not be 8 byte aligned--- in that case, the
+        * offset and original_size members will be unaligned.  (This should be
+        * okay since `struct resource_entry_disk' is declared as packed.) */
+       u64 size = entry->size;
+
+       disk_entry->size[0] = size >>  0;
+       disk_entry->size[1] = size >>  8;
+       disk_entry->size[2] = size >> 16;
+       disk_entry->size[3] = size >> 24;
+       disk_entry->size[4] = size >> 32;
+       disk_entry->size[5] = size >> 40;
+       disk_entry->size[6] = size >> 48;
+       disk_entry->flags = entry->flags;
+       disk_entry->offset = cpu_to_le64(entry->offset);
+       disk_entry->original_size = cpu_to_le64(entry->original_size);
 }
 
 static int
@@ -528,27 +444,19 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                          int flags,
                          u64 offset)
 {
-       FILE *wim_fp;
        WIMStruct *wim;
+       int in_fd;
        int ret;
 
        wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
 
        wim = lte->wim;
-       if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ) {
-               wim_fp = wim_get_fp(wim);
-               if (!wim_fp) {
-                       ret = WIMLIB_ERR_READ;
-                       goto out;
-               }
-       } else {
-               wim_fp = lte->wim->fp;
-       }
+       in_fd = wim->in_fd;
 
        if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED &&
            !(flags & WIMLIB_RESOURCE_FLAG_RAW))
        {
-               ret = read_compressed_resource(wim_fp,
+               ret = read_compressed_resource(in_fd,
                                               lte->resource_entry.size,
                                               lte->resource_entry.original_size,
                                               lte->resource_entry.offset,
@@ -559,47 +467,32 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                                               ctx_or_buf);
        } else {
                offset += lte->resource_entry.offset;
-
-               if (fseeko(wim_fp, offset, SEEK_SET)) {
-                       ERROR_WITH_ERRNO("Failed to seek to offset %"PRIu64
-                                        " in WIM", offset);
-                       ret = WIMLIB_ERR_READ;
-                       goto out_release_fp;
-               }
                if (cb) {
                        /* Send data to callback function */
                        u8 buf[min(WIM_CHUNK_SIZE, size)];
                        while (size) {
                                size_t bytes_to_read = min(WIM_CHUNK_SIZE, size);
-                               size_t bytes_read = fread(buf, 1, bytes_to_read, wim_fp);
-
+                               size_t bytes_read = full_pread(in_fd, buf,
+                                                              bytes_to_read, offset);
                                if (bytes_read != bytes_to_read)
                                        goto read_error;
                                ret = cb(buf, bytes_read, ctx_or_buf);
                                if (ret)
-                                       goto out_release_fp;
+                                       goto out;
                                size -= bytes_read;
+                               offset += bytes_read;
                        }
                } else {
                        /* Send data directly to a buffer */
-                       if (fread(ctx_or_buf, 1, size, wim_fp) != size)
+                       if (full_pread(in_fd, ctx_or_buf, size, offset) != size)
                                goto read_error;
                }
                ret = 0;
        }
-       goto out_release_fp;
+       goto out;
 read_error:
-       if (ferror(wim_fp))
-               ERROR_WITH_ERRNO("Error reading data from WIM");
-       else
-               ERROR("Unexpected EOF in WIM!");
+       ERROR_WITH_ERRNO("Error reading data from WIM");
        ret = WIMLIB_ERR_READ;
-out_release_fp:
-       if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ) {
-               int ret2 = wim_release_fp(wim, wim_fp);
-               if (ret == 0)
-                       ret = ret2;
-       }
 out:
        if (ret) {
                if (errno == 0)
@@ -611,12 +504,9 @@ out:
 
 int
 read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
-                                  size_t size, u64 offset, void *buf,
-                                  bool threadsafe)
+                                  size_t size, u64 offset, void *buf)
 {
-       return read_partial_wim_resource(lte, size, NULL, buf,
-                                        threadsafe ? WIMLIB_RESOURCE_FLAG_THREADSAFE_READ : 0,
-                                        offset);
+       return read_partial_wim_resource(lte, size, NULL, buf, 0, offset);
 }
 
 static int
@@ -630,6 +520,7 @@ read_wim_resource_prefix(const struct wim_lookup_table_entry *lte,
 }
 
 
+#ifndef __WIN32__
 static int
 read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
                         u64 size,
@@ -676,6 +567,7 @@ out_close:
        close(fd);
        return ret;
 }
+#endif /* !__WIN32__ */
 
 static int
 read_buffer_prefix(const struct wim_lookup_table_entry *lte,
@@ -719,8 +611,6 @@ typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entr
  * size until the resource is exhausted.
  *
  * If the resource is located in a WIM file, @flags can be:
- *   * WIMLIB_RESOURCE_FLAG_THREADSAFE_READ if it must be safe to access the resource
- *     concurrently by multiple threads.
  *   * WIMLIB_RESOURCE_FLAG_RAW if the raw compressed data is to be supplied
  *     instead of the uncompressed data.
  * Otherwise, the @flags are ignored.
@@ -732,7 +622,9 @@ read_resource_prefix(const struct wim_lookup_table_entry *lte,
 {
        static const read_resource_prefix_handler_t handlers[] = {
                [RESOURCE_IN_WIM]             = read_wim_resource_prefix,
+       #ifndef __WIN32__
                [RESOURCE_IN_FILE_ON_DISK]    = read_file_on_disk_prefix,
+       #endif
                [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix,
        #ifdef WITH_FUSE
                [RESOURCE_IN_STAGING_FILE]    = read_file_on_disk_prefix,
@@ -752,12 +644,9 @@ read_resource_prefix(const struct wim_lookup_table_entry *lte,
 
 int
 read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
-                           void *buf, bool thread_safe)
+                           void *buf)
 {
-       return read_resource_prefix(lte,
-                                   wim_resource_size(lte),
-                                   NULL, buf,
-                                   thread_safe ? WIMLIB_RESOURCE_FLAG_THREADSAFE_READ : 0);
+       return read_resource_prefix(lte, wim_resource_size(lte), NULL, buf, 0);
 }
 
 struct extract_ctx {
@@ -803,13 +692,13 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
                        sha1_final(hash, &ctx.sha_ctx);
                        if (!hashes_equal(hash, lte->hash)) {
                        #ifdef ENABLE_ERROR_MESSAGES
-                               ERROR_WITH_ERRNO("Invalid SHA1 message digest "
-                                                "on the following WIM resource:");
+                               ERROR("Invalid SHA1 message digest "
+                                     "on the following WIM resource:");
                                print_lookup_table_entry(lte, stderr);
                                if (lte->resource_location == RESOURCE_IN_WIM)
                                        ERROR("The WIM file appears to be corrupt!");
-                               ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
                        #endif
+                               ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
                        }
                }
        } else {
@@ -879,7 +768,7 @@ copy_resource(struct wim_lookup_table_entry *lte, void *wim)
        WIMStruct *w = wim;
        int ret;
 
-       ret = write_wim_resource(lte, w->out_fp,
+       ret = write_wim_resource(lte, w->out_fd,
                                 wim_resource_compression_type(lte),
                                 &lte->output_resource_entry, 0);
        if (ret == 0) {