]> wimlib.net Git - wimlib/blobdiff - src/resource.c
Multithreaded stream writing fixes
[wimlib] / src / resource.c
index 90e45ca1092db0eb3f0a65617734613beceab1c0..1b53ee40342cf6f5258b4b9bd0b508d4f8fc4500 100644 (file)
 #include <unistd.h>
 #include <fcntl.h>
 
+#ifdef HAVE_ALLOCA_H
+#  include <alloca.h>
+#endif
+
 /* Write @n bytes from @buf to the file descriptor @fd, retrying on interrupt
  * and on short writes.
  *
@@ -90,10 +94,13 @@ full_read(int fd, void *buf, size_t n)
  * Returns zero on success, nonzero on failure.
  */
 static int
-read_compressed_resource(FILE *fp, u64 resource_compressed_size,
+read_compressed_resource(FILE *fp,
+                        u64 resource_compressed_size,
                         u64 resource_uncompressed_size,
-                        u64 resource_offset, int resource_ctype,
-                        u64 len, u64 offset,
+                        u64 resource_offset,
+                        int resource_ctype,
+                        u64 len,
+                        u64 offset,
                         consume_data_callback_t cb,
                         void *ctx_or_buf)
 {
@@ -122,7 +129,8 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
         * follows the chunk table and therefore must have an offset of 0.
         */
 
-       /* Calculate how many chunks the resource conists of in its entirety. */
+       /* Calculate how many chunks the resource consists of in its
+        * entirety. */
        u64 num_chunks = (resource_uncompressed_size + WIM_CHUNK_SIZE - 1) /
                                                                WIM_CHUNK_SIZE;
        /* As mentioned, the first chunk has no entry in the chunk table. */
@@ -149,9 +157,22 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
        if (end_chunk != num_chunks - 1)
                num_needed_chunks++;
 
-       /* Declare the chunk table.  It will only contain offsets for the chunks
-        * that are actually needed for this read. */
-       u64 chunk_offsets[num_needed_chunks];
+       /* Allocate the chunk table.  It will only contain offsets for the
+        * chunks that are actually needed for this read. */
+       u64 *chunk_offsets;
+       bool chunk_offsets_malloced;
+       if (num_needed_chunks < 1000) {
+               chunk_offsets = alloca(num_needed_chunks * sizeof(u64));
+               chunk_offsets_malloced = false;
+       } else {
+               chunk_offsets = malloc(num_needed_chunks * sizeof(u64));
+               if (!chunk_offsets) {
+                       ERROR("Failed to allocate chunk table "
+                             "with %"PRIu64" entries", num_needed_chunks);
+                       return WIMLIB_ERR_NOMEM;
+               }
+               chunk_offsets_malloced = true;
+       }
 
        /* Set the implicit offset of the first chunk if it is included in the
         * needed chunks.
@@ -193,28 +214,28 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
        /* Number of bytes we need to read from the chunk table. */
        size_t size = num_needed_chunk_entries * chunk_entry_size;
 
-       {
-               u8 chunk_tab_buf[size];
+       /* Read the raw data into the end of the chunk_offsets array to
+        * avoid allocating another array. */
+       void *chunk_tab_buf = (void*)&chunk_offsets[num_needed_chunks] - size;
 
-               if (fread(chunk_tab_buf, 1, size, fp) != size)
-                       goto read_error;
+       if (fread(chunk_tab_buf, 1, size, fp) != size)
+               goto read_error;
 
-               /* Now fill in chunk_offsets from the entries we have read in
-                * chunk_tab_buf. */
+       /* Now fill in chunk_offsets from the entries we have read in
+        * chunk_tab_buf. */
 
-               u64 *chunk_tab_p = chunk_offsets;
-               if (start_chunk == 0)
-                       chunk_tab_p++;
+       u64 *chunk_tab_p = chunk_offsets;
+       if (start_chunk == 0)
+               chunk_tab_p++;
 
-               if (chunk_entry_size == 4) {
-                       u32 *entries = (u32*)chunk_tab_buf;
-                       while (num_needed_chunk_entries--)
-                               *chunk_tab_p++ = le32_to_cpu(*entries++);
-               } else {
-                       u64 *entries = (u64*)chunk_tab_buf;
-                       while (num_needed_chunk_entries--)
-                               *chunk_tab_p++ = le64_to_cpu(*entries++);
-               }
+       if (chunk_entry_size == 4) {
+               u32 *entries = (u32*)chunk_tab_buf;
+               while (num_needed_chunk_entries--)
+                       *chunk_tab_p++ = le32_to_cpu(*entries++);
+       } else {
+               u64 *entries = (u64*)chunk_tab_buf;
+               while (num_needed_chunk_entries--)
+                       *chunk_tab_p++ = le64_to_cpu(*entries++);
        }
 
        /* Done with the chunk table now.  We must now seek to the first chunk
@@ -226,10 +247,11 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
                goto read_error;
 
        /* Pointer to current position in the output buffer for uncompressed
-        * data. */
+        * data.  Alternatively, if using a callback function, we repeatedly
+        * fill a temporary buffer to feed data into the callback function.  */
        u8 *out_p;
        if (cb)
-               out_p = alloca(32768);
+               out_p = alloca(WIM_CHUNK_SIZE);
        else
                out_p = ctx_or_buf;
 
@@ -298,11 +320,11 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
                 * is equal to the uncompressed chunk size. */
                if (compressed_chunk_size == uncompressed_chunk_size) {
                        /* Uncompressed chunk */
-
                        if (start_offset != 0)
                                if (fseeko(fp, start_offset, SEEK_CUR))
                                        goto read_error;
-                       if (fread(out_p, 1, partial_chunk_size, fp) != partial_chunk_size)
+                       if (fread(cb ? out_p + start_offset : out_p,
+                                 1, partial_chunk_size, fp) != partial_chunk_size)
                                goto read_error;
                } else {
                        /* Compressed chunk */
@@ -341,7 +363,8 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
                }
                if (cb) {
                        /* Feed the data to the callback function */
-                       ret = cb(out_p, partial_chunk_size, ctx_or_buf);
+                       ret = cb(out_p + start_offset,
+                                partial_chunk_size, ctx_or_buf);
                        if (ret)
                                goto out;
                } else {
@@ -355,6 +378,8 @@ read_compressed_resource(FILE *fp, u64 resource_compressed_size,
 
        ret = 0;
 out:
+       if (chunk_offsets_malloced)
+               FREE(chunk_offsets);
        return ret;
 
 read_error:
@@ -438,7 +463,7 @@ put_resource_entry(void *p, const struct resource_entry *entry)
 static FILE *
 wim_get_fp(WIMStruct *w)
 {
-#ifdef WITH_FUSE
+#if defined(WITH_FUSE) || defined(ENABLE_MULTITHREADED_COMPRESSION)
        pthread_mutex_lock(&w->fp_tab_mutex);
        FILE *fp;
 
@@ -457,9 +482,9 @@ wim_get_fp(WIMStruct *w)
                ERROR_WITH_ERRNO("Failed to open `%"TS"'", w->filename);
 out_unlock:
        pthread_mutex_unlock(&w->fp_tab_mutex);
-#else /* WITH_FUSE */
+#else /* WITH_FUSE || ENABLE_MULTITHREADED_COMPRESSION */
        fp = w->fp;
-#endif /* !WITH_FUSE */
+#endif /* !WITH_FUSE && !ENABLE_MULTITHREADED_COMPRESSION */
        return fp;
 }
 
@@ -467,7 +492,7 @@ static int
 wim_release_fp(WIMStruct *w, FILE *fp)
 {
        int ret = 0;
-#ifdef WITH_FUSE
+#if defined(WITH_FUSE) || defined(ENABLE_MULTITHREADED_COMPRESSION)
        FILE **fp_tab;
 
        pthread_mutex_lock(&w->fp_tab_mutex);
@@ -491,7 +516,7 @@ wim_release_fp(WIMStruct *w, FILE *fp)
        w->num_allocated_fps += 4;
 out_unlock:
        pthread_mutex_unlock(&w->fp_tab_mutex);
-#endif /* WITH_FUSE */
+#endif /* WITH_FUSE || ENABLE_MULTITHREADED_COMPRESSION */
        return ret;
 }
 
@@ -508,22 +533,18 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
        int ret;
 
        wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
-       wimlib_assert(offset + size <= lte->resource_entry.original_size);
 
        wim = lte->wim;
-
-       if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED) {
+       if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ) {
                wim_fp = wim_get_fp(wim);
                if (!wim_fp) {
-                       ret = -1;
+                       ret = WIMLIB_ERR_READ;
                        goto out;
                }
        } else {
                wim_fp = lte->wim->fp;
        }
 
-       wimlib_assert(wim_fp != NULL);
-
        if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED &&
            !(flags & WIMLIB_RESOURCE_FLAG_RAW))
        {
@@ -537,6 +558,8 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                                               cb,
                                               ctx_or_buf);
        } else {
+               offset += lte->resource_entry.offset;
+
                if (fseeko(wim_fp, offset, SEEK_SET)) {
                        ERROR_WITH_ERRNO("Failed to seek to offset %"PRIu64
                                         " in WIM", offset);
@@ -544,18 +567,21 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
                        goto out_release_fp;
                }
                if (cb) {
-                       char buf[min(32768, size)];
+                       /* Send data to callback function */
+                       u8 buf[min(WIM_CHUNK_SIZE, size)];
                        while (size) {
-                               size_t bytes_to_read = min(32768, size);
+                               size_t bytes_to_read = min(WIM_CHUNK_SIZE, size);
                                size_t bytes_read = fread(buf, 1, bytes_to_read, wim_fp);
-                               
+
                                if (bytes_read != bytes_to_read)
                                        goto read_error;
                                ret = cb(buf, bytes_read, ctx_or_buf);
                                if (ret)
                                        goto out_release_fp;
+                               size -= bytes_read;
                        }
                } else {
+                       /* Send data directly to a buffer */
                        if (fread(ctx_or_buf, 1, size, wim_fp) != size)
                                goto read_error;
                }
@@ -563,16 +589,21 @@ read_partial_wim_resource(const struct wim_lookup_table_entry *lte,
        }
        goto out_release_fp;
 read_error:
-       ERROR_WITH_ERRNO("Error reading data from WIM");
+       if (ferror(wim_fp))
+               ERROR_WITH_ERRNO("Error reading data from WIM");
+       else
+               ERROR("Unexpected EOF in WIM!");
        ret = WIMLIB_ERR_READ;
 out_release_fp:
-       if (flags & WIMLIB_RESOURCE_FLAG_MULTITHREADED)
-               ret |= wim_release_fp(wim, wim_fp);
+       if (flags & WIMLIB_RESOURCE_FLAG_THREADSAFE_READ) {
+               int ret2 = wim_release_fp(wim, wim_fp);
+               if (ret == 0)
+                       ret = ret2;
+       }
 out:
        if (ret) {
                if (errno == 0)
                        errno = EIO;
-               ret = -1;
        }
        return ret;
 }
@@ -584,7 +615,7 @@ read_partial_wim_resource_into_buf(const struct wim_lookup_table_entry *lte,
                                   bool threadsafe)
 {
        return read_partial_wim_resource(lte, size, NULL, buf,
-                                        threadsafe ? WIMLIB_RESOURCE_FLAG_MULTITHREADED : 0,
+                                        threadsafe ? WIMLIB_RESOURCE_FLAG_THREADSAFE_READ : 0,
                                         offset);
 }
 
@@ -618,16 +649,17 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte,
        }
        if (cb) {
                /* Send data to callback function */
-               char buf[min(32768, size)];
+               u8 buf[min(WIM_CHUNK_SIZE, size)];
                size_t bytes_to_read;
                while (size) {
-                       bytes_to_read = min(32768, size);
+                       bytes_to_read = min(WIM_CHUNK_SIZE, size);
                        bytes_read = full_read(fd, buf, bytes_to_read);
                        if (bytes_read != bytes_to_read)
                                goto read_error;
                        ret = cb(buf, bytes_read, ctx_or_buf);
                        if (ret)
                                goto out_close;
+                       size -= bytes_read;
                }
        } else {
                /* Send data directly to a buffer */
@@ -651,12 +683,21 @@ read_buffer_prefix(const struct wim_lookup_table_entry *lte,
                   void *ctx_or_buf, int _ignored_flags)
 {
        const void *inbuf = lte->attached_buffer;
+       int ret;
+
        if (cb) {
-               return cb(inbuf, size, ctx_or_buf);
+               while (size) {
+                       size_t chunk_size = min(WIM_CHUNK_SIZE, size);
+                       ret = cb(inbuf, chunk_size, ctx_or_buf);
+                       if (ret)
+                               return ret;
+                       size -= chunk_size;
+                       inbuf += chunk_size;
+               }
        } else {
                memcpy(ctx_or_buf, inbuf, size);
-               return 0;
        }
+       return 0;
 }
 
 typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entry *lte,
@@ -665,6 +706,25 @@ typedef int (*read_resource_prefix_handler_t)(const struct wim_lookup_table_entr
                                              void *ctx_or_buf,
                                              int flags);
 
+/*
+ * Read the first @size bytes from a generic "resource", which may be located in
+ * the WIM (compressed or uncompressed), in an external file, or directly in an
+ * in-memory buffer.
+ *
+ * Feed the data either to a callback function (cb != NULL, passing it
+ * ctx_or_buf), or write it directly into a buffer (cb == NULL, ctx_or_buf
+ * specifies the buffer, which must have room for @size bytes).
+ *
+ * When using a callback function, it is called repeatedly with chunks of up to
+ * WIM_CHUNK_SIZE bytes until the first @size bytes have been supplied.
+ *
+ * If the resource is located in a WIM file, @flags can be:
+ *   * WIMLIB_RESOURCE_FLAG_THREADSAFE_READ if it must be safe to access the resource
+ *     concurrently by multiple threads.
+ *   * WIMLIB_RESOURCE_FLAG_RAW if the raw compressed data is to be supplied
+ *     instead of the uncompressed data.
+ * Otherwise, the @flags are ignored.
+ */
 int
 read_resource_prefix(const struct wim_lookup_table_entry *lte,
                     u64 size, consume_data_callback_t cb, void *ctx_or_buf,
@@ -697,7 +757,23 @@ read_full_resource_into_buf(const struct wim_lookup_table_entry *lte,
        return read_resource_prefix(lte,
                                    wim_resource_size(lte),
                                    NULL, buf,
-                                   thread_safe ? WIMLIB_RESOURCE_FLAG_MULTITHREADED : 0);
+                                   thread_safe ? WIMLIB_RESOURCE_FLAG_THREADSAFE_READ : 0);
+}
+
+struct extract_ctx {
+       SHA_CTX sha_ctx;
+       consume_data_callback_t extract_chunk;
+       void *extract_chunk_arg;
+};
+
+static int
+extract_chunk_sha1_wrapper(const void *chunk, size_t chunk_size,
+                          void *_ctx)
+{
+       struct extract_ctx *ctx = _ctx;
+
+       sha1_update(&ctx->sha_ctx, chunk, chunk_size);
+       return ctx->extract_chunk(chunk, chunk_size, ctx->extract_chunk_arg);
 }
 
 /* Extracts the first @size bytes of a WIM resource to somewhere.  In the
@@ -712,8 +788,36 @@ extract_wim_resource(const struct wim_lookup_table_entry *lte,
                     consume_data_callback_t extract_chunk,
                     void *extract_chunk_arg)
 {
-       return read_resource_prefix(lte, size, extract_chunk,
-                                   extract_chunk_arg, 0);
+       int ret;
+       if (size == wim_resource_size(lte)) {
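+               /* The whole resource is being extracted, so compute its SHA1
+                * message digest on the fly and check it against lte->hash. */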
+               struct extract_ctx ctx;
+               ctx.extract_chunk = extract_chunk;
+               ctx.extract_chunk_arg = extract_chunk_arg;
+               sha1_init(&ctx.sha_ctx);
+               ret = read_resource_prefix(lte, size,
+                                          extract_chunk_sha1_wrapper,
+                                          &ctx, 0);
+               if (ret == 0) {
+                       u8 hash[SHA1_HASH_SIZE];
+                       sha1_final(hash, &ctx.sha_ctx);
+                       if (!hashes_equal(hash, lte->hash)) {
+                       #ifdef ENABLE_ERROR_MESSAGES
+                               ERROR_WITH_ERRNO("Invalid SHA1 message digest "
+                                                "on the following WIM resource:");
+                               print_lookup_table_entry(lte, stderr);
+                               if (lte->resource_location == RESOURCE_IN_WIM)
+                                       ERROR("The WIM file appears to be corrupt!");
+                               ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
+                       #endif
+                       }
+               }
+       } else {
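+               /* Only a prefix of the resource is being extracted; pass the
+                * data through without computing a SHA1 message digest. */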
+               ret = read_resource_prefix(lte, size, extract_chunk,
+                                          extract_chunk_arg, 0);
+       }
+       return ret;
 }
 
 static int
@@ -736,6 +840,29 @@ extract_wim_resource_to_fd(const struct wim_lookup_table_entry *lte,
        return extract_wim_resource(lte, size, extract_wim_chunk_to_fd, &fd);
 }
 
+
+static int
+sha1_chunk(const void *buf, size_t len, void *ctx)
+{
+       sha1_update(ctx, buf, len);
+       return 0;
+}
+
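+/* Calculate the SHA1 message digest of a stream (reading
+ * wim_resource_size(lte) bytes of it) and store the digest in lte->hash.
+ * Returns 0 on success.  If reading the stream fails, the nonzero error code
+ * is returned and lte->hash is left unmodified. */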
+int
+sha1_resource(struct wim_lookup_table_entry *lte)
+{
+       int ret;
+       SHA_CTX sha_ctx;
+
+       sha1_init(&sha_ctx);
+       ret = read_resource_prefix(lte, wim_resource_size(lte),
+                                  sha1_chunk, &sha_ctx, 0);
+       if (ret == 0)
+               sha1_final(lte->hash, &sha_ctx);
+       return ret;
+}
+
 /*
  * Copies the file resource specified by the lookup table entry @lte from the
  * input WIM to the output WIM that has its FILE * given by