Implement multi-threaded compression
[wimlib] / src / resource.c
index a2772c1c0646918085ba004082c69dc869b674b7..01d481df0f97057a074e7168270e0616e9359c2e 100644 (file)
 
 #include "dentry.h"
 
-#ifdef WITH_NTFS_3G
-#include <time.h>
-#include <ntfs-3g/attrib.h>
-#include <ntfs-3g/inode.h>
-#include <ntfs-3g/dir.h>
-#endif
-
 #include "wimlib_internal.h"
 #include "lookup_table.h"
 #include "io.h"
 #include "sha1.h"
 #include <unistd.h>
 #include <errno.h>
-#ifdef HAVE_ALLOCA_H
-#include <alloca.h>
+
+#ifdef WITH_NTFS_3G
+#include <time.h>
+#include <ntfs-3g/attrib.h>
+#include <ntfs-3g/inode.h>
+#include <ntfs-3g/dir.h>
 #endif
 
 
@@ -605,446 +602,6 @@ int read_full_wim_resource(const struct lookup_table_entry *lte, u8 buf[],
        return read_wim_resource(lte, buf, wim_resource_size(lte), 0, flags);
 }
 
-/* Chunk table that's located at the beginning of each compressed resource in
- * the WIM.  (This is not the on-disk format; the on-disk format just has an
- * array of offsets.) */
-struct chunk_table {
-       off_t file_offset;
-       u64 num_chunks;
-       u64 original_resource_size;
-       u64 bytes_per_chunk_entry;
-       u64 table_disk_size;
-       u64 cur_offset;
-       u64 *cur_offset_p;
-       u64 offsets[0];
-};
-
-/*
- * Allocates and initializes a chunk table, and reserves space for it in the
- * output file.
- */
-static int
-begin_wim_resource_chunk_tab(const struct lookup_table_entry *lte,
-                            FILE *out_fp,
-                            off_t file_offset,
-                            struct chunk_table **chunk_tab_ret)
-{
-       u64 size = wim_resource_size(lte);
-       u64 num_chunks = (size + WIM_CHUNK_SIZE - 1) / WIM_CHUNK_SIZE;
-       size_t alloc_size = sizeof(struct chunk_table) + num_chunks * sizeof(u64);
-       struct chunk_table *chunk_tab = CALLOC(1, alloc_size);
-       int ret;
-
-       if (!chunk_tab) {
-               ERROR("Failed to allocate chunk table for %"PRIu64" byte "
-                     "resource", size);
-               ret = WIMLIB_ERR_NOMEM;
-               goto out;
-       }
-       chunk_tab->file_offset = file_offset;
-       chunk_tab->num_chunks = num_chunks;
-       chunk_tab->original_resource_size = size;
-       chunk_tab->bytes_per_chunk_entry = (size >= (1ULL << 32)) ? 8 : 4;
-       chunk_tab->table_disk_size = chunk_tab->bytes_per_chunk_entry *
-                                    (num_chunks - 1);
-       chunk_tab->cur_offset = 0;
-       chunk_tab->cur_offset_p = chunk_tab->offsets;
-
-       if (fwrite(chunk_tab, 1, chunk_tab->table_disk_size, out_fp) !=
-                  chunk_tab->table_disk_size) {
-               ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
-                                "file resource");
-               ret = WIMLIB_ERR_WRITE;
-               goto out;
-       }
-
-       ret = 0;
-out:
-       *chunk_tab_ret = chunk_tab;
-       return ret;
-}
-
-/*
- * Compresses a chunk of a WIM resource.
- *
- * @chunk:             Uncompressed data of the chunk.
- * @chunk_size:                Size of the uncompressed chunk in bytes.
- * @compressed_chunk:  Pointer to output buffer of size at least
- *                             (@chunk_size - 1) bytes.
- * @compressed_chunk_len_ret:  Pointer to an unsigned int into which the size
- *                                     of the compressed chunk will be
- *                                     returned.
- * @ctype:     Type of compression to use.  Must be WIM_COMPRESSION_TYPE_LZX
- *             or WIM_COMPRESSION_TYPE_XPRESS.
- *
- * Returns zero if compression succeeded, and nonzero if the chunk could not be
- * compressed to any smaller than @chunk_size.  This function cannot fail for
- * any other reasons.
- */
-static int compress_chunk(const u8 chunk[], unsigned chunk_size,
-                         u8 compressed_chunk[],
-                         unsigned *compressed_chunk_len_ret,
-                         int ctype)
-{
-       int (*compress)(const void *, unsigned, void *, unsigned *);
-       switch (ctype) {
-       case WIM_COMPRESSION_TYPE_LZX:
-               compress = lzx_compress;
-               break;
-       case WIM_COMPRESSION_TYPE_XPRESS:
-               compress = xpress_compress;
-               break;
-       default:
-               wimlib_assert(0);
-               break;
-       }
-       return (*compress)(chunk, chunk_size, compressed_chunk,
-                          compressed_chunk_len_ret);
-}
-
-/*
- * Writes a chunk of a WIM resource to an output file.
- *
- * @chunk:       Uncompressed data of the chunk.
- * @chunk_size:          Size of the chunk (<= WIM_CHUNK_SIZE)
- * @out_fp:      FILE * to write the chunk to.
- * @out_ctype:   Compression type to use when writing the chunk (ignored if no
- *                     chunk table provided)
- * @chunk_tab:   Pointer to chunk table being created.  It is updated with the
- *                     offset of the chunk we write.
- *
- * Returns 0 on success; nonzero on failure.
- */
-static int write_wim_resource_chunk(const u8 chunk[], unsigned chunk_size,
-                                   FILE *out_fp, int out_ctype,
-                                   struct chunk_table *chunk_tab)
-{
-       const u8 *out_chunk;
-       unsigned out_chunk_size;
-
-       wimlib_assert(chunk_size <= WIM_CHUNK_SIZE);
-
-       if (!chunk_tab) {
-               out_chunk = chunk;
-               out_chunk_size = chunk_size;
-       } else {
-               u8 *compressed_chunk = alloca(chunk_size);
-               int ret;
-
-               ret = compress_chunk(chunk, chunk_size, compressed_chunk,
-                                    &out_chunk_size, out_ctype);
-               if (ret == 0) {
-                       out_chunk = compressed_chunk;
-               } else {
-                       out_chunk = chunk;
-                       out_chunk_size = chunk_size;
-               }
-               *chunk_tab->cur_offset_p++ = chunk_tab->cur_offset;
-               chunk_tab->cur_offset += out_chunk_size;
-       }
-
-       if (fwrite(out_chunk, 1, out_chunk_size, out_fp) != out_chunk_size) {
-               ERROR_WITH_ERRNO("Failed to write WIM resource chunk");
-               return WIMLIB_ERR_WRITE;
-       }
-       return 0;
-}
-
-/*
- * Finishes a WIM chunk table and writes it to the output file at the correct
- * offset.
- *
- * The final size of the full compressed resource is returned in
- * @compressed_size_p.
- */
-static int
-finish_wim_resource_chunk_tab(struct chunk_table *chunk_tab,
-                             FILE *out_fp, u64 *compressed_size_p)
-{
-       size_t bytes_written;
-       if (fseeko(out_fp, chunk_tab->file_offset, SEEK_SET) != 0) {
-               ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of output "
-                                "WIM file", chunk_tab->file_offset);
-               return WIMLIB_ERR_WRITE;
-       }
-
-       if (chunk_tab->bytes_per_chunk_entry == 8) {
-               array_cpu_to_le64(chunk_tab->offsets, chunk_tab->num_chunks);
-       } else {
-               for (u64 i = 0; i < chunk_tab->num_chunks; i++)
-                       ((u32*)chunk_tab->offsets)[i] =
-                               cpu_to_le32(chunk_tab->offsets[i]);
-       }
-       bytes_written = fwrite((u8*)chunk_tab->offsets +
-                                       chunk_tab->bytes_per_chunk_entry,
-                              1, chunk_tab->table_disk_size, out_fp);
-       if (bytes_written != chunk_tab->table_disk_size) {
-               ERROR_WITH_ERRNO("Failed to write chunk table in compressed "
-                                "file resource");
-               return WIMLIB_ERR_WRITE;
-       }
-       if (fseeko(out_fp, 0, SEEK_END) != 0) {
-               ERROR_WITH_ERRNO("Failed to seek to end of output WIM file");
-               return WIMLIB_ERR_WRITE;
-       }
-       *compressed_size_p = chunk_tab->cur_offset + chunk_tab->table_disk_size;
-       return 0;
-}
-
-/*
- * Writes a WIM resource to a FILE * opened for writing.  The resource may be
- * written uncompressed or compressed depending on the @out_ctype parameter.
- *
- * If by chance the resource compresses to more than the original size (this may
- * happen with random data or files that are pre-compressed), the resource is
- * instead written uncompressed (and this is reflected in the @out_res_entry by
- * removing the WIM_RESHDR_FLAG_COMPRESSED flag).
- *
- * @lte:       The lookup table entry for the WIM resource.
- * @out_fp:    The FILE * to write the resource to.
- * @out_ctype:  The compression type of the resource to write.  Note: if this is
- *                     the same as the compression type of the WIM resource we
- *                     need to read, we simply copy the data (i.e. we do not
- *                     uncompress it, then compress it again).
- * @out_res_entry:  If non-NULL, a resource entry that is filled in with the
- *                 offset, original size, compressed size, and compression flag
- *                 of the output resource.
- *
- * Returns 0 on success; nonzero on failure.
- */
-static int write_wim_resource(struct lookup_table_entry *lte,
-                             FILE *out_fp, int out_ctype,
-                             struct resource_entry *out_res_entry,
-                             int flags)
-{
-       u64 bytes_remaining;
-       u64 original_size;
-       u64 old_compressed_size;
-       u64 new_compressed_size;
-       u64 offset;
-       int ret;
-       struct chunk_table *chunk_tab = NULL;
-       bool raw;
-       off_t file_offset;
-#ifdef WITH_NTFS_3G
-       ntfs_inode *ni = NULL;
-#endif
-
-       wimlib_assert(lte);
-
-       /* Original size of the resource */
-       original_size = wim_resource_size(lte);
-
-       /* Compressed size of the resource (as it exists now) */
-       old_compressed_size = wim_resource_compressed_size(lte);
-
-       /* Current offset in output file */
-       file_offset = ftello(out_fp);
-       if (file_offset == -1) {
-               ERROR_WITH_ERRNO("Failed to get offset in output "
-                                "stream");
-               return WIMLIB_ERR_WRITE;
-       }
-
-       /* Are the compression types the same?  If so, do a raw copy (copy
-        * without decompressing and recompressing the data). */
-       raw = (wim_resource_compression_type(lte) == out_ctype
-              && out_ctype != WIM_COMPRESSION_TYPE_NONE);
-
-       if (raw) {
-               flags |= WIMLIB_RESOURCE_FLAG_RAW;
-               bytes_remaining = old_compressed_size;
-       } else {
-               flags &= ~WIMLIB_RESOURCE_FLAG_RAW;
-               bytes_remaining = original_size;
-       }
-
-       /* Empty resource; nothing needs to be done, so just return success. */
-       if (bytes_remaining == 0)
-               return 0;
-
-       /* Buffer for reading chunks for the resource */
-       u8 buf[min(WIM_CHUNK_SIZE, bytes_remaining)];
-
-       /* If we are writing a compressed resource and not doing a raw copy, we
-        * need to initialize the chunk table */
-       if (out_ctype != WIM_COMPRESSION_TYPE_NONE && !raw) {
-               ret = begin_wim_resource_chunk_tab(lte, out_fp, file_offset,
-                                                  &chunk_tab);
-               if (ret != 0)
-                       goto out;
-       }
-
-       /* If the WIM resource is in an external file, open a FILE * to it so we
-        * don't have to open a temporary one in read_wim_resource() for each
-        * chunk. */
-       if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
-            && !lte->file_on_disk_fp)
-       {
-               wimlib_assert(lte->file_on_disk);
-               lte->file_on_disk_fp = fopen(lte->file_on_disk, "rb");
-               if (!lte->file_on_disk_fp) {
-                       ERROR_WITH_ERRNO("Failed to open the file `%s' for "
-                                        "reading", lte->file_on_disk);
-                       ret = WIMLIB_ERR_OPEN;
-                       goto out;
-               }
-       }
-#ifdef WITH_NTFS_3G
-       else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME
-                 && !lte->attr)
-       {
-               struct ntfs_location *loc = lte->ntfs_loc;
-               wimlib_assert(loc);
-               ni = ntfs_pathname_to_inode(*loc->ntfs_vol_p, NULL, loc->path_utf8);
-               if (!ni) {
-                       ERROR_WITH_ERRNO("Failed to open inode `%s' in NTFS "
-                                        "volume", loc->path_utf8);
-                       ret = WIMLIB_ERR_NTFS_3G;
-                       goto out;
-               }
-               lte->attr = ntfs_attr_open(ni,
-                                          loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
-                                          (ntfschar*)loc->stream_name_utf16,
-                                          loc->stream_name_utf16_num_chars);
-               if (!lte->attr) {
-                       ERROR_WITH_ERRNO("Failed to open attribute of `%s' in "
-                                        "NTFS volume", loc->path_utf8);
-                       ret = WIMLIB_ERR_NTFS_3G;
-                       goto out_fclose;
-               }
-       }
-#endif
-
-       /* If we aren't doing a raw copy, we will compute the SHA1 message
-        * digest of the resource as we read it, and verify it's the same as the
-        * hash given in the lookup table entry once we've finished reading the
-        * resource. */
-       SHA_CTX ctx;
-       if (!raw)
-               sha1_init(&ctx);
-
-       /* While there are still bytes remaining in the WIM resource, read a
-        * chunk of the resource, update SHA1, then write that chunk using the
-        * desired compression type. */
-       offset = 0;
-       do {
-               u64 to_read = min(bytes_remaining, WIM_CHUNK_SIZE);
-               ret = read_wim_resource(lte, buf, to_read, offset, flags);
-               if (ret != 0)
-                       goto out_fclose;
-               if (!raw)
-                       sha1_update(&ctx, buf, to_read);
-               ret = write_wim_resource_chunk(buf, to_read, out_fp,
-                                              out_ctype, chunk_tab);
-               if (ret != 0)
-                       goto out_fclose;
-               bytes_remaining -= to_read;
-               offset += to_read;
-       } while (bytes_remaining);
-
-       /* Raw copy:  The new compressed size is the same as the old compressed
-        * size
-        *
-        * Using WIM_COMPRESSION_TYPE_NONE:  The new compressed size is the
-        * original size
-        *
-        * Using a different compression type:  Call
-        * finish_wim_resource_chunk_tab() and it will provide the new
-        * compressed size.
-        */
-       if (raw) {
-               new_compressed_size = old_compressed_size;
-       } else {
-               if (out_ctype == WIM_COMPRESSION_TYPE_NONE)
-                       new_compressed_size = original_size;
-               else {
-                       ret = finish_wim_resource_chunk_tab(chunk_tab, out_fp,
-                                                           &new_compressed_size);
-                       if (ret != 0)
-                               goto out_fclose;
-               }
-       }
-
-       /* Verify SHA1 message digest of the resource, unless we are doing a raw
-        * write (in which case we never even saw the uncompressed data).  Or,
-        * if the hash we had before is all 0's, just re-set it to be the new
-        * hash. */
-       if (!raw) {
-               u8 md[SHA1_HASH_SIZE];
-               sha1_final(md, &ctx);
-               if (is_zero_hash(lte->hash)) {
-                       copy_hash(lte->hash, md);
-               } else if (!hashes_equal(md, lte->hash)) {
-                       ERROR("WIM resource has incorrect hash!");
-                       if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK) {
-                               ERROR("We were reading it from `%s'; maybe it changed "
-                                     "while we were reading it.",
-                                     lte->file_on_disk);
-                       }
-                       ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
-                       goto out_fclose;
-               }
-       }
-
-       if (!raw && new_compressed_size >= original_size &&
-           out_ctype != WIM_COMPRESSION_TYPE_NONE)
-       {
-               /* Oops!  We compressed the resource to larger than the original
-                * size.  Write the resource uncompressed instead. */
-               if (fseeko(out_fp, file_offset, SEEK_SET) != 0) {
-                       ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" "
-                                        "of output WIM file", file_offset);
-                       ret = WIMLIB_ERR_WRITE;
-                       goto out_fclose;
-               }
-               ret = write_wim_resource(lte, out_fp, WIM_COMPRESSION_TYPE_NONE,
-                                        out_res_entry, flags);
-               if (ret != 0)
-                       goto out_fclose;
-               if (fflush(out_fp) != 0) {
-                       ERROR_WITH_ERRNO("Failed to flush output WIM file");
-                       ret = WIMLIB_ERR_WRITE;
-                       goto out_fclose;
-               }
-               if (ftruncate(fileno(out_fp), file_offset + out_res_entry->size) != 0) {
-                       ERROR_WITH_ERRNO("Failed to truncate output WIM file");
-                       ret = WIMLIB_ERR_WRITE;
-                       goto out_fclose;
-               }
-       } else {
-               if (out_res_entry) {
-                       out_res_entry->size          = new_compressed_size;
-                       out_res_entry->original_size = original_size;
-                       out_res_entry->offset        = file_offset;
-                       out_res_entry->flags         = lte->resource_entry.flags
-                                                       & ~WIM_RESHDR_FLAG_COMPRESSED;
-                       if (out_ctype != WIM_COMPRESSION_TYPE_NONE)
-                               out_res_entry->flags |= WIM_RESHDR_FLAG_COMPRESSED;
-               }
-       }
-       ret = 0;
-out_fclose:
-       if (lte->resource_location == RESOURCE_IN_FILE_ON_DISK
-           && lte->file_on_disk_fp) {
-               fclose(lte->file_on_disk_fp);
-               lte->file_on_disk_fp = NULL;
-       }
-#ifdef WITH_NTFS_3G
-       else if (lte->resource_location == RESOURCE_IN_NTFS_VOLUME) {
-               if (lte->attr) {
-                       ntfs_attr_close(lte->attr);
-                       lte->attr = NULL;
-               }
-               if (ni)
-                       ntfs_inode_close(ni);
-       }
-#endif
-out:
-       FREE(chunk_tab);
-       return ret;
-}
-
 /* Like write_wim_resource(), but the resource is specified by a buffer of
  * uncompressed data rather than a lookup table entry; also writes the SHA1 hash of
  * the buffer to @hash.  */
@@ -1154,39 +711,6 @@ int copy_resource(struct lookup_table_entry *lte, void *wim)
        return 0;
 }
 
-/*
- * Writes a dentry's resources, including the main file resource as well as all
- * alternate data streams, to the output file.
- *
- * @dentry:  The dentry for the file.
- * @wim_p:   A pointer to the WIMStruct containing @dentry.
- *
- * @return zero on success, nonzero on failure.
- */
-int write_dentry_resources(struct dentry *dentry, void *wim_p)
-{
-       WIMStruct *w = wim_p;
-       int ret = 0;
-       struct lookup_table_entry *lte;
-       int ctype = wimlib_get_compression_type(w);
-
-       if (w->write_flags & WIMLIB_WRITE_FLAG_VERBOSE) {
-               wimlib_assert(dentry->full_path_utf8);
-               printf("Writing streams for `%s'\n", dentry->full_path_utf8);
-       }
-
-       for (unsigned i = 0; i <= dentry->d_inode->num_ads; i++) {
-               lte = inode_stream_lte(dentry->d_inode, i, w->lookup_table);
-               if (lte && ++lte->out_refcnt == 1) {
-                       ret = write_wim_resource(lte, w->out_fp, ctype,
-                                                &lte->output_resource_entry, 0);
-                       if (ret != 0)
-                               break;
-               }
-       }
-       return ret;
-}
-
 /*
  * Reads the metadata resource from the WIM file.  The metadata
  * resource consists of the security data, followed by the directory entry for