Stream and blob updates
authorEric Biggers <ebiggers3@gmail.com>
Tue, 17 Mar 2015 03:17:15 +0000 (22:17 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Thu, 26 Mar 2015 00:56:34 +0000 (19:56 -0500)
- Rename "lookup table entry" to "blob descriptor"
- Rename "lookup table" to "blob table"
- Use single array for all an inode's streams
- Explicitly annotate each stream with its type
- Account for fact that EFSRPC raw data includes all data streams
- Other cleanups

62 files changed:
Makefile.am
doc/man1/wimlib-imagex-info.1
doc/man1/wimlib-imagex-verify.1
include/wimlib.h
include/wimlib/apply.h
include/wimlib/blob_table.h [new file with mode: 0644]
include/wimlib/capture.h
include/wimlib/dentry.h
include/wimlib/encoding.h
include/wimlib/header.h
include/wimlib/inode.h
include/wimlib/integrity.h
include/wimlib/lookup_table.h [deleted file]
include/wimlib/metadata.h
include/wimlib/ntfs_3g.h
include/wimlib/reparse.h
include/wimlib/resource.h
include/wimlib/solid.h
include/wimlib/util.h
include/wimlib/wim.h
include/wimlib/wimboot.h
include/wimlib/win32.h
include/wimlib/wof.h
include/wimlib/write.h
programs/imagex.c
src/add_image.c
src/blob_table.c [new file with mode: 0644]
src/capture_common.c
src/delete_image.c
src/dentry.c
src/encoding.c
src/export_image.c
src/extract.c
src/header.c
src/inode.c
src/inode_fixup.c
src/inode_table.c
src/integrity.c
src/iterate_dir.c
src/lookup_table.c [deleted file]
src/metadata_resource.c
src/mount_image.c
src/ntfs-3g_apply.c
src/ntfs-3g_capture.c
src/paths.c
src/reference.c
src/reparse.c
src/resource.c
src/solid.c
src/split.c
src/template.c
src/unix_apply.c
src/unix_capture.c
src/update_image.c
src/util.c
src/verify.c
src/wim.c
src/wimboot.c
src/win32_apply.c
src/win32_capture.c
src/write.c
src/xml.c

index 84139fc..23ccd85 100644 (file)
@@ -36,6 +36,7 @@ wimlib.pc: config.status
 libwim_la_SOURCES =            \
        src/add_image.c         \
        src/avl_tree.c          \
+       src/blob_table.c        \
        src/capture_common.c    \
        src/compress.c          \
        src/compress_common.c   \
@@ -59,7 +60,6 @@ libwim_la_SOURCES =           \
        src/iterate_dir.c       \
        src/join.c              \
        src/lcpit_matchfinder.c \
-       src/lookup_table.c      \
        src/lzms_common.c       \
        src/lzms_compress.c     \
        src/lzms_decompress.c   \
index cc6e3f9..a952a2f 100644 (file)
@@ -52,8 +52,9 @@ byte-order mark.
 \fB--header\fR
 Shows detailed information from the WIM header.
 .TP
-\fB--lookup-table\fR
-Prints all the entries in the stream lookup table of the WIM.
+\fB--blobs\fR
+Prints information about all the blobs ("file data") in the WIM.  A WIM file
+stores only one copy of each unique blob.
 .TP
 \fB--xml\fR
 Prints the raw XML data from the WIM.  Note: the XML data will be encoded using
index 68a6658..a889829 100644 (file)
@@ -12,7 +12,7 @@ Specifically, this command performs the following verifications on the WIM
 archive:
 .IP \[bu] 4
 Verify that the WIM file can be successfully opened, which involves parsing the
-header, lookup table, and XML data.
+header, blob table, and XML data.
 .IP \[bu]
 If the WIM archive contains an integrity table, verify the integrity of the
 entire WIM archive.  Otherwise, print a warning.
index 8c904c5..df0ec28 100644 (file)
@@ -1449,7 +1449,11 @@ struct wimlib_resource_entry {
         * solid resource in the WIM.  */
        uint64_t raw_resource_compressed_size;
 
-       uint64_t reserved[2];
+       /** If @p packed is 1, then this will specify the uncompressed size of
+        * the solid resource in the WIM.  */
+       uint64_t raw_resource_uncompressed_size;
+
+       uint64_t reserved[1];
 };
 
 /**
index 54850fe..3d7873e 100644 (file)
@@ -31,8 +31,8 @@ struct wim_features {
        unsigned long case_sensitive_filenames;
 };
 
-struct wim_lookup_table_entry;
-struct read_stream_list_callbacks;
+struct blob_descriptor;
+struct read_blob_list_callbacks;
 struct apply_operations;
 struct wim_dentry;
 
@@ -67,20 +67,20 @@ struct apply_ctx {
        const struct apply_operations *apply_ops;
        u64 next_progress;
        unsigned long invalid_sequence;
-       unsigned long num_streams_remaining;
-       struct list_head stream_list;
-       const struct read_stream_list_callbacks *saved_cbs;
-       struct wim_lookup_table_entry *cur_stream;
-       u64 cur_stream_offset;
+       unsigned long num_blobs_remaining;
+       struct list_head blob_list;
+       const struct read_blob_list_callbacks *saved_cbs;
+       struct blob_descriptor *cur_blob;
+       u64 cur_blob_offset;
        struct filedes tmpfile_fd;
        tchar *tmpfile_name;
        unsigned int count_until_file_progress;
 };
 
 /* Maximum number of UNIX file descriptors, NTFS attributes, or Windows file
- * handles that can be opened simultaneously to extract a single-instance
- * stream to multiple destinations.  */
-#define MAX_OPEN_STREAMS 512
+ * handles that can be opened simultaneously to extract a blob to multiple
+ * destinations.  */
+#define MAX_OPEN_FILES 512
 
 static inline int
 extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg)
@@ -108,14 +108,14 @@ start_file_structure_phase(struct apply_ctx *ctx, uint64_t end_file_count);
 extern int
 start_file_metadata_phase(struct apply_ctx *ctx, uint64_t end_file_count);
 
-/* Report that a file was created, prior to stream extraction.  */
+/* Report that a file was created, prior to blob extraction.  */
 static inline int
 report_file_created(struct apply_ctx *ctx)
 {
        return maybe_do_file_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE);
 }
 
-/* Report that file metadata was applied, after stream extraction.  */
+/* Report that file metadata was applied, after blob extraction.  */
 static inline int
 report_file_metadata_applied(struct apply_ctx *ctx)
 {
@@ -140,8 +140,8 @@ report_apply_error(struct apply_ctx *ctx, int error_code, const tchar *path)
                         struct wim_dentry, d_extraction_alias_node)
 
 extern int
-extract_stream_list(struct apply_ctx *ctx,
-                   const struct read_stream_list_callbacks *cbs);
+extract_blob_list(struct apply_ctx *ctx,
+                   const struct read_blob_list_callbacks *cbs);
 
 /*
  * Represents an extraction backend.
@@ -205,9 +205,9 @@ struct apply_operations {
         * dentries of that inode being extracted.  This will be a (possibly
         * nonproper) subset of the 'd_inode->i_dentry' list.
         *
-        * The streams required to be extracted will already be prepared in
-        * 'apply_ctx'.  The extraction backend should call
-        * extract_stream_list() to extract them.
+        * The blobs required to be extracted will already be prepared in
+        * 'apply_ctx'.  The extraction backend should call extract_blob_list()
+        * to extract them.
         *
         * The will_extract_dentry() utility function, given an arbitrary dentry
         * in the WIM image (which may not be in the extraction list), can be
@@ -221,7 +221,7 @@ struct apply_operations {
         * Query whether the unnamed data stream of the specified file will be
         * extracted as "externally backed".  If so, the extraction backend is
         * assumed to handle this separately, and the common extraction code
-        * will not register a usage of that stream.
+        * will not register a usage of the unnamed data stream's blob.
         *
         * This routine is optional.
         *
diff --git a/include/wimlib/blob_table.h b/include/wimlib/blob_table.h
new file mode 100644 (file)
index 0000000..c07acfa
--- /dev/null
@@ -0,0 +1,400 @@
+#ifndef _WIMLIB_BLOB_TABLE_H
+#define _WIMLIB_BLOB_TABLE_H
+
+#include "wimlib/list.h"
+#include "wimlib/resource.h"
+#include "wimlib/sha1.h"
+#include "wimlib/types.h"
+
+/* An enumerated type that identifies where a blob's data is located.  */
+enum blob_location {
+
+       /* The blob's data does not exist.  This is a temporary state only.  */
+       BLOB_NONEXISTENT = 0,
+
+       /* The blob's data is located in a WIM resource identified by the
+        * `struct wim_resource_descriptor' pointed to by @rdesc.
+        * @offset_in_res identifies the offset at which this particular blob
+        * begins in the uncompressed data of the resource.  */
+       BLOB_IN_WIM,
+
+       /* The blob's data is available as the contents of the file named by
+        * @file_on_disk.  */
+       BLOB_IN_FILE_ON_DISK,
+
+       /* The blob's data is available as the contents of the in-memory buffer
+        * pointed to by @attached_buffer.  */
+       BLOB_IN_ATTACHED_BUFFER,
+
+#ifdef WITH_FUSE
+       /* The blob's data is available as the contents of the file with name
+        * @staging_file_name relative to the open directory file descriptor
+        * @staging_dir_fd.  */
+       BLOB_IN_STAGING_FILE,
+#endif
+
+#ifdef WITH_NTFS_3G
+       /* The blob's data is available as the contents of an NTFS attribute
+        * accessible through libntfs-3g.  The attribute is identified by
+        * volume, path to an inode, attribute name, and attribute type.
+        * @ntfs_loc points to a structure containing this information.  */
+       BLOB_IN_NTFS_VOLUME,
+#endif
+
+#ifdef __WIN32__
+       /* Windows only: the blob's data is available as the contents of the
+        * data stream named by @file_on_disk.  @file_on_disk is an NT namespace
+        * path that may be longer than the Win32-level MAX_PATH.  Furthermore,
+        * the stream may require "backup semantics" to access.  */
+       BLOB_IN_WINNT_FILE_ON_DISK,
+
+       /* Windows only: the blob's data is available as the raw encrypted data
+        * of the external file named by @file_on_disk.  @file_on_disk is a
+        * Win32 namespace path.  */
+       BLOB_WIN32_ENCRYPTED,
+#endif
+};
+
+/* A "blob target" is a stream, and the inode to which that stream belongs, to
+ * which a blob needs to be extracted as part of an extraction operation.  Since
+ * blobs are single-instanced, a blob may have multiple targets.  */
+struct blob_extraction_target {
+       struct wim_inode *inode;
+       struct wim_inode_stream *stream;
+};
+
+/*
+ * Descriptor for a blob, which is a known length sequence of binary data.
+ *
+ * Within a WIM file, blobs are single instanced and are identified by SHA-1
+ * message digest.
+ */
+struct blob_descriptor {
+
+       /* List node for a hash bucket of the blob table  */
+       struct hlist_node hash_list;
+
+       /* Uncompressed size of this blob  */
+       u64 size;
+
+       /* One of the `enum blob_location' values documented above.  */
+       u32 blob_location : 4;
+
+       /* Blob flags (WIM_RESHDR_FLAG_*)  */
+       u32 flags : 8;
+
+       /* 1 iff the SHA-1 message digest of this blob is unknown.  */
+       u32 unhashed : 1;
+
+       /* Temporary fields used when writing blobs; set as documented for
+        * prepare_blob_list_for_write().  */
+       u32 unique_size : 1;
+       u32 will_be_in_output_wim : 1;
+
+       /* Set to 1 if this blob represents a metadata resource that has been
+        * changed.  In such cases, the hash cannot be used to verify the data
+        * if the metadata resource is read again.  (This could be avoided if we
+        * used separate fields for input/output checksum, but most blobs
+        * wouldn't need this.)  */
+       u32 dont_check_metadata_hash : 1;
+
+       u32 may_send_done_with_file : 1;
+
+       /* Only used by wimlib_export_image() */
+       u32 was_exported : 1;
+
+       union {
+               /*
+                * For unhashed == 0: 'hash' is the SHA-1 message digest of the
+                * blob's data.  'hash_short' allows accessing just a prefix of
+                * the SHA-1 message digest, which is useful for getting a "hash
+                * code" for hash table lookup/insertion.
+                */
+               u8 hash[SHA1_HASH_SIZE];
+               size_t hash_short;
+
+               /* For unhashed == 1: these variables make it possible to find
+                * the stream that references this blob.  There can be at most
+                * one such reference, since duplicate blobs can only be joined
+                * after they have been hashed.  */
+               struct {
+                       struct wim_inode *back_inode;
+                       u32 back_stream_id;
+               };
+       };
+
+       /* Number of times this blob is referenced by file streams in WIM
+        * images.  See blob_decrement_refcnt() for information about the
+        * limitations of this field.  */
+       u32 refcnt;
+
+       /*
+        * When a WIM file is written, this is set to the number of references
+        * (from file streams) to this blob in the output WIM file.
+        *
+        * During extraction, this is set to the number of targets to which this
+        * blob is being extracted.
+        *
+        * During image export, this is set to the number of references of this
+        * blob that originated from the source WIM.
+        *
+        * When mounting a WIM image read-write, this is set to the number of
+        * extra references to this blob preemptively taken to allow later
+        * saving the modified image as a new image and leaving the original
+        * image alone.
+        */
+       u32 out_refcnt;
+
+#ifdef WITH_FUSE
+       /* Number of open file descriptors to this blob during a FUSE mount of
+        * the containing image.  */
+       u16 num_opened_fds;
+#endif
+
+       /* Specification of where this blob's data is located.  Which member of
+        * this union is valid is determined by the @blob_location field.  */
+       union {
+               /* BLOB_IN_WIM  */
+               struct {
+                       struct wim_resource_descriptor *rdesc;
+                       u64 offset_in_res;
+               };
+
+               /* BLOB_IN_FILE_ON_DISK
+                * BLOB_IN_WINNT_FILE_ON_DISK
+                * BLOB_WIN32_ENCRYPTED  */
+               struct {
+                       tchar *file_on_disk;
+                       struct wim_inode *file_inode;
+               };
+
+               /* BLOB_IN_ATTACHED_BUFFER */
+               void *attached_buffer;
+
+#ifdef WITH_FUSE
+               /* BLOB_IN_STAGING_FILE  */
+               struct {
+                       char *staging_file_name;
+                       int staging_dir_fd;
+               };
+#endif
+
+#ifdef WITH_NTFS_3G
+               /* BLOB_IN_NTFS_VOLUME  */
+               struct ntfs_location *ntfs_loc;
+#endif
+       };
+
+       /* Links together blobs that share the same underlying WIM resource.
+        * The head is the 'blob_list' member of
+        * 'struct wim_resource_descriptor'.  */
+       struct list_head rdesc_node;
+
+       /* Temporary fields  */
+       union {
+               /* Fields used temporarily during WIM file writing.  */
+               struct {
+                       union {
+                               /* List node used for blob size table.  */
+                               struct hlist_node hash_list_2;
+
+                               /* Metadata for the underlying solid resource in
+                                * the WIM being written (only valid if
+                                * WIM_RESHDR_FLAG_SOLID is set in
+                                * out_reshdr.flags).  */
+                               struct {
+                                       u64 out_res_offset_in_wim;
+                                       u64 out_res_size_in_wim;
+                                       u64 out_res_uncompressed_size;
+                               };
+                       };
+
+                       /* Links blobs being written to the WIM.  */
+                       struct list_head write_blobs_list;
+
+                       union {
+                               /* Metadata for this blob in the WIM being
+                                * written.  */
+                               struct wim_reshdr out_reshdr;
+
+                               struct {
+                                       /* Name under which this blob is being
+                                        * sorted; used only when sorting blobs
+                                        * for solid compression.  */
+                                       utf16lechar *solid_sort_name;
+                                       size_t solid_sort_name_nbytes;
+                               };
+                       };
+               };
+
+               /* Used temporarily during extraction.  This is an array of
+                * references to the streams being extracted that use this blob.
+                * out_refcnt tracks the number of slots filled.  */
+               union {
+                       struct blob_extraction_target inline_blob_extraction_targets[3];
+                       struct {
+                               struct blob_extraction_target *blob_extraction_targets;
+                               u32 alloc_blob_extraction_targets;
+                       };
+               };
+       };
+
+       /* Temporary list fields.  */
+       union {
+               /* Links blobs for writing blob table.  */
+               struct list_head blob_table_list;
+
+               /* Links blobs being extracted.  */
+               struct list_head extraction_list;
+
+               /* Links blobs being exported.  */
+               struct list_head export_blob_list;
+
+               /* Links original list of blobs in the read-write mounted image.  */
+               struct list_head orig_blob_list;
+       };
+
+       /* Links blobs that are still unhashed after being added to a WIM.
+        */
+       struct list_head unhashed_list;
+};
+
+extern struct blob_table *
+new_blob_table(size_t capacity) _malloc_attribute;
+
+extern void
+free_blob_table(struct blob_table *table);
+
+extern int
+read_blob_table(WIMStruct *wim);
+
+extern int
+write_blob_table_from_blob_list(struct list_head *blob_list,
+                               struct filedes *out_fd,
+                               u16 part_number,
+                               struct wim_reshdr *out_reshdr,
+                               int write_resource_flags);
+
+extern struct blob_descriptor *
+new_blob_descriptor(void) _malloc_attribute;
+
+extern struct blob_descriptor *
+clone_blob_descriptor(const struct blob_descriptor *blob)
+                       _malloc_attribute;
+
+extern void
+blob_decrement_refcnt(struct blob_descriptor *blob,
+                     struct blob_table *table);
+#ifdef WITH_FUSE
+extern void
+blob_decrement_num_opened_fds(struct blob_descriptor *blob);
+#endif
+
+extern void
+free_blob_descriptor(struct blob_descriptor *blob);
+
+extern void
+blob_table_insert(struct blob_table *table, struct blob_descriptor *blob);
+
+extern void
+blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob);
+
+extern struct blob_descriptor *
+lookup_blob(const struct blob_table *table, const u8 *hash);
+
+extern int
+for_blob_in_table(struct blob_table *table,
+                 int (*visitor)(struct blob_descriptor *, void *), void *arg);
+
+extern int
+for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
+                                            int (*visitor)(struct blob_descriptor *, void *),
+                                            void *arg);
+
+struct wimlib_resource_entry;
+
+extern void
+blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
+                             struct wimlib_resource_entry *wentry);
+
+extern int
+sort_blob_list(struct list_head *blob_list,
+              size_t list_head_offset,
+              int (*compar)(const void *, const void*));
+
+extern int
+sort_blob_list_by_sequential_order(struct list_head *blob_list,
+                                  size_t list_head_offset);
+
+extern int
+cmp_blobs_by_sequential_order(const void *p1, const void *p2);
+
+static inline bool
+blob_is_in_solid_wim_resource(const struct blob_descriptor *blob)
+{
+       return blob->blob_location == BLOB_IN_WIM &&
+              blob->size != blob->rdesc->uncompressed_size;
+}
+
+static inline bool
+blob_is_in_file(const struct blob_descriptor *blob)
+{
+       return blob->blob_location == BLOB_IN_FILE_ON_DISK
+#ifdef __WIN32__
+           || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK
+           || blob->blob_location == BLOB_WIN32_ENCRYPTED
+#endif
+          ;
+}
+
+static inline const struct blob_extraction_target *
+blob_extraction_targets(struct blob_descriptor *blob)
+{
+       if (blob->out_refcnt <= ARRAY_LEN(blob->inline_blob_extraction_targets))
+               return blob->inline_blob_extraction_targets;
+       else
+               return blob->blob_extraction_targets;
+}
+
+static inline void
+blob_set_is_located_in_wim_resource(struct blob_descriptor *blob,
+                                   struct wim_resource_descriptor *rdesc)
+{
+       blob->blob_location = BLOB_IN_WIM;
+       blob->rdesc = rdesc;
+       list_add_tail(&blob->rdesc_node, &rdesc->blob_list);
+}
+
+static inline void
+blob_unset_is_located_in_wim_resource(struct blob_descriptor *blob)
+{
+       list_del(&blob->rdesc_node);
+       blob->blob_location = BLOB_NONEXISTENT;
+}
+
+extern struct blob_descriptor *
+new_blob_from_data_buffer(const void *buffer, size_t size,
+                         struct blob_table *blob_table);
+
+extern int
+hash_unhashed_blob(struct blob_descriptor *blob,
+                  struct blob_table *blob_table,
+                  struct blob_descriptor **blob_ret);
+
+extern struct blob_descriptor **
+retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob);
+
+static inline void
+prepare_unhashed_blob(struct blob_descriptor *blob,
+                     struct wim_inode *back_inode, u32 stream_id,
+                     struct list_head *unhashed_blobs)
+{
+       if (!blob)
+               return;
+       blob->unhashed = 1;
+       blob->back_inode = back_inode;
+       blob->back_stream_id = stream_id;
+       list_add_tail(&blob->unhashed_list, unhashed_blobs);
+}
+
+#endif /* _WIMLIB_BLOB_TABLE_H */
index ae2640a..7b352dc 100644 (file)
@@ -9,7 +9,7 @@
 #include "wimlib/textfile.h"
 #include "wimlib/util.h"
 
-struct wim_lookup_table;
+struct blob_table;
 struct wim_dentry;
 struct wim_inode;
 
@@ -22,12 +22,12 @@ struct capture_config {
 /* Common parameters to implementations of building an in-memory dentry tree
  * from an on-disk directory structure. */
 struct capture_params {
-       /* Pointer to the lookup table of the WIM.  */
-       struct wim_lookup_table *lookup_table;
+       /* Pointer to the blob table of the WIM.  */
+       struct blob_table *blob_table;
 
-       /* List of streams that have been added so far, but without their SHA1
+       /* List of blobs that have been added so far, but without their SHA-1
         * message digests being calculated (as a shortcut).  */
-       struct list_head *unhashed_streams;
+       struct list_head *unhashed_blobs;
 
        /* Hash table of inodes that have been captured for this tree so far. */
        struct wim_inode_table *inode_table;
index 05cb8c3..241dc56 100644 (file)
@@ -9,11 +9,11 @@
 #include "wimlib/types.h"
 
 struct wim_inode;
-struct wim_lookup_table;
+struct blob_table;
 
 /* Base size of a WIM dentry in the on-disk format, up to and including the file
  * name length.  This does not include the variable-length file name, short
- * name, alternate data stream entries, and padding to 8-byte boundaries.  */
+ * name, extra stream entries, and padding to 8-byte boundaries.  */
 #define WIM_DENTRY_DISK_SIZE 102
 
 /*
@@ -144,7 +144,7 @@ will_extract_dentry(const struct wim_dentry *dentry)
        return dentry->d_extraction_list_node.next != NULL;
 }
 
-extern u64
+extern size_t
 dentry_out_total_length(const struct wim_dentry *dentry);
 
 extern int
@@ -250,7 +250,7 @@ free_dentry(struct wim_dentry *dentry);
 
 extern void
 free_dentry_tree(struct wim_dentry *root,
-                struct wim_lookup_table *lookup_table);
+                struct blob_table *blob_table);
 
 extern void
 unlink_dentry(struct wim_dentry *dentry);
index f9e5f26..c3b7fb8 100644 (file)
@@ -37,6 +37,15 @@ varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes,        \
 extern utf16lechar *
 utf16le_dupz(const void *ustr, size_t usize);
 
+extern utf16lechar *
+utf16le_dup(const utf16lechar *ustr);
+
+extern size_t
+utf16le_len_bytes(const utf16lechar *s);
+
+extern size_t
+utf16le_len_chars(const utf16lechar *s);
+
 #if !TCHAR_IS_UTF16LE
 DECLARE_CHAR_CONVERSION_FUNCTIONS(utf16le, tstr, utf16lechar, tchar);
 DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar);
@@ -72,6 +81,10 @@ cmp_utf16le_strings(const utf16lechar *s1, size_t n1,
                    const utf16lechar *s2, size_t n2,
                    bool ignore_case);
 
+extern int
+cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2,
+                     bool ignore_case);
+
 /* Convert a string in the platform-dependent encoding to UTF-16LE, but if both
  * encodings are UTF-16LE, simply re-use the string.  Release with
  * tstr_put_utf16le() when done.  */
index 6c522fa..feb3176 100644 (file)
@@ -16,7 +16,7 @@
 /* Default WIM version number.  Streams are always compressed independently.  */
 #define WIM_VERSION_DEFAULT 0x10d00
 
-/* Version number used for WIMs that allow multiple streams combined into one
+/* Version number used for WIMs that allow multiple blobs combined into one
  * resource ("solid resources", marked by WIM_RESHDR_FLAG_SOLID) and also a new
  * compression format (LZMS).  This version is new as of Windows 8 WIMGAPI.
  * Although it is used by Windows 8 web downloader, it is not yet documented by
@@ -87,8 +87,8 @@ struct wim_header_disk {
         * least 1.  wimlib allows 0.  */
        u32 image_count;
 
-       /* +0x30: Location and size of the WIM's lookup table.  */
-       struct wim_reshdr_disk lookup_table_reshdr;
+       /* +0x30: Location and size of the WIM's blob table.  */
+       struct wim_reshdr_disk blob_table_reshdr;
 
        /* +0x48: Location and size of the WIM's XML data.  */
        struct wim_reshdr_disk xml_data_reshdr;
@@ -128,7 +128,7 @@ struct wim_header {
        u16 part_number;
        u16 total_parts;
        u32 image_count;
-       struct wim_reshdr lookup_table_reshdr;
+       struct wim_reshdr blob_table_reshdr;
        struct wim_reshdr xml_data_reshdr;
        struct wim_reshdr boot_metadata_reshdr;
        u32 boot_idx;
@@ -153,14 +153,14 @@ struct wim_header {
 /* The WIM is part of a split WIM.  */
 #define WIM_HDR_FLAG_SPANNED            0x00000008
 
-/* All streams included in the WIM's lookup table are non-metadata (do not have
+/* All blobs included in the WIM's blob table are non-metadata (do not have
  * WIM_RESHDR_FLAG_METADATA set).  wimlib ignores this flag and clears it on new
  * WIM files it writes.  */
 #define WIM_HDR_FLAG_RESOURCE_ONLY      0x00000010
 
-/* All streams included in the WIM's lookup table are metadata (have
- * WIM_RESHDR_FLAG_METADATA set).  wimlib ignores this flag and clears it on
- * new WIM files it writes.  */
+/* All blobs included in the WIM's blob table are metadata (have
+ * WIM_RESHDR_FLAG_METADATA set).  wimlib ignores this flag and clears it on new
+ * WIM files it writes.  */
 #define WIM_HDR_FLAG_METADATA_ONLY      0x00000020
 
 /* The WIM is currently being written or appended to.  */
index 11c2361..28285b2 100644 (file)
 #ifndef _WIMLIB_INODE_H
 #define _WIMLIB_INODE_H
 
+#include "wimlib/assert.h"
 #include "wimlib/list.h"
 #include "wimlib/sha1.h"
 #include "wimlib/types.h"
 
 struct avl_tree_node;
-struct wim_ads_entry;
+struct blob_descriptor;
+struct blob_table;
 struct wim_dentry;
-struct wim_lookup_table;
-struct wim_lookup_table_entry;
 struct wim_security_data;
 struct wimfs_fd;
 
+/* Valid values for the 'stream_type' field of a 'struct wim_inode_stream'  */
+enum wim_inode_stream_type {
+
+       /* Data stream, may be unnamed (usual case) or named  */
+       STREAM_TYPE_DATA,
+
+       /* Reparse point stream.  This is the same as the data of the on-disk
+        * reparse point attribute, except that the first 8 bytes of the on-disk
+        * attribute are omitted.  The omitted bytes contain the reparse tag
+        * (which is instead stored in the on-disk WIM dentry), the reparse data
+        * size (which is redundant with the stream size), and a reserved field
+        * that is always zero.  */
+       STREAM_TYPE_REPARSE_POINT,
+
+       /* Encrypted data in the "EFSRPC raw data format" specified by [MS-EFSR]
+        * section 2.2.3.  This contains metadata for the Encrypting File System
+        * as well as the encrypted data of all the file's data streams.  */
+       STREAM_TYPE_EFSRPC_RAW_DATA,
+
+       /* Stream type could not be determined  */
+       STREAM_TYPE_UNKNOWN,
+};
+
+extern const utf16lechar NO_STREAM_NAME[1];
+
 /*
- * WIM inode.
+ * 'struct wim_inode_stream' describes a "stream", which associates a blob of
+ * data with an inode.  Each stream has a type and optionally a name.
+ *
+ * The most frequently seen kind of stream is the "unnamed data stream"
+ * (stream_type == STREAM_TYPE_DATA && stream_name == NO_STREAM_NAME), which is
+ * the "default file contents".  Many inodes just have an unnamed data stream
+ * and no other streams.  However, files on NTFS filesystems may have
+ * additional, "named" data streams, and this is supported by the WIM format.
  *
- * As mentioned in the comment above `struct wim_dentry', in WIM files there
- * is no on-disk analogue of a real inode, as most of these fields are
- * duplicated in the dentries.  Instead, a `struct wim_inode' is something we
- * create ourselves to simplify the handling of hard links.
+ * A "reparse point" is an inode with reparse data set.  The reparse data is
+ * stored in a stream of type STREAM_TYPE_REPARSE_POINT.  There should be only
+ * one such stream, and it should be unnamed.  However, it is possible for an
+ * inode to have both a reparse point stream and an unnamed data stream, and
+ * even named data streams as well.
  */
-struct wim_inode {
-       /* If i_resolved == 0:
-        *      SHA1 message digest of the contents of the unnamed-data stream
-        *      of this inode.
-        *
-        * If i_resolved == 1:
-        *      Pointer to the lookup table entry for the unnamed data stream
-        *      of this inode, or NULL.
+struct wim_inode_stream {
+
+       /* The name of the stream or NO_STREAM_NAME.  */
+       utf16lechar *stream_name;
+
+       /*
+        * If 'stream_resolved' = 0, then 'stream_hash' is the SHA-1 message
+        * digest of the uncompressed data of this stream, or all zeroes if this
+        * stream is empty.
         *
-        * i_hash corresponds to the 'unnamed_stream_hash' field of the `struct
-        * wim_dentry_on_disk' and the additional caveats documented about that
-        * field apply here (for example, the quirks regarding all-zero hashes).
+        * If 'stream_resolved' = 1, then 'stream_blob' is a pointer directly to
+        * a descriptor for this stream's blob, or NULL if this stream is empty.
         */
        union {
-               u8 i_hash[SHA1_HASH_SIZE];
-               struct wim_lookup_table_entry *i_lte;
+               u8 _stream_hash[SHA1_HASH_SIZE];
+               struct blob_descriptor *_stream_blob;
        };
 
-       /* Corresponds to the 'attributes' field of `struct wim_dentry_on_disk';
-        * bitwise OR of the FILE_ATTRIBUTE_* flags that give the attributes of
-        * this inode. */
+       /* 'stream_resolved' determines whether 'stream_hash' or 'stream_blob'
+        * is valid as described above.  */
+       u32 stream_resolved : 1;
+
+       /* A unique identifier for this stream within the context of its inode.
+        * This stays constant even if the streams array is reallocated.  */
+       u32 stream_id : 28;
+
+       /* The type of this stream as one of the STREAM_TYPE_* values  */
+       u32 stream_type : 3;
+};
+
+/*
+ * WIM inode - a "file" in an image which may be accessible via multiple paths
+ *
+ * Note: in WIM files there is no true on-disk analogue of an inode; there are
+ * only directory entries, and some fields are duplicated among all links to a
+ * file.  However, wimlib uses inode structures internally to simplify handling
+ * of hard links.
+ */
+struct wim_inode {
+
+       /*
+        * The collection of streams for this inode.  'i_streams' points to
+        * either 'i_embedded_streams' or an allocated array.
+        */
+       struct wim_inode_stream *i_streams;
+       struct wim_inode_stream i_embedded_streams[1];
+       unsigned i_num_streams;
+
+       /* Windows file attribute flags (FILE_ATTRIBUTE_*).  */
        u32 i_attributes;
 
        /* Root of a balanced binary search tree storing the child directory
@@ -77,36 +138,15 @@ struct wim_inode {
        /* Number of dentries that are aliases for this inode.  */
        u32 i_nlink;
 
-       /* Number of alternate data streams (ADS) associated with this inode */
-       u16 i_num_ads;
-
-       /* Flag that indicates whether this inode's streams have been
-        * "resolved".  By default, the inode starts as "unresolved", meaning
-        * that the i_hash field, along with the hash field of any associated
-        * wim_ads_entry's, are valid and should be used as keys in the WIM
-        * lookup table to find the associated `struct wim_lookup_table_entry'.
-        * But if the inode has been resolved, then each of these fields is
-        * replaced with a pointer directly to the appropriate `struct
-        * wim_lookup_table_entry', or NULL if the stream is empty.  */
-       u8 i_resolved : 1;
-
        /* Flag used to mark this inode as visited; this is used when visiting
         * all the inodes in a dentry tree exactly once.  It will be 0 by
         * default and must be cleared following the tree traversal, even in
         * error paths.  */
        u8 i_visited : 1;
 
-       /* 1 iff all ADS entries of this inode are named or if this inode
-        * has no ADS entries  */
-       u8 i_canonical_streams : 1;
-
        /* Cached value  */
        u8 i_can_externally_back : 1;
 
-       /* Pointer to a malloc()ed array of i_num_ads alternate data stream
-        * entries for this inode.  */
-       struct wim_ads_entry *i_ads_entries;
-
        /* If not NULL, a pointer to the extra data that was read from the
         * dentry.  This should be a series of tagged items, each of which
         * represents a bit of extra metadata, such as the file's object ID.
@@ -180,8 +220,8 @@ struct wim_inode {
 
                /* Used during WIM writing with
                 * WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES:  the number
-                * of data streams this inode has that have not yet been fully
-                * read.  */
+                * of streams this inode has that have not yet been fully read.
+                */
                u32 num_remaining_streams;
 
 #ifdef WITH_FUSE
@@ -204,68 +244,10 @@ struct wim_inode {
        u16 i_num_allocated_fds;
 #endif
 
-       /* Next alternate data stream ID to be assigned */
+       /* Next stream ID to be assigned  */
        u32 i_next_stream_id;
 };
 
-/* Alternate data stream entry.
- *
- * We read this from disk in the read_ads_entries() function; see that function
- * for more explanation. */
-struct wim_ads_entry {
-       union {
-               /* SHA-1 message digest of stream contents */
-               u8 hash[SHA1_HASH_SIZE];
-
-               /* The corresponding lookup table entry (only for resolved
-                * streams) */
-               struct wim_lookup_table_entry *lte;
-       };
-
-       /* Length of UTF16-encoded stream name, in bytes, not including the
-        * terminating null character; or 0 if the stream is unnamed. */
-       u16 stream_name_nbytes;
-
-       /* Number to identify an alternate data stream even after it's possibly
-        * been moved or renamed. */
-       u32 stream_id;
-
-       /* Stream name (UTF-16LE), null-terminated, or NULL if the stream is
-        * unnamed.  */
-       utf16lechar *stream_name;
-
-       /* Reserved field.  We read it into memory so we can write it out
-        * unchanged. */
-       u64 reserved;
-};
-
-/* WIM alternate data stream entry (on-disk format) */
-struct wim_ads_entry_on_disk {
-       /* Length of the entry, in bytes.  This includes all fixed-length
-        * fields, plus the stream name and null terminator if present, and the
-        * padding up to an 8 byte boundary.  wimlib is a little less strict
-        * when reading the entries, and only requires that the number of bytes
-        * from this field is at least as large as the size of the fixed length
-        * fields and stream name without null terminator.  */
-       le64 length;
-
-       le64 reserved;
-
-       /* SHA1 message digest of the uncompressed stream; or, alternatively,
-        * can be all zeroes if the stream has zero length.  */
-       u8 hash[SHA1_HASH_SIZE];
-
-       /* Length of the stream name, in bytes.  0 if the stream is unnamed.  */
-       le16 stream_name_nbytes;
-
-       /* Stream name in UTF-16LE.  It is @stream_name_nbytes bytes long,
-        * excluding the null terminator.  There is a null terminator character
-        * if @stream_name_nbytes != 0; i.e., if this stream is named.  */
-       utf16lechar stream_name[];
-} _packed_attribute;
-
-#define WIM_ADS_ENTRY_DISK_SIZE 38
-
 /*
  * Reparse tags documented at
  * http://msdn.microsoft.com/en-us/library/dd541667(v=prot.10).aspx
@@ -340,17 +322,6 @@ inode_is_directory(const struct wim_inode *inode)
                        == FILE_ATTRIBUTE_DIRECTORY;
 }
 
-/* Is the inode a directory with the encrypted attribute set?
- * This returns true for encrypted directories even if they have reparse data
- * (I'm not sure if such files can even exist!).  */
-static inline bool
-inode_is_encrypted_directory(const struct wim_inode *inode)
-{
-       return ((inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
-                                       FILE_ATTRIBUTE_ENCRYPTED))
-               == (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_ENCRYPTED));
-}
-
 /* Is the inode a symbolic link?
  * This returns true iff the inode is a reparse point that is either a "real"
  * symbolic link or a junction point.  */
@@ -362,114 +333,102 @@ inode_is_symlink(const struct wim_inode *inode)
                    inode->i_reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT);
 }
 
-/* Does the inode have children?
- * Currently (based on read_dentry_tree()), this can only return true for inodes
- * for which inode_is_directory() returns true.  (This also returns false on
- * empty directories.)  */
+/* Does the inode have children?  Currently (based on read_dentry_tree() as well
+ * as the various build-dentry-tree implementations), this can only return true
+ * for inodes for which inode_is_directory() returns true.  */
 static inline bool
 inode_has_children(const struct wim_inode *inode)
 {
        return inode->i_children != NULL;
 }
 
-extern struct wim_ads_entry *
-inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name);
+extern struct wim_inode_stream *
+inode_get_stream(const struct wim_inode *inode, int stream_type,
+                const utf16lechar *stream_name);
 
-extern struct wim_ads_entry *
-inode_add_ads_utf16le(struct wim_inode *inode, const utf16lechar *stream_name,
-                     size_t stream_name_nbytes);
+extern struct wim_inode_stream *
+inode_get_unnamed_stream(const struct wim_inode *inode, int stream_type);
 
-extern struct wim_ads_entry *
-inode_add_ads(struct wim_inode *dentry, const tchar *stream_name);
+extern struct wim_inode_stream *
+inode_add_stream(struct wim_inode *inode, int stream_type,
+                const utf16lechar *stream_name, struct blob_descriptor *blob);
 
-extern struct wim_ads_entry *
-inode_add_ads_with_data(struct wim_inode *inode, const tchar *name,
-                       const void *value, size_t size,
-                       struct wim_lookup_table *lookup_table);
+extern struct wim_inode_stream *
+inode_add_stream_with_data(struct wim_inode *inode, int stream_type,
+                          const utf16lechar *stream_name,
+                          const void *data, size_t size,
+                          struct blob_table *blob_table);
 
 extern void
-inode_remove_ads(struct wim_inode *inode, struct wim_ads_entry *entry,
-                struct wim_lookup_table *lookup_table);
+inode_remove_stream(struct wim_inode *inode, struct wim_inode_stream *strm,
+                   struct blob_table *blob_table);
 
-extern bool
-inode_has_named_stream(const struct wim_inode *inode);
-
-extern int
-inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len,
-                        struct wim_lookup_table *lookup_table);
-
-extern int
-inode_resolve_streams(struct wim_inode *inode, struct wim_lookup_table *table,
-                     bool force);
-
-extern void
-inode_unresolve_streams(struct wim_inode *inode);
-
-extern int
-stream_not_found_error(const struct wim_inode *inode, const u8 *hash);
-
-static inline struct wim_lookup_table_entry *
-inode_stream_lte_resolved(const struct wim_inode *inode, unsigned stream_idx)
+static inline struct blob_descriptor *
+stream_blob_resolved(const struct wim_inode_stream *strm)
 {
-       if (stream_idx == 0)
-               return inode->i_lte;
-       return inode->i_ads_entries[stream_idx - 1].lte;
+       wimlib_assert(strm->stream_resolved);
+       return strm->_stream_blob;
 }
 
-extern struct wim_lookup_table_entry *
-inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
-                const struct wim_lookup_table *table);
-
-extern struct wim_lookup_table_entry *
-inode_unnamed_stream_resolved(const struct wim_inode *inode,
-                             unsigned *stream_idx_ret);
-
-static inline struct wim_lookup_table_entry *
-inode_unnamed_lte_resolved(const struct wim_inode *inode)
+static inline void
+stream_set_blob(struct wim_inode_stream *strm, struct blob_descriptor *blob)
 {
-       unsigned stream_idx;
-       return inode_unnamed_stream_resolved(inode, &stream_idx);
+       strm->_stream_blob = blob;
+       strm->stream_resolved = 1;
 }
 
-extern struct wim_lookup_table_entry *
-inode_unnamed_lte(const struct wim_inode *inode,
-                 const struct wim_lookup_table *table);
-
-extern const u8 *
-inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx);
-
-extern const u8 *
-inode_unnamed_stream_hash(const struct wim_inode *inode);
+static inline bool
+stream_is_named(const struct wim_inode_stream *strm)
+{
+       return strm->stream_name != NO_STREAM_NAME;
+}
 
-static inline unsigned
-inode_stream_name_nbytes(const struct wim_inode *inode, unsigned stream_idx)
+static inline bool
+stream_is_unnamed_data_stream(const struct wim_inode_stream *strm)
 {
-       if (stream_idx == 0)
-               return 0;
-       return inode->i_ads_entries[stream_idx - 1].stream_name_nbytes;
+       return strm->stream_type == STREAM_TYPE_DATA && !stream_is_named(strm);
 }
 
-static inline u32
-inode_stream_idx_to_id(const struct wim_inode *inode, unsigned stream_idx)
+static inline bool
+stream_is_named_data_stream(const struct wim_inode_stream *strm)
 {
-       if (stream_idx == 0)
-               return 0;
-       return inode->i_ads_entries[stream_idx - 1].stream_id;
+       return strm->stream_type == STREAM_TYPE_DATA && stream_is_named(strm);
 }
 
-extern void
-inode_ref_streams(struct wim_inode *inode);
+extern bool
+inode_has_named_data_stream(const struct wim_inode *inode);
+
+extern int
+inode_resolve_streams(struct wim_inode *inode,
+                     struct blob_table *table, bool force);
 
 extern void
-inode_unref_streams(struct wim_inode *inode,
-                   struct wim_lookup_table *lookup_table);
+inode_unresolve_streams(struct wim_inode *inode);
 
 extern int
-read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
-                size_t *nbytes_remaining_p);
+blob_not_found_error(const struct wim_inode *inode, const u8 *hash);
+
+extern struct blob_descriptor *
+stream_blob(const struct wim_inode_stream *strm, const struct blob_table *table);
+
+extern struct blob_descriptor *
+inode_get_blob_for_unnamed_data_stream(const struct wim_inode *inode,
+                                      const struct blob_table *blob_table);
+
+extern struct blob_descriptor *
+inode_get_blob_for_unnamed_data_stream_resolved(const struct wim_inode *inode);
+
+extern const u8 *
+stream_hash(const struct wim_inode_stream *strm);
+
+extern const u8 *
+inode_get_hash_of_unnamed_data_stream(const struct wim_inode *inode);
+
+extern void
+inode_ref_blobs(struct wim_inode *inode);
 
 extern void
-check_inode(struct wim_inode *inode, const struct wim_security_data *sd);
+inode_unref_blobs(struct wim_inode *inode, struct blob_table *blob_table);
 
 /* inode_fixup.c  */
 extern int
index 1fa723a..48f24e3 100644 (file)
@@ -19,8 +19,8 @@ read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
 
 extern int
 write_integrity_table(WIMStruct *wim,
-                     off_t new_lookup_table_end,
-                     off_t old_lookup_table_end,
+                     off_t new_blob_table_end,
+                     off_t old_blob_table_end,
                      struct integrity_table *old_table);
 
 extern int
diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h
deleted file mode 100644 (file)
index 4158f15..0000000
+++ /dev/null
@@ -1,419 +0,0 @@
-#ifndef _WIMLIB_LOOKUP_TABLE_H
-#define _WIMLIB_LOOKUP_TABLE_H
-
-#include "wimlib/list.h"
-#include "wimlib/resource.h"
-#include "wimlib/sha1.h"
-#include "wimlib/types.h"
-
-/* An enumerated type that identifies where the stream corresponding to this
- * lookup table entry is actually located.
- *
- * If we open a WIM and read its lookup table, the location is set to
- * RESOURCE_IN_WIM since all the streams will initially be located in the WIM.
- * However, to handle situations such as image capture and image mount, we allow
- * the actual location of the stream to be somewhere else, such as an external
- * file.  */
-enum resource_location {
-       /* The lookup table entry does not yet correspond to a stream; this is a
-        * temporary state only.  */
-       RESOURCE_NONEXISTENT = 0,
-
-       /* The stream is located in a resource in a WIM file identified by the
-        * `struct wim_resource_spec' pointed to by @rspec.  @offset_in_res
-        * identifies the offset at which this particular stream begins in the
-        * uncompressed data of the resource; this is normally 0, but a WIM
-        * resource can be "solid" and contain multiple streams.  */
-       RESOURCE_IN_WIM,
-
-       /* The stream is located in the external file named by @file_on_disk.
-        */
-       RESOURCE_IN_FILE_ON_DISK,
-
-       /* The stream is directly attached in the in-memory buffer pointed to by
-        * @attached_buffer.  */
-       RESOURCE_IN_ATTACHED_BUFFER,
-
-#ifdef WITH_FUSE
-       /* The stream is located in the external file named by
-        * @staging_file_name, located in the staging directory for a read-write
-        * mount.  */
-       RESOURCE_IN_STAGING_FILE,
-#endif
-
-#ifdef WITH_NTFS_3G
-       /* The stream is located in an NTFS volume.  It is identified by volume,
-        * filename, data stream name, and by whether it is a reparse point or
-        * not.  @ntfs_loc points to a structure containing this information.
-        * */
-       RESOURCE_IN_NTFS_VOLUME,
-#endif
-
-#ifdef __WIN32__
-       /* Windows only: the stream is located in the external file named by
-        * @file_on_disk, which is in the Windows NT namespace and may specify a
-        * named data stream.  */
-       RESOURCE_IN_WINNT_FILE_ON_DISK,
-
-       /* Windows only: the stream is located in the external file named by
-        * @file_on_disk, but the file is encrypted and must be read using the
-        * appropriate Windows API.  */
-       RESOURCE_WIN32_ENCRYPTED,
-#endif
-};
-
-struct stream_owner {
-       struct wim_inode *inode;
-       const utf16lechar *stream_name;
-};
-
-/* Specification for a stream, which may be the contents of a file (unnamed data
- * stream), a named data stream, reparse point data, or a WIM metadata resource.
- *
- * One instance of this structure is created for each entry in the WIM's lookup
- * table, hence the name of the struct.  Each of these entries contains the SHA1
- * message digest of a stream and the location of the stream data in the WIM
- * file (size, location, flags).  The in-memory lookup table is a map from SHA1
- * message digests to stream locations.  */
-struct wim_lookup_table_entry {
-
-       /* List node for a hash bucket of the lookup table.  */
-       struct hlist_node hash_list;
-
-       /* Uncompressed size of this stream.  */
-       u64 size;
-
-       /* Stream flags (WIM_RESHDR_FLAG_*).  */
-       u32 flags : 8;
-
-       /* One of the `enum resource_location' values documented above.  */
-       u32 resource_location : 4;
-
-       /* 1 if this stream has not had a SHA1 message digest calculated for it
-        * yet.  */
-       u32 unhashed : 1;
-
-       /* Temoorary fields used when writing streams; set as documented for
-        * prepare_stream_list_for_write().  */
-       u32 unique_size : 1;
-       u32 will_be_in_output_wim : 1;
-
-       /* Set to 1 when a metadata entry has its checksum changed; in such
-        * cases the hash cannot be used to verify the data if the metadata
-        * resource is read again.  (This could be avoided if we used separate
-        * fields for input/output checksum, but most stream entries wouldn't
-        * need this.)  */
-       u32 dont_check_metadata_hash : 1;
-
-       u32 may_send_done_with_file : 1;
-
-       /* Only used by wimlib_export_image() */
-       u32 was_exported : 1;
-
-       union {
-               /* (On-disk field) SHA1 message digest of the stream referenced
-                * by this lookup table entry.  */
-               u8  hash[SHA1_HASH_SIZE];
-
-               /* First 4 or 8 bytes of the SHA1 message digest, used for
-                * inserting the entry into the hash table.  Since the SHA1
-                * message digest can be considered random, we don't really need
-                * the full 20 byte hash just to insert the entry in a hash
-                * table.  */
-               size_t hash_short;
-
-               /* Unhashed entries only (unhashed == 1): these variables make
-                * it possible to find the pointer to this 'struct
-                * wim_lookup_table_entry' contained in either 'struct
-                * wim_ads_entry' or 'struct wim_inode'.  There can be at most 1
-                * such pointer, as we can only join duplicate streams after
-                * they have been hashed.  */
-               struct {
-                       struct wim_inode *back_inode;
-                       u32 back_stream_id;
-               };
-       };
-
-       /* Number of times this lookup table entry is referenced by dentries in
-        * the WIM.  When a WIM's lookup table is read, this field is
-        * initialized from a corresponding entry.
-        *
-        * However, see lte_decrement_refcnt() for information about the
-        * limitations of this field.  */
-       u32 refcnt;
-
-       /* When a WIM file is written, this is set to the number of references
-        * (by dentries) to this stream in the output WIM file.
-        *
-        * During extraction, this is the number of slots in stream_owners (or
-        * inline_stream_owners) that have been filled.
-        *
-        * During image export, this is set to the number of references of this
-        * stream that originated from the source WIM.
-        *
-        * When mounting a WIM image read-write, this is set to the number of
-        * extra references to this stream preemptively taken to allow later
-        * saving the modified image as a new image and leaving the original
-        * image alone.  */
-       u32 out_refcnt;
-
-#ifdef WITH_FUSE
-       /* Number of open file descriptors to this stream during a FUSE mount of
-        * the containing image.  */
-       u16 num_opened_fds;
-#endif
-
-       /* Specification of where this stream is actually located.  Which member
-        * is valid is determined by the @resource_location field.  */
-       union {
-               struct {
-                       struct wim_resource_spec *rspec;
-                       u64 offset_in_res;
-               };
-               struct {
-                       tchar *file_on_disk;
-                       struct wim_inode *file_inode;
-               };
-               void *attached_buffer;
-       #ifdef WITH_FUSE
-               struct {
-                       char *staging_file_name;
-                       int staging_dir_fd;
-               };
-       #endif
-       #ifdef WITH_NTFS_3G
-               struct ntfs_location *ntfs_loc;
-       #endif
-       };
-
-       /* Links together streams that share the same underlying WIM resource.
-        * The head is the `stream_list' member of `struct wim_resource_spec'.
-        */
-       struct list_head rspec_node;
-
-       /* Temporary fields  */
-       union {
-               /* Fields used temporarily during WIM file writing.  */
-               struct {
-                       union {
-                               /* List node used for stream size table.  */
-                               struct hlist_node hash_list_2;
-
-                               /* Metadata for the underlying solid resource in
-                                * the WIM being written (only valid if
-                                * WIM_RESHDR_FLAG_SOLID set in
-                                * out_reshdr.flags).  */
-                               struct {
-                                       u64 out_res_offset_in_wim;
-                                       u64 out_res_size_in_wim;
-                                       u64 out_res_uncompressed_size;
-                               };
-                       };
-
-                       /* Links streams being written to the WIM.  */
-                       struct list_head write_streams_list;
-
-                       union {
-                               /* Metadata for this stream in the WIM being
-                                * written.  */
-                               struct wim_reshdr out_reshdr;
-
-                               struct {
-                                       /* Name under which this stream is being
-                                        * sorted; used only when sorting
-                                        * streams for solid compression.  */
-                                       utf16lechar *solid_sort_name;
-                                       size_t solid_sort_name_nbytes;
-                               };
-                       };
-               };
-
-               /* Used temporarily during extraction.  This is an array of
-                * pointers to the inodes being extracted that use this stream.
-                */
-               union {
-                       /* Inodes to extract that reference this stream.
-                        * out_refcnt tracks the number of slots filled.  */
-                       struct stream_owner inline_stream_owners[3];
-                       struct {
-                               struct stream_owner *stream_owners;
-                               u32 alloc_stream_owners;
-                       };
-               };
-       };
-
-       /* Temporary list fields.  */
-       union {
-               /* Links streams for writing lookup table.  */
-               struct list_head lookup_table_list;
-
-               /* Links streams being extracted.  */
-               struct list_head extraction_list;
-
-               /* Links streams being exported.  */
-               struct list_head export_stream_list;
-
-               /* Links original list of streams in the read-write mounted image.  */
-               struct list_head orig_stream_list;
-       };
-
-       /* Links streams that are still unhashed after being been added to a
-        * WIM.  */
-       struct list_head unhashed_list;
-};
-
-/* Functions to allocate and free lookup tables  */
-
-extern struct wim_lookup_table *
-new_lookup_table(size_t capacity) _malloc_attribute;
-
-extern void
-free_lookup_table(struct wim_lookup_table *table);
-
-/* Functions to read or write the lookup table from/to a WIM file  */
-
-extern int
-read_wim_lookup_table(WIMStruct *wim);
-
-extern int
-write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
-                                       struct filedes *out_fd,
-                                       u16 part_number,
-                                       struct wim_reshdr *out_reshdr,
-                                       int write_resource_flags);
-
-/* Functions to create, clone, print, and free lookup table entries  */
-
-extern struct wim_lookup_table_entry *
-new_lookup_table_entry(void) _malloc_attribute;
-
-extern struct wim_lookup_table_entry *
-clone_lookup_table_entry(const struct wim_lookup_table_entry *lte)
-                       _malloc_attribute;
-
-extern void
-lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
-                    struct wim_lookup_table *table);
-#ifdef WITH_FUSE
-extern void
-lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte);
-#endif
-
-extern void
-free_lookup_table_entry(struct wim_lookup_table_entry *lte);
-
-/* Functions to insert and delete entries from a lookup table  */
-
-extern void
-lookup_table_insert(struct wim_lookup_table *table,
-               struct wim_lookup_table_entry *lte);
-
-extern void
-lookup_table_unlink(struct wim_lookup_table *table,
-                   struct wim_lookup_table_entry *lte);
-
-/* Function to lookup a stream by SHA1 message digest  */
-extern struct wim_lookup_table_entry *
-lookup_stream(const struct wim_lookup_table *table, const u8 hash[]);
-
-/* Functions to iterate through the entries of a lookup table  */
-
-extern int
-for_lookup_table_entry(struct wim_lookup_table *table,
-                      int (*visitor)(struct wim_lookup_table_entry *, void *),
-                      void *arg);
-
-extern int
-for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
-                                 int (*visitor)(struct wim_lookup_table_entry *,
-                                                void *),
-                                 void *arg);
-
-
-
-/* Function to get a resource entry in stable format  */
-
-struct wimlib_resource_entry;
-
-extern void
-lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte,
-                            struct wimlib_resource_entry *wentry);
-
-/* Functions to sort a list of lookup table entries  */
-extern int
-sort_stream_list(struct list_head *stream_list,
-                size_t list_head_offset,
-                int (*compar)(const void *, const void*));
-
-extern int
-sort_stream_list_by_sequential_order(struct list_head *stream_list,
-                                    size_t list_head_offset);
-
-extern int
-cmp_streams_by_sequential_order(const void *p1, const void *p2);
-
-/* Utility functions  */
-
-extern int
-lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore);
-
-static inline bool
-lte_is_partial(const struct wim_lookup_table_entry * lte)
-{
-       return lte->resource_location == RESOURCE_IN_WIM &&
-              lte->size != lte->rspec->uncompressed_size;
-}
-
-static inline const struct stream_owner *
-stream_owners(struct wim_lookup_table_entry *stream)
-{
-       if (stream->out_refcnt <= ARRAY_LEN(stream->inline_stream_owners))
-               return stream->inline_stream_owners;
-       else
-               return stream->stream_owners;
-}
-
-static inline void
-lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte,
-                          struct wim_resource_spec *rspec)
-{
-       lte->resource_location = RESOURCE_IN_WIM;
-       lte->rspec = rspec;
-       list_add_tail(&lte->rspec_node, &rspec->stream_list);
-}
-
-static inline void
-lte_unbind_wim_resource_spec(struct wim_lookup_table_entry *lte)
-{
-       list_del(&lte->rspec_node);
-       lte->resource_location = RESOURCE_NONEXISTENT;
-}
-
-extern void
-lte_put_resource(struct wim_lookup_table_entry *lte);
-
-extern struct wim_lookup_table_entry *
-new_stream_from_data_buffer(const void *buffer, size_t size,
-                           struct wim_lookup_table *lookup_table);
-
-static inline void
-add_unhashed_stream(struct wim_lookup_table_entry *lte,
-                   struct wim_inode *back_inode,
-                   u32 back_stream_id,
-                   struct list_head *unhashed_streams)
-{
-       lte->unhashed = 1;
-       lte->back_inode = back_inode;
-       lte->back_stream_id = back_stream_id;
-       list_add_tail(&lte->unhashed_list, unhashed_streams);
-}
-
-extern int
-hash_unhashed_stream(struct wim_lookup_table_entry *lte,
-                    struct wim_lookup_table *lookup_table,
-                    struct wim_lookup_table_entry **lte_ret);
-
-extern struct wim_lookup_table_entry **
-retrieve_lte_pointer(struct wim_lookup_table_entry *lte);
-
-#endif /* _WIMLIB_LOOKUP_TABLE_H */
index 8a527ac..af35b53 100644 (file)
@@ -22,23 +22,22 @@ struct wim_image_metadata {
        /* Pointer to the security data of the image. */
        struct wim_security_data *security_data;
 
-       /* Pointer to the lookup table entry for this image's metadata resource
-        */
-       struct wim_lookup_table_entry *metadata_lte;
+       /* Pointer to the blob descriptor for this image's metadata resource */
+       struct blob_descriptor *metadata_blob;
 
        /* Linked list of 'struct wim_inode's for this image. */
        struct list_head inode_list;
 
-       /* Linked list of 'struct wim_lookup_table_entry's for this image that
-        * are referred to in the dentry tree, but have not had a SHA1 message
-        * digest calculated yet and therefore have not been inserted into the
-        * WIM's lookup table.  This list is added to during wimlib_add_image()
-        * and wimlib_mount_image() (read-write only). */
-       struct list_head unhashed_streams;
+       /* Linked list of 'struct blob_descriptor's for blobs that are
+        * referenced by this image's dentry tree, but have not had their SHA-1
+        * message digests calculated yet and therefore have not been inserted
+        * into the WIMStruct's blob table.  This list is appended to when files
+        * are scanned for inclusion in this WIM image.  */
+       struct list_head unhashed_blobs;
 
-       /* 1 iff the dentry tree has been modified.  If this is the case, the
-        * memory for the dentry tree should not be freed when switching to a
-        * different WIM image. */
+       /* 1 iff the dentry tree has been modified from the original stored in
+        * the WIM file.  If this is the case, the memory for the dentry tree
+        * should not be freed when switching to a different WIM image. */
        u8 modified : 1;
 
 #ifdef WITH_NTFS_3G
@@ -74,18 +73,17 @@ wim_get_current_security_data(WIMStruct *wim)
 #define image_for_each_inode(inode, imd) \
        list_for_each_entry(inode, &(imd)->inode_list, i_list)
 
-/* Iterate over each stream in a WIM image that has not yet been hashed */
-#define image_for_each_unhashed_stream(lte, imd) \
-       list_for_each_entry(lte, &(imd)->unhashed_streams, unhashed_list)
+/* Iterate over each blob in a WIM image that has not yet been hashed */
+#define image_for_each_unhashed_blob(blob, imd) \
+       list_for_each_entry(blob, &(imd)->unhashed_blobs, unhashed_list)
 
-/* Iterate over each stream in a WIM image that has not yet been hashed (safe
- * against stream removal) */
-#define image_for_each_unhashed_stream_safe(lte, tmp, imd) \
-       list_for_each_entry_safe(lte, tmp, &(imd)->unhashed_streams, unhashed_list)
+/* Iterate over each blob in a WIM image that has not yet been hashed (safe
+ * against blob removal) */
+#define image_for_each_unhashed_blob_safe(blob, tmp, imd) \
+       list_for_each_entry_safe(blob, tmp, &(imd)->unhashed_blobs, unhashed_list)
 
 extern void
-put_image_metadata(struct wim_image_metadata *imd,
-                  struct wim_lookup_table *table);
+put_image_metadata(struct wim_image_metadata *imd, struct blob_table *table);
 
 extern int
 append_image_metadata(WIMStruct *wim, struct wim_image_metadata *imd);
index 499c1aa..6648e5a 100644 (file)
@@ -4,17 +4,17 @@
 #include "wimlib/callback.h"
 #include "wimlib/types.h"
 
-struct wim_lookup_table_entry;
+struct blob_descriptor;
 struct _ntfs_volume;
 
 #ifdef WITH_NTFS_3G
 struct _ntfs_volume;
 struct ntfs_location {
-       tchar *path;
-       utf16lechar *stream_name;
-       u16 stream_name_nchars;
        struct _ntfs_volume *ntfs_vol;
-       bool is_reparse_point;
+       char *path;
+       utf16lechar *attr_name;
+       unsigned attr_name_nchars;
+       unsigned attr_type;
 };
 #endif
 
@@ -22,11 +22,8 @@ extern void
 libntfs3g_global_init(void);
 
 extern int
-read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte,
-                     u64 size,
-                     consume_data_callback_t cb,
-                     void *cb_ctx);
-
+read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size,
+                          consume_data_callback_t cb, void *cb_ctx);
 
 extern int
 do_ntfs_umount(struct _ntfs_volume *vol);
index f7d7c63..43adbd5 100644 (file)
@@ -6,8 +6,8 @@
 #include "wimlib/types.h"
 
 struct wim_inode;
-struct wim_lookup_table;
-struct wim_lookup_table_entry;
+struct blob_table;
+struct blob_descriptor;
 
 #define REPARSE_POINT_MAX_SIZE (16 * 1024)
 
@@ -86,20 +86,14 @@ make_reparse_buffer(const struct reparse_data * restrict rpdata,
                    u8 * restrict rpbuf,
                    u16 * restrict rpbuflen_ret);
 
-extern int
-wim_inode_get_reparse_data(const struct wim_inode * restrict inode,
-                          u8 * restrict rpbuf,
-                          u16 * restrict rpbuflen_ret,
-                          struct wim_lookup_table_entry *lte_override);
-
 #ifndef __WIN32__
 ssize_t
 wim_inode_readlink(const struct wim_inode * restrict inode, char * restrict buf,
-                  size_t buf_len, struct wim_lookup_table_entry *lte);
+                  size_t buf_len, struct blob_descriptor *blob);
 
 extern int
 wim_inode_set_symlink(struct wim_inode *inode, const char *target,
-                     struct wim_lookup_table *lookup_table);
+                     struct blob_table *blob_table);
 #endif
 
 #endif /* _WIMLIB_REPARSE_H */
index 5572f52..98305f0 100644 (file)
@@ -6,22 +6,19 @@
 #include "wimlib/sha1.h"
 #include "wimlib/types.h"
 
+struct blob_descriptor;
 struct filedes;
-struct wim_lookup_table_entry;
 struct wim_image_metadata;
 
 /*
- * Specification of a resource in a WIM file.
- *
- * If a `struct wim_lookup_table_entry' lte has (lte->resource_location ==
- * RESOURCE_IN_WIM), then lte->rspec points to an instance of this structure.
- *
- * Normally, there is a one-to-one correspondence between lookup table entries
- * ("streams", each of which may be the contents of a file, for example) and
- * resources.  However, a resource with the WIM_RESHDR_FLAG_SOLID flag set is a
- * "solid" resource that may contain multiple streams compressed together.
+ * Description of a "resource" in a WIM file.  A "resource" is a standalone,
+ * possibly compressed region of data.  Normally, there is a one-to-one
+ * correspondence between "blobs" (each of which may be the contents of a file,
+ * for example) and resources.  However, a resource with the
+ * WIM_RESHDR_FLAG_SOLID flag set is a "solid" resource that contains multiple
+ * blobs compressed together.
  */
-struct wim_resource_spec {
+struct wim_resource_descriptor {
        /* The WIM containing this resource.  @wim->in_fd is expected to be a
         * file descriptor to the underlying WIM file, opened for reading.  */
        WIMStruct *wim;
@@ -39,8 +36,8 @@ struct wim_resource_spec {
         * to.  */
        u64 uncompressed_size;
 
-       /* The list of streams this resource contains.  */
-       struct list_head stream_list;
+       /* The list of blobs this resource contains.  */
+       struct list_head blob_list;
 
        /* Flags for this resource (WIM_RESHDR_FLAG_*).  */
        u32 flags : 8;
@@ -89,25 +86,22 @@ struct wim_reshdr {
 /* Flags for the `flags' field of WIM resource headers (`struct wim_reshdr').
  */
 
-/* Unknown meaning; may be intended to indicate spaces in the WIM that are free
- * to overwrite.  Currently ignored by wimlib.  */
+/* Unknown meaning; currently ignored by wimlib.  */
 #define WIM_RESHDR_FLAG_FREE            0x01
 
-/* The resource is a metadata resource for a WIM image, or is the lookup table
- * or XML data for the WIM.  */
+/* The resource is a metadata resource for a WIM image, or is the blob table or
+ * XML data for the WIM.  */
 #define WIM_RESHDR_FLAG_METADATA        0x02
 
 /* The resource is a non-solid resource compressed using the WIM's default
  * compression type.  */
 #define WIM_RESHDR_FLAG_COMPRESSED     0x04
 
-/* Unknown meaning; may be intended to indicate a partial stream.  Currently
- * ignored by wimlib.  */
+/* Unknown meaning; currently ignored by wimlib.  */
 #define WIM_RESHDR_FLAG_SPANNED         0x08
 
-/* The resource is a solid compressed resource which may contain multiple
- * streams.  This flag is only allowed if the WIM version number is
- * WIM_VERSION_SOLID.  */
+/* The resource is a solid compressed resource which may contain multiple blobs.
+ * This flag is only allowed if the WIM version number is WIM_VERSION_SOLID.  */
 #define WIM_RESHDR_FLAG_SOLID          0x10
 
 /* Magic number in the 'uncompressed_size' field of the resource header that
@@ -117,9 +111,9 @@ struct wim_reshdr {
 /* Returns true if the specified WIM resource is compressed (may be either solid
  * or non-solid)  */
 static inline bool
-resource_is_compressed(const struct wim_resource_spec *rspec)
+resource_is_compressed(const struct wim_resource_descriptor *rdesc)
 {
-       return (rspec->flags & (WIM_RESHDR_FLAG_COMPRESSED |
+       return (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
                                WIM_RESHDR_FLAG_SOLID));
 }
 
@@ -136,11 +130,11 @@ zero_reshdr(struct wim_reshdr *reshdr)
 }
 
 extern void
-wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim,
-                   struct wim_resource_spec *rspec);
+wim_res_hdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim,
+                   struct wim_resource_descriptor *rdesc);
 
 extern void
-wim_res_spec_to_hdr(const struct wim_resource_spec *rspec,
+wim_res_desc_to_hdr(const struct wim_resource_descriptor *rdesc,
                    struct wim_reshdr *reshdr);
 
 extern void
@@ -184,18 +178,17 @@ get_chunk_entry_size(u64 res_size, bool is_alt)
                return 8;
 }
 
-/* Functions to read streams  */
+/* Functions to read blobs  */
 
 extern int
-read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte,
-                                size_t size, u64 offset, void *buf);
+read_partial_wim_blob_into_buf(const struct blob_descriptor *blob,
+                              size_t size, u64 offset, void *buf);
 
 extern int
-read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf);
+read_full_blob_into_buf(const struct blob_descriptor *blob, void *buf);
 
 extern int
-read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte,
-                               void **buf_ret);
+read_full_blob_into_alloc_buf(const struct blob_descriptor *blob, void **buf_ret);
 
 extern int
 wim_reshdr_to_data(const struct wim_reshdr *reshdr,
@@ -206,100 +199,93 @@ wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim,
                   u8 hash[SHA1_HASH_SIZE]);
 
 extern int
-skip_wim_stream(struct wim_lookup_table_entry *lte);
+skip_wim_resource(struct wim_resource_descriptor *rdesc);
 
 /*
- * Type of callback function for beginning to read a stream.
+ * Type of callback function for beginning to read a blob.
  *
- * @lte:
- *     Stream that is about to be read.
+ * @blob:
+ *     Blob that is about to be read.
  *
  * @ctx:
  *     User-provided context.
  *
  * Must return 0 on success, a positive error code on failure, or the special
- * value BEGIN_STREAM_STATUS_SKIP_STREAM to indicate that the stream should not
- * be read, and read_stream_list() should continue on to the next stream
- * (without calling @consume_chunk or @end_stream).
+ * value BEGIN_BLOB_STATUS_SKIP_BLOB to indicate that the blob should not be
+ * read, and read_blob_list() should continue on to the next blob (without
+ * calling @consume_chunk or @end_blob).
  */
-typedef int (*read_stream_list_begin_stream_t)(struct wim_lookup_table_entry *lte,
-                                              void *ctx);
+typedef int (*read_blob_list_begin_blob_t)(struct blob_descriptor *blob, void *ctx);
 
-#define BEGIN_STREAM_STATUS_SKIP_STREAM        -1
+#define BEGIN_BLOB_STATUS_SKIP_BLOB    -1
 
 /*
- * Type of callback function for finishing reading a stream.
+ * Type of callback function for finishing reading a blob.
  *
- * @lte:
- *     Stream that has been fully read, or stream that started being read but
- *     could not be fully read due to a read error.
+ * @blob:
+ *     Blob that has been fully read, or blob that started being read but could
+ *     not be fully read due to a read error.
  *
  * @status:
- *     0 if reading the stream was successful; otherwise a nonzero error code
+ *     0 if reading the blob was successful; otherwise a nonzero error code
  *     that specifies the return status.
  *
  * @ctx:
  *     User-provided context.
  */
-typedef int (*read_stream_list_end_stream_t)(struct wim_lookup_table_entry *lte,
-                                            int status,
-                                            void *ctx);
+typedef int (*read_blob_list_end_blob_t)(struct blob_descriptor *blob,
+                                        int status,
+                                        void *ctx);
 
 
-/* Callback functions and contexts for read_stream_list().  */
-struct read_stream_list_callbacks {
+/* Callback functions and contexts for read_blob_list().  */
+struct read_blob_list_callbacks {
 
-       /* Called when a stream is about to be read.  */
-       read_stream_list_begin_stream_t begin_stream;
+       /* Called when a blob is about to be read.  */
+       read_blob_list_begin_blob_t begin_blob;
 
        /* Called when a chunk of data has been read.  */
        consume_data_callback_t consume_chunk;
 
-       /* Called when a stream has been fully read.  A successful call to
-        * @begin_stream will always be matched by a call to @end_stream.  */
-       read_stream_list_end_stream_t end_stream;
+       /* Called when a blob has been fully read.  A successful call to
+        * @begin_blob will always be matched by a call to @end_blob.  */
+       read_blob_list_end_blob_t end_blob;
 
-       /* Parameter passed to @begin_stream.  */
-       void *begin_stream_ctx;
+       /* Parameter passed to @begin_blob.  */
+       void *begin_blob_ctx;
 
        /* Parameter passed to @consume_chunk.  */
        void *consume_chunk_ctx;
 
-       /* Parameter passed to @end_stream.  */
-       void *end_stream_ctx;
+       /* Parameter passed to @end_blob.  */
+       void *end_blob_ctx;
 };
 
-/* Flags for read_stream_list()  */
-#define VERIFY_STREAM_HASHES           0x1
-#define COMPUTE_MISSING_STREAM_HASHES  0x2
-#define STREAM_LIST_ALREADY_SORTED     0x4
+/* Flags for read_blob_list()  */
+#define VERIFY_BLOB_HASHES             0x1
+#define COMPUTE_MISSING_BLOB_HASHES    0x2
+#define BLOB_LIST_ALREADY_SORTED       0x4
 
 extern int
-read_stream_list(struct list_head *stream_list,
-                size_t list_head_offset,
-                const struct read_stream_list_callbacks *cbs,
-                int flags);
+read_blob_list(struct list_head *blob_list, size_t list_head_offset,
+              const struct read_blob_list_callbacks *cbs, int flags);
 
-/* Functions to extract streams.  */
+/* Functions to extract blobs.  */
 
 extern int
-extract_stream(struct wim_lookup_table_entry *lte,
-              u64 size,
-              consume_data_callback_t extract_chunk,
-              void *extract_chunk_arg);
+extract_blob(struct blob_descriptor *blob, u64 size,
+            consume_data_callback_t extract_chunk, void *extract_chunk_arg);
 
 extern int
-extract_stream_to_fd(struct wim_lookup_table_entry *lte,
-                    struct filedes *fd, u64 size);
+extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, u64 size);
 
 extern int
-extract_full_stream_to_fd(struct wim_lookup_table_entry *lte,
-                         struct filedes *fd);
+extract_full_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd);
 
-/* Miscellaneous stream functions.  */
+/* Miscellaneous blob functions.  */
 
 extern int
-sha1_stream(struct wim_lookup_table_entry *lte);
+sha1_blob(struct blob_descriptor *blob);
 
 /* Functions to read/write metadata resources.  */
 
@@ -311,12 +297,12 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags);
 
 /* Definitions specific to pipable WIM resources.  */
 
-/* Arbitrary number to begin each stream in the pipable WIM, used for sanity
+/* Arbitrary number to begin each blob in the pipable WIM, used for sanity
  * checking.  */
-#define PWM_STREAM_MAGIC 0x2b9b9ba2443db9d8ULL
+#define PWM_BLOB_MAGIC 0x2b9b9ba2443db9d8ULL
 
 /* Header that precedes each resource in a pipable WIM.  */
-struct pwm_stream_hdr {
+struct pwm_blob_hdr {
        le64 magic;                     /* +0   */
        le64 uncompressed_size;         /* +8   */
        u8 hash[SHA1_HASH_SIZE];        /* +16  */
@@ -324,9 +310,9 @@ struct pwm_stream_hdr {
                                        /* +40  */
 } _packed_attribute;
 
-/* Extra flag for the @flags field in `struct pipable_wim_stream_hdr': Indicates
- * that the SHA1 message digest of the stream has not been calculated.
- * Currently only used for the XML data.  */
+/* Extra flag for the @flags field in `struct pwm_blob_hdr': Indicates that the
+ * SHA-1 message digest of the blob has not been calculated.  Currently only
+ * used for the XML data.  */
 #define PWM_RESHDR_FLAG_UNHASHED         0x100
 
 /* Header that precedes each chunk of a compressed resource in a pipable WIM.
index 064f27f..cf37a88 100644 (file)
@@ -4,6 +4,6 @@
 struct list_head;
 
 extern int
-sort_stream_list_for_solid_compression(struct list_head *stream_list);
+sort_blob_list_for_solid_compression(struct list_head *blob_list);
 
 #endif /* _WIMLIB_SOLID_H */
index 40f6294..6704463 100644 (file)
@@ -84,9 +84,6 @@ extern void *
 mempcpy(void *dst, const void *src, size_t n);
 #endif
 
-extern size_t
-utf16le_strlen(const utf16lechar *s);
-
 extern void
 randomize_byte_array(u8 *p, size_t n);
 
index 5bc20a6..0f77aab 100644 (file)
@@ -12,7 +12,7 @@
 
 struct wim_image_metadata;
 struct wim_info;
-struct wim_lookup_table;
+struct blob_table;
 
 /*
  * WIMStruct - represents a WIM, or a part of a non-standalone WIM
@@ -57,11 +57,11 @@ struct WIMStruct {
         * also maintained for a WIMStruct not backed by a file.  */
        struct wim_info *wim_info;
 
-       /* The lookup table for this WIMStruct.  If this WIMStruct has a backing
-        * file, then this table will index the streams contained in that file.
-        * In addition, this table may index streams that were added by updates
-        * or referenced from other WIMStructs.  */
-       struct wim_lookup_table *lookup_table;
+       /* The blob table for this WIMStruct.  If this WIMStruct has a backing
+        * file, then this table will index the blobs contained in that file.
+        * In addition, this table may index blobs that were added by updates or
+        * referenced from other WIMStructs.  */
+       struct blob_table *blob_table;
 
        /*
         * The 1-based index of the currently selected image in this WIMStruct,
@@ -211,7 +211,7 @@ extern int
 for_image(WIMStruct *wim, int image, int (*visitor)(WIMStruct *));
 
 extern int
-wim_checksum_unhashed_streams(WIMStruct *wim);
+wim_checksum_unhashed_blobs(WIMStruct *wim);
 
 extern int
 delete_wim_image(WIMStruct *wim, int image);
index f9bdbe8..7dcd85d 100644 (file)
@@ -6,7 +6,7 @@
 #include "wimlib/types.h"
 #include "wimlib/win32_common.h"
 
-struct wim_lookup_table_entry;
+struct blob_descriptor;
 
 extern int
 wimboot_alloc_data_source_id(const wchar_t *wim_path,
@@ -16,9 +16,9 @@ wimboot_alloc_data_source_id(const wchar_t *wim_path,
 
 extern bool
 wimboot_set_pointer(HANDLE h,
-                   const struct wim_lookup_table_entry *lte,
+                   const struct blob_descriptor *blob,
                    u64 data_source_id,
-                   const u8 lookup_table_hash[SHA1_HASH_SIZE],
+                   const u8 blob_table_hash[SHA1_HASH_SIZE],
                    bool wof_running);
 
 
index c8be8c8..9ab141f 100644 (file)
@@ -8,14 +8,14 @@
 #include "wimlib/callback.h"
 #include "wimlib/types.h"
 
-struct wim_lookup_table_entry;
+struct blob_descriptor;
 
 extern int
-read_winnt_file_prefix(const struct wim_lookup_table_entry *lte, u64 size,
-                      consume_data_callback_t cb, void *cb_ctx);
+read_winnt_stream_prefix(const struct blob_descriptor *blob, u64 size,
+                        consume_data_callback_t cb, void *cb_ctx);
 
 extern int
-read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte,
+read_win32_encrypted_file_prefix(const struct blob_descriptor *blob,
                                 u64 size,
                                 consume_data_callback_t cb,
                                 void *cb_ctx);
index 969be85..fae0801 100644 (file)
@@ -63,22 +63,22 @@ struct wim_provider_rpdata {
        /* Integer ID that identifies the WIM.  */
        le64 data_source_id;
 
-       /* SHA1 message digest of the file's unnamed data stream.  */
-       u8 resource_hash[20];
+       /* SHA-1 message digest of the file's unnamed data stream.  */
+       u8 unnamed_data_stream_hash[20];
 
-       /* SHA1 message digest of the WIM's lookup table.  */
-       u8 wim_lookup_table_hash[20];
+       /* SHA-1 message digest of the WIM's blob table as stored on disk.  */
+       u8 blob_table_hash[20];
 
        /* Uncompressed size of the file's unnamed data stream, in bytes.  */
-       le64 stream_uncompressed_size;
+       le64 unnamed_data_stream_uncompressed_size;
 
-       /* Compressed size of the file's unnamed data stream, in bytes.  If
+       /* Compressed size of the file's unnamed data stream, in bytes.  If the
         * stream is stored uncompressed, set this the same as the uncompressed
         * size.  */
-       le64 stream_compressed_size;
+       le64 unnamed_data_stream_compressed_size;
 
        /* Byte offset of the file's unnamed data stream in the WIM.  */
-       le64 stream_offset_in_wim;
+       le64 unnamed_data_stream_offset_in_wim;
 } _packed_attribute;
 
 /* WIM-specific information about a WIM data source  */
@@ -102,8 +102,8 @@ struct WimOverlay_dat_entry_1 {
        le32 wim_type;
 
        /* Index of the image in the WIM to use??? (This doesn't really make
-        * sense, since WIM files combine streams for all images into a single
-        * table.  Set to 1 if unsure...)  */
+        * sense, since WIM files combine file data "blobs" for all images into
+        * a single table.  Set to 1 if unsure...)  */
        le32 wim_index;
 
        /* GUID of the WIM file (copied from the WIM header, offset +0x18).  */
@@ -292,8 +292,8 @@ struct wim_provider_external_info {
         * FSCTL_ADD_OVERLAY ioctl.  */
        u64 data_source_id;
 
-       /* SHA1 message digest of the file's unnamed data stream.  */
-       u8 resource_hash[20];
+       /* SHA-1 message digest of the file's unnamed data stream.  */
+       u8 unnamed_data_stream_hash[20];
 };
 
 /*****************************************************************************
@@ -394,9 +394,9 @@ struct wim_provider_overlay_entry {
        /* Type of WIM file: WIM_BOOT_OS_WIM or WIM_BOOT_NOT_OS_WIM.  */
        uint32_t wim_type;
 
-       /* Index of the backing image in the WIM??? (This doesn't really make
-        * sense, since WIM files combine streams for all images into a single
-        * table.)  */
+       /* Index of the image in the WIM to use??? (This doesn't really make
+        * sense, since WIM files combine file data "blobs" for all images into
+        * a single table.  Set to 1 if unsure...)  */
        uint32_t wim_index;
 
        /* 0 when WIM provider active, otherwise
@@ -441,8 +441,8 @@ struct wim_provider_add_overlay_input {
 #define WIM_BOOT_NOT_OS_WIM    1
 
        /* Index of the image in the WIM to use??? (This doesn't really make
-        * sense, since WIM files combine streams for all images into a single
-        * table.  Set to 1 if unsure...)  */
+        * sense, since WIM files combine file data "blobs" for all images into
+        * a single table.  Set to 1 if unsure...)  */
        u32 wim_index;
 
        /* Byte offset of wim_file_name in this buffer, not including the
index a46460b..af1a635 100644 (file)
@@ -5,7 +5,7 @@
 #include "wimlib/types.h"
 
 /* Internal use only */
-#define WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE              0x80000000
+#define WIMLIB_WRITE_FLAG_NO_BLOB_TABLE                        0x80000000
 #define WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML         0x40000000
 #define WIMLIB_WRITE_FLAG_HEADER_AT_END                        0x20000000
 #define WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR              0x10000000
@@ -60,7 +60,7 @@ write_wim_part(WIMStruct *wim,
               unsigned num_threads,
               unsigned part_number,
               unsigned total_parts,
-              struct list_head *stream_list_override,
+              struct list_head *blob_list_override,
               const u8 *guid);
 
 int
index 9334623..b308028 100644 (file)
@@ -160,7 +160,7 @@ enum {
        IMAGEX_HEADER_OPTION,
        IMAGEX_INCLUDE_INVALID_NAMES_OPTION,
        IMAGEX_LAZY_OPTION,
-       IMAGEX_LOOKUP_TABLE_OPTION,
+       IMAGEX_BLOBS_OPTION,
        IMAGEX_METADATA_OPTION,
        IMAGEX_NEW_IMAGE_OPTION,
        IMAGEX_NOCHECK_OPTION,
@@ -320,7 +320,8 @@ static const struct option info_options[] = {
        {T("no-check"),     no_argument,       NULL, IMAGEX_NOCHECK_OPTION},
        {T("extract-xml"),  required_argument, NULL, IMAGEX_EXTRACT_XML_OPTION},
        {T("header"),       no_argument,       NULL, IMAGEX_HEADER_OPTION},
-       {T("lookup-table"), no_argument,       NULL, IMAGEX_LOOKUP_TABLE_OPTION},
+       {T("lookup-table"), no_argument,       NULL, IMAGEX_BLOBS_OPTION},
+       {T("blobs"),        no_argument,       NULL, IMAGEX_BLOBS_OPTION},
        {T("metadata"),     no_argument,       NULL, IMAGEX_METADATA_OPTION},
        {T("xml"),          no_argument,       NULL, IMAGEX_XML_OPTION},
        {NULL, 0, NULL, 0},
@@ -2440,34 +2441,34 @@ static int
 print_resource(const struct wimlib_resource_entry *resource,
               void *_ignore)
 {
-       tprintf(T("Hash                = 0x"));
+       tprintf(T("Hash              = 0x"));
        print_byte_field(resource->sha1_hash, sizeof(resource->sha1_hash));
        tputchar(T('\n'));
 
        if (!resource->is_missing) {
-               tprintf(T("Uncompressed size   = %"PRIu64" bytes\n"),
+               tprintf(T("Uncompressed size = %"PRIu64" bytes\n"),
                        resource->uncompressed_size);
                if (resource->packed) {
-                       tprintf(T("Raw compressed size = %"PRIu64" bytes\n"),
-                               resource->raw_resource_compressed_size);
-
-                       tprintf(T("Raw offset in WIM   = %"PRIu64" bytes\n"),
+                       tprintf(T("Solid resource    = %"PRIu64" => %"PRIu64" "
+                                 "bytes @ offset %"PRIu64"\n"),
+                               resource->raw_resource_uncompressed_size,
+                               resource->raw_resource_compressed_size,
                                resource->raw_resource_offset_in_wim);
 
-                       tprintf(T("Offset in raw       = %"PRIu64" bytes\n"),
+                       tprintf(T("Solid offset      = %"PRIu64" bytes\n"),
                                resource->offset);
                } else {
-                       tprintf(T("Compressed size     = %"PRIu64" bytes\n"),
+                       tprintf(T("Compressed size   = %"PRIu64" bytes\n"),
                                resource->compressed_size);
 
-                       tprintf(T("Offset in WIM       = %"PRIu64" bytes\n"),
+                       tprintf(T("Offset in WIM     = %"PRIu64" bytes\n"),
                                resource->offset);
                }
 
-               tprintf(T("Part Number         = %u\n"), resource->part_number);
-               tprintf(T("Reference Count     = %u\n"), resource->reference_count);
+               tprintf(T("Part Number       = %u\n"), resource->part_number);
+               tprintf(T("Reference Count   = %u\n"), resource->reference_count);
 
-               tprintf(T("Flags               = "));
+               tprintf(T("Flags             = "));
                if (resource->is_compressed)
                        tprintf(T("WIM_RESHDR_FLAG_COMPRESSED  "));
                if (resource->is_metadata)
@@ -2485,7 +2486,7 @@ print_resource(const struct wimlib_resource_entry *resource,
 }
 
 static void
-print_lookup_table(WIMStruct *wim)
+print_blobs(WIMStruct *wim)
 {
        wimlib_iterate_lookup_table(wim, 0, print_resource, NULL);
 }
@@ -2538,8 +2539,12 @@ print_dentry_detailed(const struct wimlib_dir_entry *dentry)
 
        for (uint32_t i = 0; i <= dentry->num_named_streams; i++) {
                if (dentry->streams[i].stream_name) {
-                       tprintf(T("\tData stream \"%"TS"\":\n"),
+                       tprintf(T("\tNamed data stream \"%"TS"\":\n"),
                                dentry->streams[i].stream_name);
+               } else if (dentry->attributes & WIMLIB_FILE_ATTRIBUTE_ENCRYPTED) {
+                       tprintf(T("\tRaw encrypted data stream:\n"));
+               } else if (dentry->attributes & WIMLIB_FILE_ATTRIBUTE_REPARSE_POINT) {
+                       tprintf(T("\tReparse point stream:\n"));
                } else {
                        tprintf(T("\tUnnamed data stream:\n"));
                }
@@ -3119,7 +3124,7 @@ imagex_info(int argc, tchar **argv, int cmd)
        bool check        = false;
        bool nocheck      = false;
        bool header       = false;
-       bool lookup_table = false;
+       bool blobs        = false;
        bool xml          = false;
        bool short_header = true;
        const tchar *xml_out_file = NULL;
@@ -3148,8 +3153,8 @@ imagex_info(int argc, tchar **argv, int cmd)
                        header = true;
                        short_header = false;
                        break;
-               case IMAGEX_LOOKUP_TABLE_OPTION:
-                       lookup_table = true;
+               case IMAGEX_BLOBS_OPTION:
+                       blobs = true;
                        short_header = false;
                        break;
                case IMAGEX_XML_OPTION:
@@ -3244,13 +3249,13 @@ imagex_info(int argc, tchar **argv, int cmd)
                if (header)
                        wimlib_print_header(wim);
 
-               if (lookup_table) {
+               if (blobs) {
                        if (info.total_parts != 1) {
-                               tfprintf(stderr, T("Warning: Only showing the lookup table "
+                               tfprintf(stderr, T("Warning: Only showing the blobs "
                                                   "for part %d of a %d-part WIM.\n"),
                                         info.part_number, info.total_parts);
                        }
-                       print_lookup_table(wim);
+                       print_blobs(wim);
                }
 
                if (xml) {
@@ -4223,7 +4228,7 @@ T(
 T(
 "    %"TS" WIMFILE [IMAGE [NEW_NAME [NEW_DESC]]]\n"
 "                    [--boot] [--check] [--nocheck] [--xml]\n"
-"                    [--extract-xml FILE] [--header] [--lookup-table]\n"
+"                    [--extract-xml FILE] [--header] [--blobs]\n"
 ),
 [CMD_JOIN] =
 T(
index 86ba9f0..cff9820 100644 (file)
@@ -24,8 +24,8 @@
 #endif
 
 #include "wimlib.h"
+#include "wimlib/blob_table.h"
 #include "wimlib/error.h"
-#include "wimlib/lookup_table.h"
 #include "wimlib/metadata.h"
 #include "wimlib/security.h"
 #include "wimlib/xml.h"
@@ -38,24 +38,23 @@ static int
 add_empty_image_metadata(WIMStruct *wim)
 {
        int ret;
-       struct wim_lookup_table_entry *metadata_lte;
+       struct blob_descriptor *metadata_blob;
        struct wim_security_data *sd;
        struct wim_image_metadata *imd;
 
-       /* Create lookup table entry for this metadata resource (for now really
-        * just a dummy entry).  */
+       /* Create a blob descriptor for the new metadata resource.  */
        ret = WIMLIB_ERR_NOMEM;
-       metadata_lte = new_lookup_table_entry();
-       if (!metadata_lte)
+       metadata_blob = new_blob_descriptor();
+       if (!metadata_blob)
                goto out;
 
-       metadata_lte->flags = WIM_RESHDR_FLAG_METADATA;
-       metadata_lte->unhashed = 1;
+       metadata_blob->flags = WIM_RESHDR_FLAG_METADATA;
+       metadata_blob->unhashed = 1;
 
        /* Create empty security data (no security descriptors).  */
        sd = new_wim_security_data();
        if (!sd)
-               goto out_free_metadata_lte;
+               goto out_free_metadata_blob;
 
        imd = new_image_metadata();
        if (!imd)
@@ -64,7 +63,7 @@ add_empty_image_metadata(WIMStruct *wim)
        /* A NULL root_dentry indicates a completely empty image, without even a
         * root directory.  */
        imd->root_dentry = NULL;
-       imd->metadata_lte = metadata_lte;
+       imd->metadata_blob = metadata_blob;
        imd->security_data = sd;
        imd->modified = 1;
 
@@ -76,8 +75,8 @@ add_empty_image_metadata(WIMStruct *wim)
 
 out_free_security_data:
        free_wim_security_data(sd);
-out_free_metadata_lte:
-       free_lookup_table_entry(metadata_lte);
+out_free_metadata_blob:
+       free_blob_descriptor(metadata_blob);
 out:
        return ret;
 }
diff --git a/src/blob_table.c b/src/blob_table.c
new file mode 100644 (file)
index 0000000..5551c16
--- /dev/null
@@ -0,0 +1,1404 @@
+/*
+ * blob_table.c
+ *
+ * A blob table maps SHA-1 message digests to "blobs", which are nonempty
+ * sequences of binary data.  Within a WIM file, blobs are single-instanced.
+ *
+ * This file also contains code to read and write the corresponding on-disk
+ * representation of this table in the WIM file format.
+ */
+
+/*
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> /* for unlink()  */
+
+#include "wimlib/assert.h"
+#include "wimlib/blob_table.h"
+#include "wimlib/encoding.h"
+#include "wimlib/endianness.h"
+#include "wimlib/error.h"
+#include "wimlib/metadata.h"
+#include "wimlib/ntfs_3g.h"
+#include "wimlib/resource.h"
+#include "wimlib/unaligned.h"
+#include "wimlib/util.h"
+#include "wimlib/write.h"
+
+/* A hash table mapping SHA-1 message digests to blob descriptors  */
+struct blob_table {
+       /* Array of 'capacity' hash buckets.  Each bucket heads a chain of blob
+        * descriptors that are linked through their 'hash_list' member.  */
+       struct hlist_head *array;
+       /* Number of blob descriptors currently inserted in the table.  */
+       size_t num_blobs;
+       /* Number of buckets in 'array'; bucket indices are taken modulo this
+        * value.  */
+       size_t capacity;
+};
+
+/*
+ * Allocate a new, empty blob table containing @capacity hash buckets.
+ *
+ * A @capacity of 0 is treated as 1: bucket indices are computed modulo the
+ * capacity, so a table with zero buckets would divide by zero on the first
+ * insertion or lookup.
+ *
+ * Returns the new table, or NULL (after logging an error) if memory could not
+ * be allocated.  Release the table with free_blob_table().
+ */
+struct blob_table *
+new_blob_table(size_t capacity)
+{
+       struct blob_table *table;
+       struct hlist_head *array;
+
+       if (capacity == 0)
+               capacity = 1;
+
+       table = MALLOC(sizeof(struct blob_table));
+       if (table == NULL)
+               goto oom;
+
+       /* Zeroed memory is relied upon to represent empty buckets.  */
+       array = CALLOC(capacity, sizeof(array[0]));
+       if (array == NULL) {
+               FREE(table);
+               goto oom;
+       }
+
+       table->num_blobs = 0;
+       table->capacity = capacity;
+       table->array = array;
+       return table;
+
+oom:
+       ERROR("Failed to allocate memory for blob table "
+             "with capacity %zu", capacity);
+       return NULL;
+}
+
+/* for_blob_in_table() visitor that frees every blob descriptor it is handed.
+ * Always returns 0, so the iteration is never cut short.  */
+static int
+do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
+{
+       (void)_ignore;
+       free_blob_descriptor(blob);
+       return 0;
+}
+
+/* Free a blob table together with every blob descriptor still inserted in it.
+ * Passing NULL is a no-op.  */
+void
+free_blob_table(struct blob_table *table)
+{
+       if (table == NULL)
+               return;
+
+       for_blob_in_table(table, do_free_blob_descriptor, NULL);
+       FREE(table->array);
+       FREE(table);
+}
+
+/* Allocate a zero-initialized blob descriptor with a reference count of 1.
+ * Returns NULL if memory could not be allocated.  */
+struct blob_descriptor *
+new_blob_descriptor(void)
+{
+       struct blob_descriptor *blob;
+
+       /* The zeroed allocation doubles as
+        * "blob->blob_location = BLOB_NONEXISTENT", which only holds while that
+        * constant is 0.  */
+       BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
+
+       blob = CALLOC(1, sizeof(struct blob_descriptor));
+       if (blob != NULL)
+               blob->refcnt = 1;
+
+       return blob;
+}
+
+/*
+ * Duplicate a blob descriptor, including the location-specific data it owns
+ * (file path, attached buffer, or NTFS location).  The clone of a blob located
+ * in a WIM resource is additionally linked into that resource's blob list.
+ *
+ * Returns the new descriptor, or NULL if memory could not be allocated.  On
+ * failure, the data owned by @old is left untouched.
+ */
+struct blob_descriptor *
+clone_blob_descriptor(const struct blob_descriptor *old)
+{
+       struct blob_descriptor *new;
+
+       /* Shallow copy first; owned pointers are deep-copied below.  */
+       new = memdup(old, sizeof(struct blob_descriptor));
+       if (new == NULL)
+               return NULL;
+
+       switch (new->blob_location) {
+       case BLOB_IN_WIM:
+               list_add(&new->rdesc_node, &new->rdesc->blob_list);
+               break;
+
+       case BLOB_IN_FILE_ON_DISK:
+#ifdef __WIN32__
+       case BLOB_IN_WINNT_FILE_ON_DISK:
+       case BLOB_WIN32_ENCRYPTED:
+#endif
+#ifdef WITH_FUSE
+       case BLOB_IN_STAGING_FILE:
+               BUILD_BUG_ON((void*)&old->file_on_disk !=
+                            (void*)&old->staging_file_name);
+#endif
+               /* On failure this stores NULL, so the error path frees nothing
+                * that belongs to @old.  */
+               new->file_on_disk = TSTRDUP(old->file_on_disk);
+               if (new->file_on_disk == NULL)
+                       goto out_free;
+               break;
+       case BLOB_IN_ATTACHED_BUFFER:
+               new->attached_buffer = memdup(old->attached_buffer, old->size);
+               if (new->attached_buffer == NULL)
+                       goto out_free;
+               break;
+#ifdef WITH_NTFS_3G
+       case BLOB_IN_NTFS_VOLUME:
+               if (old->ntfs_loc) {
+                       struct ntfs_location *loc;
+
+                       /* The shallow copy still points at @old's location
+                        * structure.  Drop that alias before anything can
+                        * fail; otherwise the error path would free memory
+                        * that is owned by @old.  */
+                       new->ntfs_loc = NULL;
+
+                       loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location));
+                       if (loc == NULL)
+                               goto out_free;
+                       /* Likewise clear the aliased strings before handing
+                        * 'loc' over to the clone.  */
+                       loc->path = NULL;
+                       loc->attr_name = NULL;
+                       new->ntfs_loc = loc;
+                       loc->path = STRDUP(old->ntfs_loc->path);
+                       if (loc->path == NULL)
+                               goto out_free;
+                       if (loc->attr_name_nchars != 0) {
+                               loc->attr_name = utf16le_dup(old->ntfs_loc->attr_name);
+                               if (loc->attr_name == NULL)
+                                       goto out_free;
+                       }
+               }
+               break;
+#endif
+       default:
+               break;
+       }
+       return new;
+
+out_free:
+       free_blob_descriptor(new);
+       return NULL;
+}
+
+/* Release the location-specific data owned by @blob, according to its
+ * 'blob_location'.  The descriptor itself is not freed.  */
+static void
+blob_release_location(struct blob_descriptor *blob)
+{
+       switch (blob->blob_location) {
+       case BLOB_IN_WIM:
+               /* Unlink from the resource's blob list; the resource descriptor
+                * is freed once its list becomes empty.  */
+               list_del(&blob->rdesc_node);
+               if (list_empty(&blob->rdesc->blob_list))
+                       FREE(blob->rdesc);
+               break;
+       case BLOB_IN_FILE_ON_DISK:
+#ifdef __WIN32__
+       case BLOB_IN_WINNT_FILE_ON_DISK:
+       case BLOB_WIN32_ENCRYPTED:
+#endif
+#ifdef WITH_FUSE
+       case BLOB_IN_STAGING_FILE:
+               BUILD_BUG_ON((void*)&blob->file_on_disk !=
+                            (void*)&blob->staging_file_name);
+#endif
+               /* Fall through: all of these locations keep their single owned
+                * pointer at the same address (checked at build time), so one
+                * FREE() covers them all.  */
+       case BLOB_IN_ATTACHED_BUFFER:
+               BUILD_BUG_ON((void*)&blob->file_on_disk !=
+                            (void*)&blob->attached_buffer);
+               FREE(blob->file_on_disk);
+               break;
+#ifdef WITH_NTFS_3G
+       case BLOB_IN_NTFS_VOLUME:
+               if (blob->ntfs_loc) {
+                       FREE(blob->ntfs_loc->path);
+                       FREE(blob->ntfs_loc->attr_name);
+                       FREE(blob->ntfs_loc);
+               }
+               break;
+#endif
+       default:
+               /* Nothing owned for the remaining locations.  */
+               break;
+       }
+}
+
+/* Free a blob descriptor along with the location data it owns.  Passing NULL
+ * is a no-op.  */
+void
+free_blob_descriptor(struct blob_descriptor *blob)
+{
+       if (!blob)
+               return;
+
+       blob_release_location(blob);
+       FREE(blob);
+}
+
+/* Tells whether a blob descriptor must be kept alive even when nothing
+ * references it any longer.  This is the case only for blobs located in a WIM
+ * file.  */
+static bool
+should_retain_blob(const struct blob_descriptor *blob)
+{
+       const bool located_in_wim = (blob->blob_location == BLOB_IN_WIM);
+
+       return located_in_wim;
+}
+
+/* Free @blob unless it is one of the blobs that must be retained (see
+ * should_retain_blob()).  */
+static void
+finalize_blob(struct blob_descriptor *blob)
+{
+       if (should_retain_blob(blob))
+               return;
+
+       free_blob_descriptor(blob);
+}
+
+/*
+ * Decrements the reference count of the specified blob, which must be either
+ * (a) unhashed, or (b) inserted in the specified blob table.
+ *
+ * If the blob's reference count reaches 0, we may unlink it from @table and
+ * free it.  However, we retain blobs with 0 reference count that originated
+ * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
+ *
+ * 1. This prevents information about valid blobs in a WIM file --- blobs which
+ *    will continue to be present after appending to the WIM file --- from being
+ *    lost merely because we dropped all references to them.
+ *
+ * 2. Blob reference counts we read from WIM files can't be trusted.  It's
+ *    possible that a WIM has reference counts that are too low; WIMGAPI
+ *    sometimes creates WIMs where this is the case.  It's also possible that
+ *    blobs have been referenced from an external WIM; those blobs can
+ *    potentially have any reference count at all, either lower or higher than
+ *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
+ *    it were a standalone WIM.
+ *
+ * So we can't take the reference counts too seriously.  But at least, we do
+ * recalculate by default when writing a new WIM file.
+ *
+ * @table is only used when the blob is hashed (i.e. not 'unhashed').
+ */
+void
+blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
+{
+       /* A count that is already 0 is tolerated rather than underflowed.  */
+       if (unlikely(blob->refcnt == 0))  /* See comment above  */
+               return;
+
+       /* Still referenced after this decrement: nothing more to do.  */
+       if (--blob->refcnt != 0)
+               return;
+
+       if (blob->unhashed) {
+               /* Unhashed blobs live on a list instead of in the table.  */
+               list_del(&blob->unhashed_list);
+       #ifdef WITH_FUSE
+               /* If the blob has been extracted to a staging file for a FUSE
+                * mount, unlink the staging file.  (Note that there still may
+                * be open file descriptors to it.)  */
+               if (blob->blob_location == BLOB_IN_STAGING_FILE)
+                       unlinkat(blob->staging_dir_fd,
+                                blob->staging_file_name, 0);
+       #endif
+       } else {
+               /* Retained blobs stay in the table; see comment above.  */
+               if (!should_retain_blob(blob))
+                       blob_table_unlink(table, blob);
+       }
+
+       /* If FUSE mounts are enabled, then don't actually free the blob
+        * descriptor until the last file descriptor to it has been closed.  */
+#ifdef WITH_FUSE
+       if (blob->num_opened_fds == 0)
+#endif
+               finalize_blob(blob);
+}
+
+#ifdef WITH_FUSE
+/* Record that one file descriptor open on @blob has been closed.  When this
+ * was the last open descriptor and the blob is no longer referenced, the blob
+ * is finalized.  */
+void
+blob_decrement_num_opened_fds(struct blob_descriptor *blob)
+{
+       wimlib_assert(blob->num_opened_fds != 0);
+
+       blob->num_opened_fds--;
+       if (blob->num_opened_fds != 0)
+               return;
+       if (blob->refcnt != 0)
+               return;
+
+       finalize_blob(blob);
+}
+#endif
+
+/* Link @blob at the head of the bucket selected by its hash, without touching
+ * the table's blob count and without resizing the table.  */
+static void
+blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
+{
+       struct hlist_head *bucket;
+
+       bucket = &table->array[blob->hash_short % table->capacity];
+       hlist_add_head(&blob->hash_list, bucket);
+}
+
+/* Double the number of buckets in @table and rehash every blob descriptor into
+ * the new bucket array.  If the new array cannot be allocated, the table is
+ * silently left as it is.  */
+static void
+enlarge_blob_table(struct blob_table *table)
+{
+       const size_t prev_capacity = table->capacity;
+       const size_t grown_capacity = prev_capacity * 2;
+       struct hlist_head *prev_array, *grown_array;
+       struct blob_descriptor *blob;
+       struct hlist_node *cur, *tmp;
+
+       grown_array = CALLOC(grown_capacity, sizeof(struct hlist_head));
+       if (grown_array == NULL)
+               return;
+
+       prev_array = table->array;
+       table->array = grown_array;
+       table->capacity = grown_capacity;
+
+       /* Move each descriptor from its old chain into the chain selected by
+        * the new capacity.  */
+       for (size_t bucket = 0; bucket < prev_capacity; bucket++) {
+               hlist_for_each_entry_safe(blob, cur, tmp,
+                                         &prev_array[bucket], hash_list)
+               {
+                       hlist_del(&blob->hash_list);
+                       blob_table_insert_raw(table, blob);
+               }
+       }
+       FREE(prev_array);
+}
+
+/* Add a blob descriptor to the blob table, growing the table once it holds
+ * more blobs than it has buckets.  */
+void
+blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
+{
+       blob_table_insert_raw(table, blob);
+
+       table->num_blobs++;
+       if (table->num_blobs > table->capacity)
+               enlarge_blob_table(table);
+}
+
+/* Remove a blob descriptor from the blob table.  The descriptor itself is not
+ * freed.  The blob must be hashed and the table must be nonempty.  */
+void
+blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
+{
+       wimlib_assert(!blob->unhashed);
+       wimlib_assert(table->num_blobs != 0);
+
+       table->num_blobs -= 1;
+       hlist_del(&blob->hash_list);
+}
+
+/* Search the specified blob table for the blob descriptor whose SHA-1 message
+ * digest equals @hash.  Returns the descriptor, or NULL if the table contains
+ * no such blob.  */
+struct blob_descriptor *
+lookup_blob(const struct blob_table *table, const u8 *hash)
+{
+       const size_t bucket = load_size_t_unaligned(hash) % table->capacity;
+       struct blob_descriptor *candidate;
+       struct hlist_node *pos;
+
+       hlist_for_each_entry(candidate, pos, &table->array[bucket], hash_list) {
+               if (hashes_equal(hash, candidate->hash))
+                       return candidate;
+       }
+       return NULL;
+}
+
+/* Invoke @visitor, passing @arg along, on each blob descriptor in the
+ * specified blob table.  The first nonzero value returned by @visitor ends the
+ * iteration and is returned; otherwise 0 is returned.  The iteration tolerates
+ * the visited descriptor being unlinked by @visitor.  */
+int
+for_blob_in_table(struct blob_table *table,
+                 int (*visitor)(struct blob_descriptor *, void *), void *arg)
+{
+       struct blob_descriptor *blob;
+       struct hlist_node *pos, *tmp;
+
+       for (size_t i = 0; i < table->capacity; i++) {
+               struct hlist_head *bucket = &table->array[i];
+
+               hlist_for_each_entry_safe(blob, pos, tmp, bucket, hash_list) {
+                       int status = visitor(blob, arg);
+
+                       if (status != 0)
+                               return status;
+               }
+       }
+       return 0;
+}
+
+/*
+ * qsort() comparator over an array of 'struct blob_descriptor *' that yields
+ * an order optimized for reading.  Blobs are grouped by location type first;
+ * within a type, a location-specific key is applied.  Blobs in WIM resources,
+ * for instance, are ordered so that each underlying WIM file is read
+ * sequentially, which matters most for WIM files with solid resources.
+ */
+int
+cmp_blobs_by_sequential_order(const void *p1, const void *p2)
+{
+       const struct blob_descriptor *blob1 = *(const struct blob_descriptor**)p1;
+       const struct blob_descriptor *blob2 = *(const struct blob_descriptor**)p2;
+       WIMStruct *wim1, *wim2;
+       int diff;
+
+       /* Primary key: the type of location.  */
+       diff = (int)blob1->blob_location - (int)blob2->blob_location;
+       if (diff != 0)
+               return diff;
+
+       switch (blob1->blob_location) {
+       case BLOB_IN_WIM:
+               wim1 = blob1->rdesc->wim;
+               wim2 = blob2->rdesc->wim;
+
+               /* Distinct WIMStructs are ordered by GUID, unless the GUIDs
+                * match (possibly parts of one split WIM).  */
+               if (wim1 != wim2) {
+                       diff = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN);
+                       if (diff != 0)
+                               return diff;
+               }
+
+               /* Next key: the part number.  */
+               diff = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
+               if (diff != 0)
+                       return diff;
+
+               /* Same resource offset: order by position inside the
+                * resource.  Otherwise order by resource offset.  */
+               if (blob1->rdesc->offset_in_wim == blob2->rdesc->offset_in_wim)
+                       return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
+
+               return cmp_u64(blob1->rdesc->offset_in_wim,
+                              blob2->rdesc->offset_in_wim);
+
+       case BLOB_IN_FILE_ON_DISK:
+#ifdef WITH_FUSE
+       case BLOB_IN_STAGING_FILE:
+#endif
+#ifdef __WIN32__
+       case BLOB_IN_WINNT_FILE_ON_DISK:
+       case BLOB_WIN32_ENCRYPTED:
+#endif
+               /* Ordering by path is merely a heuristic; it keeps files from
+                * the same directory adjacent.  */
+               return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
+#ifdef WITH_NTFS_3G
+       case BLOB_IN_NTFS_VOLUME:
+               return tstrcmp(blob1->ntfs_loc->path, blob2->ntfs_loc->path);
+#endif
+       default:
+               /* This location type (e.g. BLOB_IN_ATTACHED_BUFFER) defines no
+                * secondary order, so all such blobs compare equal.  */
+               return 0;
+       }
+}
+
+/*
+ * Sort a linked list of blob descriptors in place using the qsort()-style
+ * comparator @compar, which receives pointers to 'struct blob_descriptor *'.
+ *
+ * @list_head_offset is the byte offset, within 'struct blob_descriptor', of
+ * the 'struct list_head' through which the blobs are linked into @blob_list.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if the temporary array could not
+ * be allocated (in which case the list is left unmodified).
+ */
+int
+sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
+              int (*compar)(const void *, const void*))
+{
+       struct list_head *node;
+       struct blob_descriptor **sorted;
+       size_t count = 0;
+       size_t i;
+
+       list_for_each(node, blob_list)
+               count++;
+
+       /* Lists of zero or one blobs are already sorted.  */
+       if (count <= 1)
+               return 0;
+
+       sorted = MALLOC(count * sizeof(sorted[0]));
+       if (sorted == NULL)
+               return WIMLIB_ERR_NOMEM;
+
+       /* Recover each descriptor from the address of its list node.  */
+       i = 0;
+       list_for_each(node, blob_list)
+               sorted[i++] = (struct blob_descriptor*)((u8*)node - list_head_offset);
+
+       qsort(sorted, count, sizeof(sorted[0]), compar);
+
+       /* Rebuild the list in sorted order.  */
+       INIT_LIST_HEAD(blob_list);
+       for (i = 0; i < count; i++) {
+               node = (struct list_head*)((u8*)sorted[i] + list_head_offset);
+               list_add_tail(node, blob_list);
+       }
+       FREE(sorted);
+       return 0;
+}
+
+/* Reorder the specified list of blobs so that reading them in list order is
+ * as sequential as possible.  See sort_blob_list() for the meaning of
+ * @list_head_offset and for the return value.  */
+int
+sort_blob_list_by_sequential_order(struct list_head *blob_list,
+                                  size_t list_head_offset)
+{
+       int ret;
+
+       ret = sort_blob_list(blob_list, list_head_offset,
+                            cmp_blobs_by_sequential_order);
+       return ret;
+}
+
+/* for_blob_in_table() visitor that appends @blob to an array.  @_pp points to
+ * the caller's cursor into that array; the cursor is advanced by one slot.  */
+static int
+add_blob_to_array(struct blob_descriptor *blob, void *_pp)
+{
+       struct blob_descriptor ***cursor = _pp;
+
+       **cursor = blob;
+       (*cursor)++;
+       return 0;
+}
+
+/*
+ * Iterate through the blob descriptors in the specified blob table in an order
+ * optimized for sequential reading, calling @visitor (with @arg passed along)
+ * on each one.
+ *
+ * Returns 0 if every call to @visitor returned 0 (trivially so for an empty
+ * table), the first nonzero value returned by @visitor otherwise, or
+ * WIMLIB_ERR_NOMEM if the temporary array could not be allocated.
+ */
+int
+for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
+                                            int (*visitor)(struct blob_descriptor *, void *),
+                                            void *arg)
+{
+       struct blob_descriptor **blob_array, **p;
+       size_t num_blobs = table->num_blobs;
+       int ret;
+
+       /* Nothing to visit.  Returning here also avoids a zero-byte allocation
+        * request, for which a NULL result would be misreported as an
+        * out-of-memory error.  */
+       if (num_blobs == 0)
+               return 0;
+
+       blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
+       if (!blob_array)
+               return WIMLIB_ERR_NOMEM;
+
+       /* Snapshot the table's descriptors into the array.  */
+       p = blob_array;
+       for_blob_in_table(table, add_blob_to_array, &p);
+
+       wimlib_assert(p == blob_array + num_blobs);
+
+       qsort(blob_array, num_blobs, sizeof(blob_array[0]),
+             cmp_blobs_by_sequential_order);
+       ret = 0;
+       for (size_t i = 0; i < num_blobs; i++) {
+               ret = visitor(blob_array[i], arg);
+               if (ret)
+                       break;
+       }
+       FREE(blob_array);
+       return ret;
+}
+
+/* On-disk format of a blob descriptor in a WIM file.
+ *
+ * Note: if the WIM file contains solid resource(s), then this structure is
+ * sometimes overloaded to describe a "resource" rather than a "blob".  See the
+ * code for details.
+ *
+ * read_blob_table() derives the number of table entries by dividing the
+ * table's uncompressed size by sizeof(struct blob_descriptor_disk), so the
+ * size of this structure must equal the size of one on-disk entry.  */
+struct blob_descriptor_disk {
+
+       /* Size, offset, and flags of the blob.  */
+       struct wim_reshdr_disk reshdr;
+
+       /* Which part of the split WIM this blob is in; indexed from 1. */
+       le16 part_number;
+
+       /* Reference count of this blob over all WIM images.  (But see comment
+        * above blob_decrement_refcnt().)  */
+       le32 refcnt;
+
+       /* SHA-1 message digest of the uncompressed data of this blob, or all
+        * zeroes if this blob is of zero length.  */
+       u8 hash[SHA1_HASH_SIZE];
+} _packed_attribute;
+
+/* Count the resource entries (as opposed to entries for blobs within those
+ * resources) in a nonempty run of consecutive blob descriptors that have the
+ * SOLID flag set.  At most @max entries are examined, and the scan also stops
+ * at the first entry without the SOLID flag.
+ *
+ * Returns the resulting count.  */
+static size_t
+count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
+{
+       size_t num_resources = 0;
+
+       for (;;) {
+               struct wim_reshdr reshdr;
+
+               get_wim_reshdr(&entries->reshdr, &reshdr);
+               entries++;
+
+               /* A stand-alone blob entry terminates the run.  */
+               if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID))
+                       break;
+
+               /* The magic size marks a resource entry.  */
+               if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER)
+                       num_resources++;
+
+               if (--max == 0)
+                       break;
+       }
+       return num_resources;
+}
+
+/*
+ * Given a run of consecutive blob descriptors with the SOLID flag set and
+ * having @num_rdescs resource entries, load resource information from them into
+ * the resource descriptors in the @rdescs array.
+ *
+ * @num_rdescs must not exceed the number of resource entries actually present
+ * in @entries (as counted by count_solid_resources()); the scan for each
+ * resource entry below is not otherwise bounded.
+ *
+ * Returns 0 on success, or a nonzero error code on failure.
+ */
+static int
+do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
+                  size_t num_rdescs,
+                  const struct blob_descriptor_disk *entries)
+{
+       for (size_t i = 0; i < num_rdescs; i++) {
+               struct wim_reshdr reshdr;
+               struct alt_chunk_table_header_disk hdr;
+               struct wim_resource_descriptor *rdesc;
+               int ret;
+
+               /* Advance to next resource entry.  */
+
+               do {
+                       get_wim_reshdr(&(entries++)->reshdr, &reshdr);
+               } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
+
+               rdesc = rdescs[i];
+
+               /* Fill in the descriptor from the table entry; some fields are
+                * overridden below with values read from the resource.  */
+               wim_res_hdr_to_desc(&reshdr, wim, rdesc);
+
+               /* For solid resources, the uncompressed size, compression type,
+                * and chunk size are stored in the resource itself, not in the
+                * blob table.  */
+
+               ret = full_pread(&wim->in_fd, &hdr,
+                                sizeof(hdr), reshdr.offset_in_wim);
+               if (ret) {
+                       ERROR("Failed to read header of solid resource "
+                             "(offset_in_wim=%"PRIu64")",
+                             reshdr.offset_in_wim);
+                       return ret;
+               }
+
+               rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
+
+               /* Compression format numbers must be the same as in
+                * WIMGAPI to be compatible here.  */
+               BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
+               BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
+               BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
+               BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
+               /* NOTE(review): the values read from the file are stored
+                * without validation here -- confirm they are checked
+                * elsewhere before use.  */
+               rdesc->compression_type = le32_to_cpu(hdr.compression_format);
+
+               rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
+
+               DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
+                     "(%"TS"/%"PRIu32") @ +%"PRIu64"",
+                     i + 1, num_rdescs,
+                     rdesc->uncompressed_size,
+                     rdesc->size_in_wim,
+                     wimlib_get_compression_type_string(rdesc->compression_type),
+                     rdesc->chunk_size,
+                     rdesc->offset_in_wim);
+       }
+       return 0;
+}
+
+/*
+ * Allocate and fill in one 'struct wim_resource_descriptor' per resource found
+ * in a nonempty run of consecutive blob descriptors with the SOLID flag set.
+ * @num_remaining_entries bounds how many entries of the run may be examined.
+ *
+ * On success, returns 0 and stores the array of descriptors and its length in
+ * *rdescs_ret and *num_rdescs_ret.  On failure, returns a nonzero error code
+ * and frees everything that was allocated here.
+ */
+static int
+load_solid_info(WIMStruct *wim,
+               const struct blob_descriptor_disk *entries,
+               size_t num_remaining_entries,
+               struct wim_resource_descriptor ***rdescs_ret,
+               size_t *num_rdescs_ret)
+{
+       struct wim_resource_descriptor **rdescs;
+       size_t num_rdescs;
+       int ret;
+
+       num_rdescs = count_solid_resources(entries, num_remaining_entries);
+
+       /* Zeroed, so that the cleanup path may FREE() every slot.  */
+       rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
+       if (rdescs == NULL)
+               return WIMLIB_ERR_NOMEM;
+
+       ret = WIMLIB_ERR_NOMEM;
+       for (size_t i = 0; i < num_rdescs; i++) {
+               rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
+               if (rdescs[i] == NULL)
+                       goto out_free_rdescs;
+       }
+
+       ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
+       if (ret != 0)
+               goto out_free_rdescs;
+
+       *rdescs_ret = rdescs;
+       *num_rdescs_ret = num_rdescs;
+       return 0;
+
+out_free_rdescs:
+       for (size_t i = 0; i < num_rdescs; i++)
+               FREE(rdescs[i]);
+       FREE(rdescs);
+       return ret;
+}
+
+/* Try to place a 'struct blob_descriptor', allocated for an on-disk blob
+ * descriptor with the SOLID flag set, into one of the resources of the current
+ * solid run.
+ *
+ * The entry's 'size_in_wim' field is taken as the blob's size.  Its
+ * 'offset_in_wim' field is taken as an offset that is reduced by the
+ * uncompressed size of each resource that cannot hold the blob, until a
+ * resource is found in which the blob fits.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY if the blob
+ * fits in none of the resources.  */
+static int
+assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
+                             struct blob_descriptor *blob,
+                             struct wim_resource_descriptor **rdescs,
+                             size_t num_rdescs)
+{
+       u64 remaining_offset = reshdr->offset_in_wim;
+
+       blob->size = reshdr->size_in_wim;
+       blob->flags = reshdr->flags;
+
+       /* XXX: This linear search will be slow in the degenerate case where the
+        * number of solid resources in the run is huge.  */
+       for (size_t i = 0; i < num_rdescs; i++) {
+               struct wim_resource_descriptor *rdesc = rdescs[i];
+
+               if (remaining_offset + blob->size > rdesc->uncompressed_size) {
+                       /* Does not fit here; skip past this resource.  */
+                       remaining_offset -= rdesc->uncompressed_size;
+                       continue;
+               }
+
+               blob->offset_in_res = remaining_offset;
+               blob_set_is_located_in_wim_resource(blob, rdesc);
+               return 0;
+       }
+
+       ERROR("blob could not be assigned to a solid resource");
+       return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+}
+
+/* Free the array of resource descriptors for a solid run.  A descriptor is
+ * only freed when its blob list is empty; descriptors with blobs attached are
+ * left alone.  Passing a NULL array is a no-op.  */
+static void
+free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
+{
+       if (rdescs == NULL)
+               return;
+
+       for (size_t i = 0; i < num_rdescs; i++) {
+               if (list_empty(&rdescs[i]->blob_list))
+                       FREE(rdescs[i]);
+       }
+       FREE(rdescs);
+}
+
+/* qsort() comparator over an array of 'struct blob_descriptor *' that orders
+ * blobs by increasing offset within their resource.  */
+static int
+cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
+{
+       const struct blob_descriptor *lhs = *(const struct blob_descriptor**)p1;
+       const struct blob_descriptor *rhs = *(const struct blob_descriptor**)p2;
+
+       return cmp_u64(lhs->offset_in_res, rhs->offset_in_res);
+}
+
+/* Check that a WIM resource, and every blob assigned to it, has a sane size
+ * and location.  As a side effect, the resource's blob list is sorted by
+ * offset if it was not already in nondecreasing, non-overlapping order.
+ *
+ * Returns 0 if valid, WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY if an offset
+ * overflows or blobs overlap, or an error code from sort_blob_list().  */
+static int
+validate_resource(struct wim_resource_descriptor *rdesc)
+{
+       struct blob_descriptor *blob;
+       u64 next_free_offset = 0;
+       bool needs_sort = false;
+       int ret;
+
+       /* The resource's own extent must not wrap around.  */
+       if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
+               goto invalid_due_to_overflow;
+
+       /* Each blob must lie entirely within the uncompressed resource.  Also
+        * note whether the blobs are already in order.  */
+       list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
+               if (blob->offset_in_res + blob->size < blob->size ||
+                   blob->offset_in_res + blob->size > rdesc->uncompressed_size)
+                       goto invalid_due_to_overflow;
+
+               if (blob->offset_in_res < next_free_offset)
+                       needs_sort = true;
+               else
+                       next_free_offset = blob->offset_in_res + blob->size;
+       }
+
+       /* Blobs found at nondecreasing, non-overlapping positions need no
+        * further checking.  */
+       if (!needs_sort)
+               return 0;
+
+       /* Otherwise sort them by offset, after which any remaining
+        * out-of-order blob must be an overlap.  */
+       ret = sort_blob_list(&rdesc->blob_list,
+                            offsetof(struct blob_descriptor, rdesc_node),
+                            cmp_blobs_by_offset_in_res);
+       if (ret)
+               return ret;
+
+       next_free_offset = 0;
+       list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
+               if (blob->offset_in_res < next_free_offset)
+                       goto invalid_due_to_overlap;
+               next_free_offset = blob->offset_in_res + blob->size;
+       }
+
+       return 0;
+
+invalid_due_to_overflow:
+       ERROR("Invalid blob table (offset overflow)");
+       return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+
+invalid_due_to_overlap:
+       ERROR("Invalid blob table (blobs in solid resource overlap)");
+       return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+}
+
+/* Conclude a solid run: validate its resources in order, stopping at the
+ * first failure, then release the array of resource descriptors (see
+ * free_solid_rdescs()).  Returns 0 if every resource validated, otherwise the
+ * error code of the first failure.  */
+static int
+finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
+{
+       int ret = 0;
+       size_t i = 0;
+
+       while (i < num_rdescs && ret == 0)
+               ret = validate_resource(rdescs[i++]);
+
+       free_solid_rdescs(rdescs, num_rdescs);
+       return ret;
+}
+
+/*
+ * read_blob_table() -
+ *
+ * Read the blob table from a WIM file.  Usually, each entry in this table
+ * describes a "blob", or equivalently a "resource", that the WIM file contains,
+ * along with its location and SHA-1 message digest.  Descriptors for
+ * non-metadata blobs will be saved in the in-memory blob table
+ * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
+ * special location per-image (the wim->image_metadata array).
+ *
+ * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
+ * blobs that are compressed together.  Such a resource is called a "solid
+ * resource".  Solid resources are still described in the on-disk "blob table",
+ * although the format is not the most logical.  A consecutive sequence of
+ * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
+ * A solid run describes a set of solid resources, each of which contains a set
+ * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
+ * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
+ * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
+ * resource.  There are some oddities in how we need to determine which solid
+ * resource a blob is actually in; see the code for details.
+ *
+ * Possible return values:
+ *     WIMLIB_ERR_SUCCESS (0)
+ *     WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
+ *     WIMLIB_ERR_NOMEM
+ *
+ *     Or an error code caused by failure to read the blob table from the WIM
+ *     file.
+ */
+int
+read_blob_table(WIMStruct *wim)
+{
+       int ret;
+       size_t num_entries;
+       void *buf = NULL;
+       struct blob_table *table = NULL;
+       struct blob_descriptor *cur_blob = NULL;
+       size_t num_duplicate_blobs = 0;
+       size_t num_wrong_part_blobs = 0;
+       u32 image_index = 0;
+       struct wim_resource_descriptor **cur_solid_rdescs = NULL;
+       size_t cur_num_solid_rdescs = 0;
+
+       DEBUG("Reading blob table.");
+
+       /* Calculate the number of entries in the blob table.  */
+       num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
+                     sizeof(struct blob_descriptor_disk);
+
+       /* Read the blob table into a buffer.  */
+       ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
+       if (ret)
+               goto out;
+
+       /* Allocate a hash table to map SHA-1 message digests into blob
+        * descriptors.  This is the in-memory "blob table".  */
+       table = new_blob_table(num_entries * 2 + 1);
+       if (!table)
+               goto oom;
+
+       /* Allocate and initalize blob descriptors from the raw blob table
+        * buffer.  */
+       for (size_t i = 0; i < num_entries; i++) {
+               const struct blob_descriptor_disk *disk_entry =
+                       &((const struct blob_descriptor_disk*)buf)[i];
+               struct wim_reshdr reshdr;
+               u16 part_number;
+
+               /* Get the resource header  */
+               get_wim_reshdr(&disk_entry->reshdr, &reshdr);
+
+               DEBUG("reshdr: size_in_wim=%"PRIu64", "
+                     "uncompressed_size=%"PRIu64", "
+                     "offset_in_wim=%"PRIu64", "
+                     "flags=0x%02x",
+                     reshdr.size_in_wim, reshdr.uncompressed_size,
+                     reshdr.offset_in_wim, reshdr.flags);
+
+               /* Ignore SOLID flag if it isn't supposed to be used in this WIM
+                * version.  */
+               if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
+                       reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;
+
+               /* Allocate a new 'struct blob_descriptor'.  */
+               cur_blob = new_blob_descriptor();
+               if (!cur_blob)
+                       goto oom;
+
+               /* Get the part number, reference count, and hash.  */
+               part_number = le16_to_cpu(disk_entry->part_number);
+               cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
+               copy_hash(cur_blob->hash, disk_entry->hash);
+
+               if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
+
+                       /* SOLID entry  */
+
+                       if (!cur_solid_rdescs) {
+                               /* Starting new run  */
+                               ret = load_solid_info(wim, disk_entry,
+                                                     num_entries - i,
+                                                     &cur_solid_rdescs,
+                                                     &cur_num_solid_rdescs);
+                               if (ret)
+                                       goto out;
+                       }
+
+                       if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
+                               /* Resource entry, not blob entry  */
+                               goto free_cur_blob_and_continue;
+                       }
+
+                       /* Blob entry  */
+
+                       ret = assign_blob_to_solid_resource(&reshdr,
+                                                           cur_blob,
+                                                           cur_solid_rdescs,
+                                                           cur_num_solid_rdescs);
+                       if (ret)
+                               goto out;
+
+               } else {
+                       /* Normal blob/resource entry; SOLID not set.  */
+
+                       struct wim_resource_descriptor *rdesc;
+
+                       if (unlikely(cur_solid_rdescs)) {
+                               /* This entry terminated a solid run.  */
+                               ret = finish_solid_rdescs(cur_solid_rdescs,
+                                                         cur_num_solid_rdescs);
+                               cur_solid_rdescs = NULL;
+                               if (ret)
+                                       goto out;
+                       }
+
+                       /* How to handle an uncompressed resource with its
+                        * uncompressed size different from its compressed size?
+                        *
+                        * Based on a simple test, WIMGAPI seems to handle this
+                        * as follows:
+                        *
+                        * if (size_in_wim > uncompressed_size) {
+                        *      Ignore uncompressed_size; use size_in_wim
+                        *      instead.
+                        * } else {
+                        *      Honor uncompressed_size, but treat the part of
+                        *      the file data above size_in_wim as all zeros.
+                        * }
+                        *
+                        * So we will do the same.  */
+                       if (unlikely(!(reshdr.flags &
+                                      WIM_RESHDR_FLAG_COMPRESSED) &&
+                                    (reshdr.size_in_wim >
+                                     reshdr.uncompressed_size)))
+                       {
+                               reshdr.uncompressed_size = reshdr.size_in_wim;
+                       }
+
+                       /* Set up a resource descriptor for this blob.  */
+
+                       rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
+                       if (!rdesc)
+                               goto oom;
+
+                       wim_res_hdr_to_desc(&reshdr, wim, rdesc);
+
+                       cur_blob->offset_in_res = 0;
+                       cur_blob->size = reshdr.uncompressed_size;
+                       cur_blob->flags = reshdr.flags;
+
+                       blob_set_is_located_in_wim_resource(cur_blob, rdesc);
+               }
+
+               /* cur_blob is now a blob bound to a resource.  */
+
+               /* Ignore entries with all zeroes in the hash field.  */
+               if (is_zero_hash(cur_blob->hash))
+                       goto free_cur_blob_and_continue;
+
+               /* Verify that the part number matches that of the underlying
+                * WIM file.  */
+               if (part_number != wim->hdr.part_number) {
+                       num_wrong_part_blobs++;
+                       goto free_cur_blob_and_continue;
+               }
+
+               if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
+
+                       /* Blob table entry for a metadata resource.  */
+
+                       /* Metadata entries with no references must be ignored.
+                        * See, for example, the WinPE WIMs from the WAIK v2.1.
+                        */
+                       if (cur_blob->refcnt == 0)
+                               goto free_cur_blob_and_continue;
+
+                       if (cur_blob->refcnt != 1) {
+                               /* We don't currently support this case due to
+                                * the complications of multiple images sharing
+                                * the same metadata resource or a metadata
+                                * resource also being referenced by files.  */
+                               ERROR("Found metadata resource with refcnt != 1");
+                               ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
+                               goto out;
+                       }
+
+                       if (wim->hdr.part_number != 1) {
+                               WARNING("Ignoring metadata resource found in a "
+                                       "non-first part of the split WIM");
+                               goto free_cur_blob_and_continue;
+                       }
+
+                       /* The number of entries in the blob table with
+                        * WIM_RESHDR_FLAG_METADATA set should be the same as
+                        * the image_count field in the WIM header.  */
+                       if (image_index == wim->hdr.image_count) {
+                               WARNING("Found more metadata resources than images");
+                               goto free_cur_blob_and_continue;
+                       }
+
+                       /* Notice very carefully:  We are assigning the metadata
+                        * resources to images in the same order in which their
+                        * blob table entries occur on disk.  (This is also the
+                        * behavior of Microsoft's software.)  In particular,
+                        * this overrides the actual locations of the metadata
+                        * resources themselves in the WIM file as well as any
+                        * information written in the XML data.  */
+                       DEBUG("Found metadata resource for image %"PRIu32" at "
+                             "offset %"PRIu64".",
+                             image_index + 1,
+                             reshdr.offset_in_wim);
+
+                       wim->image_metadata[image_index++]->metadata_blob = cur_blob;
+               } else {
+                       /* Blob table entry for a non-metadata blob.  */
+
+                       /* Ignore this blob if it's a duplicate.  */
+                       if (lookup_blob(table, cur_blob->hash)) {
+                               num_duplicate_blobs++;
+                               goto free_cur_blob_and_continue;
+                       }
+
+                       /* Insert the blob into the in-memory blob table, keyed
+                        * by its SHA-1 message digest.  */
+                       blob_table_insert(table, cur_blob);
+               }
+
+               continue;
+
+       free_cur_blob_and_continue:
+               if (cur_solid_rdescs &&
+                   cur_blob->blob_location == BLOB_IN_WIM)
+                       blob_unset_is_located_in_wim_resource(cur_blob);
+               free_blob_descriptor(cur_blob);
+       }
+       cur_blob = NULL;
+
+       if (cur_solid_rdescs) {
+               /* End of blob table terminated a solid run.  */
+               ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
+               cur_solid_rdescs = NULL;
+               if (ret)
+                       goto out;
+       }
+
+       if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
+               WARNING("Could not find metadata resources for all images");
+               for (u32 i = image_index; i < wim->hdr.image_count; i++)
+                       put_image_metadata(wim->image_metadata[i], NULL);
+               wim->hdr.image_count = image_index;
+       }
+
+       if (num_duplicate_blobs > 0)
+               WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);
+
+       if (num_wrong_part_blobs > 0) {
+               WARNING("Ignoring %zu blobs with wrong part number",
+                       num_wrong_part_blobs);
+       }
+
+       DEBUG("Done reading blob table.");
+       wim->blob_table = table;
+       ret = 0;
+       goto out_free_buf;
+
+oom:
+       ERROR("Not enough memory to read blob table!");
+       ret = WIMLIB_ERR_NOMEM;
+out:
+       free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
+       free_blob_descriptor(cur_blob);
+       free_blob_table(table);
+out_free_buf:
+       FREE(buf);
+       return ret;
+}
+
+/* Fill in the on-disk blob table entry @disk_entry from the resource header
+ * @out_reshdr together with the given part number, reference count, and SHA-1
+ * message digest.  The part number and reference count are stored via
+ * cpu_to_le16() and cpu_to_le32(), respectively.  */
+static void
+write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
+                     const struct wim_reshdr *out_reshdr,
+                     u16 part_number, u32 refcnt, const u8 *hash)
+{
+       copy_hash(disk_entry->hash, hash);
+       disk_entry->refcnt = cpu_to_le32(refcnt);
+       disk_entry->part_number = cpu_to_le16(part_number);
+       put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
+}
+
+/*
+ * Serialize the blob descriptors in @blob_list into a temporary buffer of
+ * on-disk blob table entries, then write that buffer, uncompressed, to @out_fd.
+ * @out_reshdr and @write_resource_flags are handed through to
+ * write_wim_resource_from_buffer().
+ *
+ * Every blob produces one entry.  In addition, the first blob seen for each
+ * solid resource is preceded by an extra entry describing the solid resource
+ * itself.
+ *
+ * Note: the list of blob descriptors must be sorted so that all entries for the
+ * same solid resource are consecutive.  In addition, blob descriptors with
+ * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the
+ * underlying images.
+ *
+ * Returns WIMLIB_ERR_NOMEM if the temporary buffer cannot be allocated;
+ * otherwise returns the result of write_wim_resource_from_buffer().
+ */
+int
+write_blob_table_from_blob_list(struct list_head *blob_list,
+                               struct filedes *out_fd,
+                               u16 part_number,
+                               struct wim_reshdr *out_reshdr,
+                               int write_resource_flags)
+{
+       struct blob_descriptor *blob;
+       struct blob_descriptor_disk *table_buf;
+       struct blob_descriptor_disk *table_buf_ptr;
+       size_t table_size = 0;
+       u64 prev_res_offset_in_wim;
+       u64 prev_uncompressed_size = 0;
+       u64 logical_offset = 0;
+       int ret;
+
+       /* First pass: compute the size of the table.  */
+       prev_res_offset_in_wim = ~0ULL;
+       list_for_each_entry(blob, blob_list, blob_table_list) {
+               if ((blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) &&
+                   blob->out_res_offset_in_wim != prev_res_offset_in_wim)
+               {
+                       /* Room for the entry describing the solid resource  */
+                       prev_res_offset_in_wim = blob->out_res_offset_in_wim;
+                       table_size += sizeof(struct blob_descriptor_disk);
+               }
+
+               /* Room for the entry describing the blob itself  */
+               table_size += sizeof(struct blob_descriptor_disk);
+       }
+
+       DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
+             table_size, out_fd->offset);
+
+       table_buf = MALLOC(table_size);
+       if (table_buf == NULL) {
+               ERROR("Failed to allocate %zu bytes for temporary blob table",
+                     table_size);
+               return WIMLIB_ERR_NOMEM;
+       }
+
+       /* Second pass: fill in the entries, in list order.  */
+       table_buf_ptr = table_buf;
+       prev_res_offset_in_wim = ~0ULL;
+       list_for_each_entry(blob, blob_list, blob_table_list) {
+               /* Work on a copy so that the blob's own header is not modified
+                * by the solid offset adjustment below.  */
+               struct wim_reshdr blob_reshdr = blob->out_reshdr;
+
+               if (blob_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
+
+                       if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
+                               /* First blob of a different solid resource:
+                                * emit the entry for the resource itself.  */
+                               struct wim_reshdr res_reshdr;
+
+                               res_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
+                               res_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
+                               res_reshdr.size_in_wim = blob->out_res_size_in_wim;
+                               res_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
+
+                               write_blob_descriptor(table_buf_ptr++, &res_reshdr,
+                                                     part_number, 1, zero_hash);
+
+                               /* Account for the uncompressed size of the
+                                * previous solid resource, if any.  */
+                               logical_offset += prev_uncompressed_size;
+
+                               prev_uncompressed_size = blob->out_res_uncompressed_size;
+                               prev_res_offset_in_wim = blob->out_res_offset_in_wim;
+                       }
+
+                       /* When WIMGAPI sees multiple solid resources, it
+                        * expects the offsets to be adjusted as if there were
+                        * really only one solid resource.  */
+                       blob_reshdr.offset_in_wim += logical_offset;
+               }
+
+               write_blob_descriptor(table_buf_ptr++, &blob_reshdr,
+                                     part_number, blob->out_refcnt, blob->hash);
+       }
+       wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
+
+       /* Write the blob table uncompressed.  Although wimlib can handle a
+        * compressed blob table, MS software cannot.  */
+       ret = write_wim_resource_from_buffer(table_buf,
+                                            table_size,
+                                            WIM_RESHDR_FLAG_METADATA,
+                                            out_fd,
+                                            WIMLIB_COMPRESSION_TYPE_NONE,
+                                            0,
+                                            out_reshdr,
+                                            NULL,
+                                            write_resource_flags);
+       FREE(table_buf);
+       DEBUG("ret=%d", ret);
+       return ret;
+}
+
+/*
+ * Return a blob descriptor for the @size bytes of data at @buffer.
+ *
+ * If @blob_table already contains a blob with the same SHA-1 message digest,
+ * that descriptor is returned with its reference count incremented.  Otherwise
+ * a new descriptor is allocated, attached to a memdup() copy of the data, and
+ * inserted into @blob_table.
+ *
+ * Returns NULL if the new descriptor or the copy of the data cannot be
+ * allocated.
+ */
+struct blob_descriptor *
+new_blob_from_data_buffer(const void *buffer, size_t size,
+                         struct blob_table *blob_table)
+{
+       u8 hash[SHA1_HASH_SIZE];
+       struct blob_descriptor *blob;
+       void *buffer_copy;
+
+       sha1_buffer(buffer, size, hash);
+
+       blob = lookup_blob(blob_table, hash);
+       if (blob) {
+               /* Identical data is already indexed; share its descriptor.  */
+               wimlib_assert(blob->size == size);
+               blob->refcnt++;
+               return blob;
+       }
+
+       /* Not in the table yet; create a descriptor backed by a copy of the
+        * caller's buffer.  */
+       blob = new_blob_descriptor();
+       if (blob == NULL)
+               return NULL;
+
+       buffer_copy = memdup(buffer, size);
+       if (buffer_copy == NULL) {
+               free_blob_descriptor(blob);
+               return NULL;
+       }
+
+       blob->blob_location = BLOB_IN_ATTACHED_BUFFER;
+       blob->attached_buffer = buffer_copy;
+       blob->size = size;
+       copy_hash(blob->hash, hash);
+       blob_table_insert(blob_table, blob);
+       return blob;
+}
+
+/*
+ * Compute the SHA-1 message digest of an unhashed blob, take it off the list of
+ * unhashed blobs, and index it in the blob table, merging it into an identical
+ * blob if one is already present.
+ *
+ * @blob:
+ *     The blob to hash; it must currently be marked as unhashed.
+ * @blob_table:
+ *     The blob table in which the blob needs to be indexed
+ * @blob_ret:
+ *     On success, receives the resulting blob descriptor: @blob itself if it
+ *     was inserted into the blob table, or the pre-existing descriptor if a
+ *     duplicate blob was found.
+ *
+ * Returns 0 on success; otherwise returns the nonzero value from sha1_blob(),
+ * in which case @blob is left on the unhashed list and @blob_ret is not
+ * written.
+ */
+int
+hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
+                  struct blob_descriptor **blob_ret)
+{
+       struct blob_descriptor **back_ptr;
+       struct blob_descriptor *existing;
+       int ret;
+
+       wimlib_assert(blob->unhashed);
+
+       /* The back pointer has to be fetched before hashing: @back_inode and
+        * @back_stream_id share storage (a union) with the SHA-1 message
+        * digest, so they stop being valid once the digest is computed.  */
+       back_ptr = retrieve_pointer_to_unhashed_blob(blob);
+
+       ret = sha1_blob(blob);
+       if (ret)
+               return ret;
+
+       /* The blob is now hashed.  */
+       list_del(&blob->unhashed_list);
+       blob->unhashed = 0;
+
+       existing = lookup_blob(blob_table, blob->hash);
+       if (!existing) {
+               /* No duplicate; index this blob and treat it as a hashed
+                * blob from now on.  */
+               blob_table_insert(blob_table, blob);
+               *blob_ret = blob;
+               return 0;
+       }
+
+       /* Duplicate found.  Move this blob's reference count over to the
+        * existing blob and redirect the reference to this blob (from a
+        * stream) to the existing blob.  The caller is responsible for freeing
+        * @blob if needed.  */
+       wimlib_assert(existing->size == blob->size);
+       existing->refcnt += blob->refcnt;
+       blob->refcnt = 0;
+       *back_ptr = existing;
+       *blob_ret = existing;
+       return 0;
+}
+
+/*
+ * Fill in the 'struct wimlib_resource_entry' @wentry from the blob descriptor
+ * @blob.  @wentry is zeroed first, so any field not set below reads as 0.  The
+ * part number, offsets, and raw resource sizes are only filled in for blobs
+ * located in a WIM resource (BLOB_IN_WIM).
+ */
+void
+blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
+                             struct wimlib_resource_entry *wentry)
+{
+       memset(wentry, 0, sizeof(*wentry));
+
+       /* Fields that do not depend on where the blob is located  */
+       copy_hash(wentry->sha1_hash, blob->hash);
+       wentry->uncompressed_size = blob->size;
+       wentry->reference_count = blob->refcnt;
+       wentry->is_compressed = !!(blob->flags & WIM_RESHDR_FLAG_COMPRESSED);
+       wentry->is_metadata = !!(blob->flags & WIM_RESHDR_FLAG_METADATA);
+       wentry->is_free = !!(blob->flags & WIM_RESHDR_FLAG_FREE);
+       wentry->is_spanned = !!(blob->flags & WIM_RESHDR_FLAG_SPANNED);
+       wentry->packed = !!(blob->flags & WIM_RESHDR_FLAG_SOLID);
+
+       if (blob->blob_location != BLOB_IN_WIM)
+               return;
+
+       /* Fields taken from the WIM resource containing the blob  */
+       const struct wim_resource_descriptor *rdesc = blob->rdesc;
+
+       wentry->part_number = rdesc->wim->hdr.part_number;
+       wentry->raw_resource_offset_in_wim = rdesc->offset_in_wim;
+       wentry->raw_resource_compressed_size = rdesc->size_in_wim;
+       wentry->raw_resource_uncompressed_size = rdesc->uncompressed_size;
+
+       if (blob->flags & WIM_RESHDR_FLAG_SOLID) {
+               /* Report the blob's offset within its resource;
+                * 'compressed_size' stays 0 in this case.  */
+               wentry->offset = blob->offset_in_res;
+       } else {
+               wentry->offset = rdesc->offset_in_wim;
+               wentry->compressed_size = rdesc->size_in_wim;
+       }
+}
+
+/* Context handed to do_iterate_blob(): the caller-supplied callback together
+ * with the opaque pointer that is passed back to it on every call.  */
+struct iterate_blob_context {
+       wimlib_iterate_lookup_table_callback_t cb;
+       void *user_ctx;
+};
+
+/* Convert @blob into a 'struct wimlib_resource_entry' and pass it to the
+ * callback stored in @_ctx (a 'struct iterate_blob_context'), along with the
+ * stored user context.  Returns whatever the callback returns.  */
+static int
+do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
+{
+       const struct iterate_blob_context *ctx = _ctx;
+       struct wimlib_resource_entry entry;
+
+       blob_to_wimlib_resource_entry(blob, &entry);
+       return ctx->cb(&entry, ctx->user_ctx);
+}
+
+/*
+ * API function documented in wimlib.h
+ *
+ * Calls @cb, with @user_ctx, once for the metadata blob of each image (only if
+ * wim_has_metadata() is true for @wim) and then once for each blob in the
+ * WIM's blob table.  Iteration stops at the first nonzero value returned by
+ * the callback, and that value is returned.
+ *
+ * Returns WIMLIB_ERR_INVALID_PARAM if @flags is nonzero.
+ */
+WIMLIBAPI int
+wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
+                           wimlib_iterate_lookup_table_callback_t cb,
+                           void *user_ctx)
+{
+       if (flags != 0)
+               return WIMLIB_ERR_INVALID_PARAM;
+
+       struct iterate_blob_context ctx = {
+               .cb = cb,
+               .user_ctx = user_ctx,
+       };
+       if (wim_has_metadata(wim)) {
+               /* Index with u32 rather than int so that the comparison with
+                * the image count is not a mixed signed/unsigned one.  */
+               for (u32 i = 0; i < wim->hdr.image_count; i++) {
+                       int ret;
+
+                       ret = do_iterate_blob(wim->image_metadata[i]->metadata_blob,
+                                             &ctx);
+                       if (ret)
+                               return ret;
+               }
+       }
+       return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
+}
index cbc1aa4..0e68261 100644 (file)
 
 #include <string.h>
 
+#include "wimlib/blob_table.h"
 #include "wimlib/capture.h"
 #include "wimlib/dentry.h"
 #include "wimlib/error.h"
-#include "wimlib/lookup_table.h"
 #include "wimlib/paths.h"
 #include "wimlib/progress.h"
 #include "wimlib/textfile.h"
@@ -71,12 +71,12 @@ do_capture_progress(struct capture_params *params, int status,
 
                /* Successful scan, and visiting inode for the first time  */
 
-               /* Tally size of all data streams.  */
-               const struct wim_lookup_table_entry *lte;
-               for (unsigned i = 0; i <= inode->i_num_ads; i++) {
-                       lte = inode_stream_lte_resolved(inode, i);
-                       if (lte)
-                               params->progress.scan.num_bytes_scanned += lte->size;
+               /* Tally size of all streams.  */
+               for (unsigned i = 0; i < inode->i_num_streams; i++) {
+                       const struct blob_descriptor *blob =
+                               stream_blob_resolved(&inode->i_streams[i]);
+                       if (blob)
+                               params->progress.scan.num_bytes_scanned += blob->size;
                }
 
                /* Tally the file itself.  */
index 442f8b6..8844d6b 100644 (file)
@@ -38,15 +38,15 @@ delete_wim_image(WIMStruct *wim, int image)
        int ret;
 
        /* Load the metadata for the image to be deleted.  This is necessary
-        * because streams referenced by files in the image need to have their
+        * because blobs referenced by files in the image need to have their
         * reference counts decremented.  */
        ret = select_wim_image(wim, image);
        if (ret)
                return ret;
 
        /* Release the reference to the image metadata and decrement reference
-        * counts on the streams referenced by files in the image.  */
-       put_image_metadata(wim->image_metadata[image - 1], wim->lookup_table);
+        * counts on the blobs referenced by files in the image.  */
+       put_image_metadata(wim->image_metadata[image - 1], wim->blob_table);
 
        /* Remove the empty slot from the image metadata array.  */
        memmove(&wim->image_metadata[image - 1], &wim->image_metadata[image],
index 324fbcf..55da50c 100644 (file)
@@ -3,7 +3,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
  * resource for a WIM image.  */
 struct wim_dentry_on_disk {
 
-       /* Length of this directory entry in bytes, not including any alternate
-        * data stream entries.  Should be a multiple of 8 so that the following
-        * dentry or alternate data stream entry is aligned on an 8-byte
-        * boundary.  (If not, wimlib will round it up.)  It must be at least as
-        * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE),
-        * plus the lengths of the file name and/or short name if present.
+       /* Length of this directory entry in bytes, not including any extra
+        * stream entries.  Should be a multiple of 8 so that the following
+        * dentry or extra stream entry is aligned on an 8-byte boundary.  (If
+        * not, wimlib will round it up.)  It must be at least as long as the
+        * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the
+        * lengths of the file name and/or short name if present, plus the size
+        * of any "extra" data.
         *
-        * It is also possible for this field to be 0.  This situation, which is
-        * undocumented, indicates the end of a list of sibling nodes in a
-        * directory.  It also means the real length is 8, because the dentry
-        * included only the length field, but that takes up 8 bytes.  */
+        * It is also possible for this field to be 0.  This case indicates the
+        * end of a list of sibling entries in a directory.  It also means the
+        * real length is 8, because the dentry included only the length field,
+        * but that takes up 8 bytes.  */
        le64 length;
 
-       /* Attributes of the file or directory.  This is a bitwise OR of the
-        * FILE_ATTRIBUTE_* constants and should correspond to the value
+       /* File attributes for the file or directory.  This is a bitwise OR of
+        * the FILE_ATTRIBUTE_* constants and should correspond to the value
         * retrieved by GetFileAttributes() on Windows. */
        le32 attributes;
 
@@ -114,38 +115,25 @@ struct wim_dentry_on_disk {
        le64 last_access_time;
        le64 last_write_time;
 
-       /* Vaguely, the SHA-1 message digest ("hash") of the file's contents.
-        * More specifically, this is for the "unnamed data stream" rather than
-        * any "alternate data streams".  This hash value is used to look up the
-        * corresponding entry in the WIM's stream lookup table to actually find
-        * the file contents within the WIM.
+       /*
+        * Usually this is the SHA-1 message digest of the file's "contents"
+        * (the unnamed data stream).
         *
-        * If the file has no unnamed data stream (e.g. is a directory), then
-        * this field will be all zeroes.  If the unnamed data stream is empty
-        * (i.e. an "empty file"), then this field is also expected to be all
-        * zeroes.  (It will be if wimlib created the WIM image, at least;
-        * otherwise it can't be ruled out that the SHA-1 message digest of 0
-        * bytes of data is given explicitly.)
+        * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is
+        * instead usually the SHA-1 message digest of the uncompressed reparse
+        * point data.
         *
-        * If the file has reparse data, then this field will instead specify
-        * the SHA-1 message digest of the reparse data.  If it is somehow
-        * possible for a file to have both an unnamed data stream and reparse
-        * data, then this is not handled by wimlib.
-        *
-        * As a further special case, if this field is all zeroes but there is
-        * an alternate data stream entry with no name and a nonzero SHA-1
-        * message digest field, then that hash must be used instead of this
-        * one.  In fact, when named data streams are present, some versions of
-        * Windows PE contain a bug where they only look in the alternate data
-        * stream entries for the unnamed data stream, not here.
+        * However, there are some special rules that need to be applied to
+        * interpret this field correctly when extra stream entries are present.
+        * See the code for details.
         */
-       u8 unnamed_stream_hash[SHA1_HASH_SIZE];
+       u8 default_hash[SHA1_HASH_SIZE];
 
        /* The format of the following data is not yet completely known and they
         * do not correspond to Microsoft's documentation.
         *
         * If this directory entry is for a reparse point (has
-        * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the
+        * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the
         * version of the following fields containing the reparse tag is valid.
         * Furthermore, the field notated as not_rpfixed, as far as I can tell,
         * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
@@ -180,9 +168,9 @@ struct wim_dentry_on_disk {
                } _packed_attribute nonreparse;
        };
 
-       /* Number of alternate data stream entries that directly follow this
-        * dentry on-disk. */
-       le16 num_alternate_data_streams;
+       /* Number of extra stream entries that directly follow this dentry
+        * on-disk.  */
+       le16 num_extra_streams;
 
        /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
         * encoded short name (8.3 DOS-compatible name), excluding the null
@@ -216,26 +204,36 @@ struct wim_dentry_on_disk {
        /* u8 tagged_items[] _aligned_attribute(8); */
 
 } _packed_attribute;
-       /* If num_alternate_data_streams != 0, then there are that many
-        * alternate data stream entries following the dentry, on an 8-byte
-        * aligned boundary.  They are not counted in the 'length' field of the
-        * dentry.  */
+       /* If num_extra_streams != 0, then there are that many extra stream
+        * entries following the dentry, starting on the next 8-byte aligned
+        * boundary.  They are not counted in the 'length' field of the dentry.
+        */
 
-/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
- * that has names of the specified lengths.  (Zero length means the
- * corresponding name actually does not exist.)  The returned value excludes
- * tagged metadata items as well as any alternate data stream entries that may
- * need to follow the dentry.  */
-static u64
-dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
-{
-       u64 length = sizeof(struct wim_dentry_on_disk);
-       if (file_name_nbytes)
-               length += (u32)file_name_nbytes + 2;
-       if (short_name_nbytes)
-               length += (u32)short_name_nbytes + 2;
-       return length;
-}
+/* On-disk format of an extra stream entry.  This represents an extra NTFS-style
+ * "stream" associated with the file, such as a named data stream.  A dentry's
+ * num_extra_streams field gives the number of these entries that follow that
+ * dentry on disk.  */
+struct wim_extra_stream_entry_on_disk {
+
+       /* Length of this extra stream entry, in bytes.  This includes all
+        * fixed-length fields, plus the name and null terminator if present,
+        * and any needed padding such that the length is a multiple of 8.  */
+       le64 length;
+
+       /* Reserved field  */
+       le64 reserved;
+
+       /* SHA-1 message digest of this stream's uncompressed data, or all
+        * zeroes if this stream's data is of zero length.  */
+       u8 hash[SHA1_HASH_SIZE];
+
+       /* Length of this stream's name, in bytes and excluding the null
+        * terminator; or 0 if this stream is unnamed.  */
+       le16 name_nbytes;
+
+       /* Stream name in UTF-16LE.  It is @name_nbytes bytes long, excluding
+        * the null terminator.  There is a null terminator character if
+        * @name_nbytes != 0; i.e., if this stream is named.
+        *
+        * This is a flexible array member, so the name is not counted in
+        * sizeof(struct wim_extra_stream_entry_on_disk).  */
+       utf16lechar name[];
+} _packed_attribute;
 
 static void
 do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name,
@@ -319,68 +317,50 @@ dentry_set_name(struct wim_dentry *dentry, const tchar *name)
        return 0;
 }
 
-/* Return the length, in bytes, required for the specified alternate data stream
- * (ADS) entry on-disk.  This accounts for the fixed-length portion of the ADS
- * entry, the {stream name and its null terminator} if present, and the padding
- * after the entry to align the next ADS entry or dentry on an 8-byte boundary
- * in the uncompressed metadata resource buffer.  */
-static u64
-ads_entry_out_total_length(const struct wim_ads_entry *entry)
+/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
+ * that has names of the specified lengths.  (Zero length means the
+ * corresponding name actually does not exist.)  The returned value excludes
+ * tagged metadata items as well as any extra stream entries that may need to
+ * follow the dentry.  */
+static size_t
+dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
 {
-       u64 len = sizeof(struct wim_ads_entry_on_disk);
-       if (entry->stream_name_nbytes)
-               len += (u32)entry->stream_name_nbytes + 2;
-       return (len + 7) & ~7;
+       size_t length = sizeof(struct wim_dentry_on_disk);
+       if (file_name_nbytes)
+               length += (u32)file_name_nbytes + 2;
+       if (short_name_nbytes)
+               length += (u32)short_name_nbytes + 2;
+       return length;
 }
 
-/*
- * Determine whether to include a "dummy" stream when writing a WIM dentry.
- *
- * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0,
- * for example) contain a bug where they assume the first alternate data stream
- * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed
- * stream, even if it has a name and the unnamed stream is already specified in
- * the hash field of the dentry itself.
- *
- * wimlib has to work around this behavior by carefully emulating the behavior
- * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference
- * into the alternate stream entries whenever there are named data streams, even
- * though there is already a field in the dentry itself for the unnamed stream
- * reference, which then goes to waste.
- */
-static bool
-inode_needs_dummy_stream(const struct wim_inode *inode)
-{
-       /* Normal case  */
-       if (likely(inode->i_num_ads <= 0))
-               return false;
-
-       /* Overflow check  */
-       if (inode->i_num_ads >= 0xFFFF)
-               return false;
 
-       /* Assume the dentry is okay if it already had an unnamed ADS entry when
-        * it was read in.  */
-       if (!inode->i_canonical_streams)
-               return false;
+/* Return the length, in bytes, required for the specified stream on-disk, when
+ * represented as an extra stream entry.  */
+static size_t
+stream_out_total_length(const struct wim_inode_stream *strm)
+{
+       /* Account for the fixed length portion  */
+       size_t len = sizeof(struct wim_extra_stream_entry_on_disk);
 
-       /* We can't use use this workaround on encrypted files because WIMGAPI
-        * reports that the WIM is in an incorrect format.  */
-       if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)
-               return false;
+       /* For named streams, account for the variable-length name.  */
+       if (stream_is_named(strm))
+               len += utf16le_len_bytes(strm->stream_name) + 2;
 
-       return true;
+       /* Account for any necessary padding to the next 8-byte boundary.  */
+       return (len + 7) & ~7;
 }
 
-/* Calculate the total number of bytes that will be consumed when a dentry is
+/*
+ * Calculate the total number of bytes that will be consumed when a dentry is
  * written.  This includes the fixed-length portion of the dentry, the name
- * fields, any tagged metadata items, and any alternate data stream entries.
- * Also includes all alignment bytes.  */
-u64
+ * fields, any tagged metadata items, and any extra stream entries.  This also
+ * includes all alignment bytes.
+ */
+size_t
 dentry_out_total_length(const struct wim_dentry *dentry)
 {
        const struct wim_inode *inode = dentry->d_inode;
-       u64 len;
+       size_t len;
 
        len = dentry_min_len_with_names(dentry->file_name_nbytes,
                                        dentry->short_name_nbytes);
@@ -391,12 +371,34 @@ dentry_out_total_length(const struct wim_dentry *dentry)
                len = (len + 7) & ~7;
        }
 
-       if (unlikely(inode->i_num_ads)) {
-               if (inode_needs_dummy_stream(inode))
-                       len += ads_entry_out_total_length(&(struct wim_ads_entry){});
+       if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
+               /*
+                * Extra stream entries:
+                *
+                * - Use one extra stream entry for each named data stream
+                * - Use one extra stream entry for the unnamed data stream when there is either:
+                *      - a reparse point stream
+                *      - at least one named data stream (for Windows PE bug workaround)
+                * - Use one extra stream entry for the reparse point stream if there is one
+                */
+               bool have_named_data_stream = false;
+               bool have_reparse_point_stream = false;
+               for (unsigned i = 0; i < inode->i_num_streams; i++) {
+                       const struct wim_inode_stream *strm = &inode->i_streams[i];
+                       if (stream_is_named_data_stream(strm)) {
+                               len += stream_out_total_length(strm);
+                               have_named_data_stream = true;
+                       } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
+                               wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
+                               have_reparse_point_stream = true;
+                       }
+               }
 
-               for (u16 i = 0; i < inode->i_num_ads; i++)
-                       len += ads_entry_out_total_length(&inode->i_ads_entries[i]);
+               if (have_named_data_stream || have_reparse_point_stream) {
+                       if (have_reparse_point_stream)
+                               len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
+                       len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
+               }
        }
 
        return len;
@@ -991,7 +993,6 @@ new_filler_directory(struct wim_dentry **dentry_ret)
                return ret;
        /* Leave the inode number as 0; this is allowed for non
         * hard-linked files. */
-       dentry->d_inode->i_resolved = 1;
        dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
        *dentry_ret = dentry;
        return 0;
@@ -1036,9 +1037,9 @@ do_free_dentry(struct wim_dentry *dentry, void *_ignore)
 }
 
 static int
-do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table)
+do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table)
 {
-       inode_unref_streams(dentry->d_inode, lookup_table);
+       inode_unref_blobs(dentry->d_inode, blob_table);
        free_dentry(dentry);
        return 0;
 }
@@ -1050,10 +1051,10 @@ do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table)
  *     The root of the dentry tree to free.  If NULL, this function has no
  *     effect.
  *
- * @lookup_table:
- *     A pointer to the lookup table for the WIM, or NULL if not specified.  If
+ * @blob_table:
+ *     A pointer to the blob table for the WIM, or NULL if not specified.  If
  *     specified, this function will decrement the reference counts of the
- *     single-instance streams referenced by the dentries.
+ *     blobs referenced by the dentries.
  *
  * This function also releases references to the corresponding inodes.
  *
@@ -1062,16 +1063,16 @@ do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table)
  * function.
  */
 void
-free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
+free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
 {
        int (*f)(struct wim_dentry *, void *);
 
-       if (lookup_table)
-               f = do_free_dentry_and_unref_streams;
+       if (blob_table)
+               f = do_free_dentry_and_unref_blobs;
        else
                f = do_free_dentry;
 
-       for_dentry_in_tree_depth(root, f, lookup_table);
+       for_dentry_in_tree_depth(root, f, blob_table);
 }
 
 /* Insert the @child dentry into the case sensitive index of the @dir directory.
@@ -1223,8 +1224,182 @@ read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode)
        return 0;
 }
 
-/* Read a dentry, including all alternate data stream entries that follow it,
- * from an uncompressed metadata resource buffer.  */
+/*
+ * Set the type of each stream for an encrypted file.
+ *
+ * All data streams of the encrypted file should have been packed into a single
+ * stream in the format provided by ReadEncryptedFileRaw() on Windows.  We
+ * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA.
+ *
+ * Encrypted files can't have a reparse point stream.  In the on-disk NTFS
+ * format they can, but as far as I know the reparse point stream of an
+ * encrypted file can't be stored in the WIM format in a way that's compatible
+ * with WIMGAPI, nor is there even any way for it to be read or written on
+ * Windows when the process does not have access to the file encryption key.
+ */
+static void
+assign_stream_types_encrypted(struct wim_inode *inode)
+{
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               struct wim_inode_stream *strm = &inode->i_streams[i];
+               if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash))
+               {
+                       strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA;
+                       return;
+               }
+       }
+}
+
+/*
+ * Set the type of each stream for an unencrypted file.
+ *
+ * There will be an unnamed data stream, a reparse point stream, or both an
+ * unnamed data stream and a reparse point stream.  In addition, there may be
+ * named data streams.
+ */
+static void
+assign_stream_types_unencrypted(struct wim_inode *inode)
+{
+       bool found_reparse_point_stream = false;
+       bool found_unnamed_data_stream = false;
+       struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL;
+
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               struct wim_inode_stream *strm = &inode->i_streams[i];
+
+               if (stream_is_named(strm)) {
+                       /* Named data stream  */
+                       strm->stream_type = STREAM_TYPE_DATA;
+               } else if (!is_zero_hash(strm->_stream_hash)) {
+                       if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
+                           !found_reparse_point_stream) {
+                               found_reparse_point_stream = true;
+                               strm->stream_type = STREAM_TYPE_REPARSE_POINT;
+                       } else if (!found_unnamed_data_stream) {
+                               found_unnamed_data_stream = true;
+                               strm->stream_type = STREAM_TYPE_DATA;
+                       }
+               } else {
+                       /* If no stream name is specified and the hash is zero,
+                        * then remember this stream for later so that we can
+                        * assign it to the unnamed data stream if we don't find
+                        * a better candidate.  */
+                       unnamed_stream_with_zero_hash = strm;
+               }
+       }
+
+       if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL)
+               unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA;
+}
+
+/*
+ * Read and interpret the collection of streams for the specified inode.
+ */
+static int
+setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
+                   unsigned num_extra_streams, const u8 *default_hash,
+                   u64 *offset_p)
+{
+       const u8 *orig_p = p;
+
+       inode->i_num_streams = 1 + num_extra_streams;
+
+       if (likely(inode->i_num_streams <= ARRAY_LEN(inode->i_embedded_streams))) {
+               inode->i_streams = inode->i_embedded_streams;
+       } else {
+               inode->i_streams = CALLOC(inode->i_num_streams,
+                                         sizeof(inode->i_streams[0]));
+               if (!inode->i_streams)
+                       return WIMLIB_ERR_NOMEM;
+       }
+
+       /* Use the default hash field for the first stream  */
+       inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
+       copy_hash(inode->i_streams[0]._stream_hash, default_hash);
+       inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
+       inode->i_streams[0].stream_id = 0;
+
+       /* Read the extra stream entries  */
+       for (unsigned i = 1; i < inode->i_num_streams; i++) {
+               struct wim_inode_stream *strm;
+               const struct wim_extra_stream_entry_on_disk *disk_strm;
+               u64 length;
+               u16 name_nbytes;
+
+               strm = &inode->i_streams[i];
+
+               strm->stream_id = i;
+
+               /* Do we have at least the size of the fixed-length data we know
+                * we need?  */
+               if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk))
+                       return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+               disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
+
+               /* Read the length field  */
+               length = le64_to_cpu(disk_strm->length);
+
+               /* 8-byte align the length  */
+               length = (length + 7) & ~7;
+
+               /* Make sure the length field is neither so small it doesn't
+                * include all the fixed-length data nor so large it overflows
+                * the metadata resource buffer. */
+               if (length < sizeof(struct wim_extra_stream_entry_on_disk) ||
+                   length > (end - p))
+                       return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+               /* Read the rest of the fixed-length data. */
+
+               copy_hash(strm->_stream_hash, disk_strm->hash);
+               name_nbytes = le16_to_cpu(disk_strm->name_nbytes);
+
+               /* If name_nbytes != 0, the stream is named.  */
+               if (name_nbytes != 0) {
+                       /* The name is encoded in UTF-16LE, which uses 2-byte
+                        * coding units, so the length of the name had better be
+                        * an even number of bytes.  */
+                       if (name_nbytes & 1)
+                               return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+                       /* Add the length of the stream name to get the length
+                        * we actually need to read.  Make sure this isn't more
+                        * than the specified length of the entry.  */
+                       if (sizeof(struct wim_extra_stream_entry_on_disk) +
+                           name_nbytes > length)
+                               return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
+
+                       strm->stream_name = utf16le_dupz(disk_strm->name,
+                                                        name_nbytes);
+                       if (!strm->stream_name)
+                               return WIMLIB_ERR_NOMEM;
+               } else {
+                       strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
+               }
+
+               strm->stream_type = STREAM_TYPE_UNKNOWN;
+
+               p += length;
+       }
+
+       inode->i_next_stream_id = inode->i_num_streams;
+
+       /* Now, assign a type to each stream.  Unfortunately this requires
+        * various hacks because stream types aren't explicitly provided in the
+        * WIM on-disk format.  */
+
+       if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
+               assign_stream_types_encrypted(inode);
+       else
+               assign_stream_types_unencrypted(inode);
+
+       *offset_p += p - orig_p;
+       return 0;
+}
+
+/* Read a dentry, including all extra stream entries that follow it, from an
+ * uncompressed metadata resource buffer.  */
 static int
 read_dentry(const u8 * restrict buf, size_t buf_len,
            u64 *offset_p, struct wim_dentry **dentry_ret)
@@ -1249,19 +1424,14 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
        /* Check for buffer overrun.  */
        if (unlikely(offset + sizeof(u64) > buf_len ||
                     offset + sizeof(u64) < offset))
-       {
-               ERROR("Directory entry starting at %"PRIu64" ends past the "
-                     "end of the metadata resource (size %zu)",
-                     offset, buf_len);
                return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
-       }
 
        /* Get pointer to the dentry data.  */
        p = &buf[offset];
        disk_dentry = (const struct wim_dentry_on_disk*)p;
 
        /* Get dentry length.  */
-       length = le64_to_cpu(disk_dentry->length);
+       length = (le64_to_cpu(disk_dentry->length) + 7) & ~7;
 
        /* Check for end-of-directory.  */
        if (length <= 8) {
@@ -1270,21 +1440,13 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
        }
 
        /* Validate dentry length.  */
-       if (unlikely(length < sizeof(struct wim_dentry_on_disk))) {
-               ERROR("Directory entry has invalid length of %"PRIu64" bytes",
-                     length);
+       if (unlikely(length < sizeof(struct wim_dentry_on_disk)))
                return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
-       }
 
        /* Check for buffer overrun.  */
        if (unlikely(offset + length > buf_len ||
                     offset + length < offset))
-       {
-               ERROR("Directory entry at offset %"PRIu64" and with size "
-                     "%"PRIu64" ends past the end of the metadata resource "
-                     "(size %zu)", offset, length, buf_len);
                return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
-       }
 
        /* Allocate new dentry structure, along with a preliminary inode.  */
        ret = new_dentry_with_timeless_inode(NULL, &dentry);
@@ -1300,7 +1462,6 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
        inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
        inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
        inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
-       copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash);
 
        /* I don't know what's going on here.  It seems like M$ screwed up the
         * reparse points, then put the fields in the same place and didn't
@@ -1311,15 +1472,12 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
                inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
                inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
                inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
-               /* Leave inode->i_ino at 0.  Note that this means the WIM file
-                * cannot archive hard-linked reparse points.  Such a thing
-                * doesn't really make sense anyway, although I believe it's
-                * theoretically possible to have them on NTFS.  */
+               /* Leave inode->i_ino at 0.  Note: this means that WIM cannot
+                * represent multiple hard links to a reparse point file.  */
        } else {
                inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
                inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
        }
-       inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams);
 
        /* Now onto reading the names.  There are two of them: the (long) file
         * name, and the short name.  */
@@ -1328,24 +1486,16 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
        file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
 
        if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) {
-               ERROR("Dentry name is not valid UTF-16 (odd number of bytes)!");
                ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
                goto err_free_dentry;
        }
 
        /* We now know the length of the file name and short name.  Make sure
-        * the length of the dentry is large enough to actually hold them.
-        *
-        * The calculated length here is unaligned to allow for the possibility
-        * that the dentry's length is unaligned, although this would be
-        * unexpected.  */
+        * the length of the dentry is large enough to actually hold them.  */
        calculated_size = dentry_min_len_with_names(file_name_nbytes,
                                                    short_name_nbytes);
 
        if (unlikely(length < calculated_size)) {
-               ERROR("Unexpected end of directory entry! (Expected "
-                     "at least %"PRIu64" bytes, got %"PRIu64" bytes.)",
-                     calculated_size, length);
                ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
                goto err_free_dentry;
        }
@@ -1377,39 +1527,23 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
                p += (u32)short_name_nbytes + 2;
        }
 
-       /* Read extra data at end of dentry (but before alternate data stream
-        * entries).  This may contain tagged items.  */
+       /* Read extra data at end of dentry (but before extra stream entries).
+        * This may contain tagged metadata items.  */
        ret = read_extra_data(p, &buf[offset + length], inode);
        if (ret)
                goto err_free_dentry;
 
-       /* Align the dentry length.  */
-       length = (length + 7) & ~7;
-
        offset += length;
 
-       /* Read the alternate data streams, if present.  inode->i_num_ads tells
-        * us how many they are, and they will directly follow the dentry in the
-        * metadata resource buffer.
-        *
-        * Note that each alternate data stream entry begins on an 8-byte
-        * aligned boundary, and the alternate data stream entries seem to NOT
-        * be included in the dentry->length field for some reason.  */
-       if (unlikely(inode->i_num_ads != 0)) {
-               size_t orig_bytes_remaining;
-               size_t bytes_remaining;
-
-               if (offset > buf_len) {
-                       ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
-                       goto err_free_dentry;
-               }
-               bytes_remaining = buf_len - offset;
-               orig_bytes_remaining = bytes_remaining;
-               ret = read_ads_entries(&buf[offset], inode, &bytes_remaining);
-               if (ret)
-                       goto err_free_dentry;
-               offset += (orig_bytes_remaining - bytes_remaining);
-       }
+       /* Set up the inode's collection of streams.  */
+       ret = setup_inode_streams(&buf[offset],
+                                 &buf[buf_len],
+                                 inode,
+                                 le16_to_cpu(disk_dentry->num_extra_streams),
+                                 disk_dentry->default_hash,
+                                 &offset);
+       if (ret)
+               goto err_free_dentry;
 
        *offset_p = offset;  /* Sets offset of next dentry in directory  */
        *dentry_ret = dentry;
@@ -1585,49 +1719,37 @@ err_free_dentry_tree:
        return ret;
 }
 
-/*
- * Write a WIM alternate data stream (ADS) entry to an output buffer.
- *
- * @ads_entry:
- *     The ADS entry to write.
- *
- * @hash:
- *     The hash field to use (instead of the one stored directly in the ADS
- *     entry, which isn't valid if the inode has been "resolved").
- *
- * @p:
- *     The memory location to which to write the data.
- *
- * Returns a pointer to the byte after the last byte written.
- */
 static u8 *
-write_ads_entry(const struct wim_ads_entry *ads_entry,
-               const u8 *hash, u8 * restrict p)
+write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
+                        const u8 * restrict hash)
 {
-       struct wim_ads_entry_on_disk *disk_ads_entry =
-                       (struct wim_ads_entry_on_disk*)p;
+       struct wim_extra_stream_entry_on_disk *disk_strm =
+                       (struct wim_extra_stream_entry_on_disk *)p;
        u8 *orig_p = p;
+       size_t name_nbytes;
 
-       disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved);
-       copy_hash(disk_ads_entry->hash, hash);
-       disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
-       p += sizeof(struct wim_ads_entry_on_disk);
-       if (ads_entry->stream_name_nbytes) {
-               p = mempcpy(p, ads_entry->stream_name,
-                           (u32)ads_entry->stream_name_nbytes + 2);
-       }
+       if (name == NO_STREAM_NAME)
+               name_nbytes = 0;
+       else
+               name_nbytes = utf16le_len_bytes(name);
+
+       disk_strm->reserved = 0;
+       copy_hash(disk_strm->hash, hash);
+       disk_strm->name_nbytes = cpu_to_le16(name_nbytes);
+       p += sizeof(struct wim_extra_stream_entry_on_disk);
+       if (name_nbytes != 0)
+               p = mempcpy(p, name, name_nbytes + 2);
        /* Align to 8-byte boundary */
        while ((uintptr_t)p & 7)
                *p++ = 0;
-       disk_ads_entry->length = cpu_to_le64(p - orig_p);
+       disk_strm->length = cpu_to_le64(p - orig_p);
        return p;
 }
 
 /*
  * Write a WIM dentry to an output buffer.
  *
- * This includes any alternate data stream entries that may follow the dentry
- * itself.
+ * This includes any extra stream entries that may follow the dentry itself.
  *
  * @dentry:
  *     The dentry to write.
@@ -1643,15 +1765,11 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
        const struct wim_inode *inode;
        struct wim_dentry_on_disk *disk_dentry;
        const u8 *orig_p;
-       const u8 *hash;
-       bool use_dummy_stream;
-       u16 num_ads;
 
        wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
        orig_p = p;
 
        inode = dentry->d_inode;
-       use_dummy_stream = inode_needs_dummy_stream(inode);
        disk_dentry = (struct wim_dentry_on_disk*)p;
 
        disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
@@ -1664,11 +1782,6 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
        disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
        disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
        disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
-       if (use_dummy_stream)
-               hash = zero_hash;
-       else
-               hash = inode_stream_hash(inode, 0);
-       copy_hash(disk_dentry->unnamed_stream_hash, hash);
        if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
                disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
                disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
@@ -1679,10 +1792,7 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
                disk_dentry->nonreparse.hard_link_group_id =
                        cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
        }
-       num_ads = inode->i_num_ads;
-       if (use_dummy_stream)
-               num_ads++;
-       disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads);
+
        disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
        disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
        p += sizeof(struct wim_dentry_on_disk);
@@ -1702,21 +1812,79 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
        if (inode->i_extra_size) {
                /* Extra tagged items --- not usually present.  */
                p = mempcpy(p, inode->i_extra, inode->i_extra_size);
+
+               /* Align to 8-byte boundary */
                while ((uintptr_t)p & 7)
                        *p++ = 0;
        }
 
        disk_dentry->length = cpu_to_le64(p - orig_p);
 
-       if (use_dummy_stream) {
-               hash = inode_unnamed_stream_hash(inode);
-               p = write_ads_entry(&(struct wim_ads_entry){}, hash, p);
-       }
+       /* Streams  */
+
+       if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
+               const struct wim_inode_stream *efs_strm;
+               const u8 *efs_hash;
+
+               efs_strm = inode_get_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA,
+                                           NO_STREAM_NAME);
+               efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
+               copy_hash(disk_dentry->default_hash, efs_hash);
+               disk_dentry->num_extra_streams = cpu_to_le16(0);
+       } else {
+               /*
+                * Extra stream entries:
+                *
+                * - Use one extra stream entry for each named data stream
+                * - Use one extra stream entry for the unnamed data stream when there is either:
+                *      - a reparse point stream
+                *      - at least one named data stream (for Windows PE bug workaround)
+                * - Use one extra stream entry for the reparse point stream if there is one
+                */
+               bool have_named_data_stream = false;
+               bool have_reparse_point_stream = false;
+               u16 num_extra_streams = 0;
+               const u8 *unnamed_data_stream_hash = zero_hash;
+               const u8 *reparse_point_hash;
+               for (unsigned i = 0; i < inode->i_num_streams; i++) {
+                       const struct wim_inode_stream *strm = &inode->i_streams[i];
+                       if (strm->stream_type == STREAM_TYPE_DATA) {
+                               if (stream_is_named(strm))
+                                       have_named_data_stream = true;
+                               else
+                                       unnamed_data_stream_hash = stream_hash(strm);
+                       } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
+                               have_reparse_point_stream = true;
+                               reparse_point_hash = stream_hash(strm);
+                       }
+               }
 
-       /* Write the alternate data streams entries, if any. */
-       for (u16 i = 0; i < inode->i_num_ads; i++) {
-               hash = inode_stream_hash(inode, i + 1);
-               p = write_ads_entry(&inode->i_ads_entries[i], hash, p);
+               if (have_reparse_point_stream || have_named_data_stream) {
+
+                       copy_hash(disk_dentry->default_hash, zero_hash);
+
+                       if (have_reparse_point_stream) {
+                               p = write_extra_stream_entry(p, NO_STREAM_NAME,
+                                                            reparse_point_hash);
+                               num_extra_streams++;
+                       }
+
+                       p = write_extra_stream_entry(p, NO_STREAM_NAME,
+                                                    unnamed_data_stream_hash);
+                       num_extra_streams++;
+               } else {
+                       copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
+               }
+
+               for (unsigned i = 0; i < inode->i_num_streams; i++) {
+                       const struct wim_inode_stream *strm = &inode->i_streams[i];
+                       if (stream_is_named_data_stream(strm)) {
+                               p = write_extra_stream_entry(p, strm->stream_name,
+                                                            stream_hash(strm));
+                               num_extra_streams++;
+                       }
+               }
+               disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
        }
 
        return p;
index 5819f0e..5ff1df2 100644 (file)
@@ -136,7 +136,7 @@ varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\
        bool buf_onheap;                                                \
        bufsize = (worst_case_len_expr) * sizeof(chartype2);            \
        /* Worst case length */                                         \
-       if (bufsize <= STACK_MAX) {                                     \
+       if (bufsize <= STACK_MAX) {                                     \
                buf = alloca(bufsize);                                  \
                buf_onheap = false;                                     \
        } else {                                                        \
@@ -558,7 +558,31 @@ cmp_utf16le_strings(const utf16lechar *s1, size_t n1,
        return (n1 < n2) ? -1 : 1;
 }
 
-/* Duplicate a UTF16-LE string.  The input string might not be null terminated
+/* Like cmp_utf16le_strings(), but assumes the strings are null terminated.  */
+int
+cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2,
+                     bool ignore_case)
+{
+       if (ignore_case) {
+               for (;;) {
+                       u16 c1 = upcase[le16_to_cpu(*s1)];
+                       u16 c2 = upcase[le16_to_cpu(*s2)];
+                       if (c1 != c2)
+                               return (c1 < c2) ? -1 : 1;
+                       if (c1 == 0)
+                               return 0;
+                       s1++, s2++;
+               }
+       } else {
+               while (*s1 && *s1 == *s2)
+                       s1++, s2++;
+               if (*s1 == *s2)
+                       return 0;
+               return (le16_to_cpu(*s1) < le16_to_cpu(*s2)) ? -1 : 1;
+       }
+}
+
+/* Duplicate a UTF-16LE string.  The input string might not be null terminated
  * and might be misaligned, but the returned string is guaranteed to be null
  * terminated and properly aligned.  */
 utf16lechar *
@@ -571,3 +595,32 @@ utf16le_dupz(const void *ustr, size_t usize)
        }
        return dup;
 }
+
+/* Duplicate a null-terminated UTF-16LE string.  */
+utf16lechar *
+utf16le_dup(const utf16lechar *ustr)
+{
+       const utf16lechar *p = ustr;
+       while (*p++)
+               ;
+       return memdup(ustr, (const u8 *)p - (const u8 *)ustr);
+}
+
+/* Return the length, in bytes, of a null-terminated UTF-16LE string,
+ * excluding the null terminator.  */
+size_t
+utf16le_len_bytes(const utf16lechar *s)
+{
+       const utf16lechar *p = s;
+       while (*p)
+               p++;
+       return (p - s) * sizeof(utf16lechar);
+}
+
+/* Return the length, in UTF-16 code units, of a null-terminated UTF-16LE
+ * string, excluding the null terminator.  */
+size_t
+utf16le_len_chars(const utf16lechar *s)
+{
+       return utf16le_len_bytes(s) / sizeof(utf16lechar);
+}
index bfc36bd..8d73328 100644 (file)
 #endif
 
 #include "wimlib.h"
+#include "wimlib/blob_table.h"
 #include "wimlib/error.h"
 #include "wimlib/inode.h"
-#include "wimlib/lookup_table.h"
 #include "wimlib/metadata.h"
 #include "wimlib/xml.h"
 
 static int
-lte_set_not_exported(struct wim_lookup_table_entry *lte, void *_ignore)
+blob_set_not_exported(struct blob_descriptor *blob, void *_ignore)
 {
-       lte->out_refcnt = 0;
-       lte->was_exported = 0;
+       blob->out_refcnt = 0;
+       blob->was_exported = 0;
        return 0;
 }
 
 static int
-lte_rollback_export(struct wim_lookup_table_entry *lte, void *_lookup_table)
+blob_rollback_export(struct blob_descriptor *blob, void *_blob_table)
 {
-       struct wim_lookup_table *lookup_table = _lookup_table;
+       struct blob_table *blob_table = _blob_table;
 
-       lte->refcnt -= lte->out_refcnt;
-       if (lte->was_exported) {
-               lookup_table_unlink(lookup_table, lte);
-               free_lookup_table_entry(lte);
+       blob->refcnt -= blob->out_refcnt;
+       if (blob->was_exported) {
+               blob_table_unlink(blob_table, blob);
+               free_blob_descriptor(blob);
        }
        return 0;
 }
 
 static int
-inode_export_streams(struct wim_inode *inode,
-                    struct wim_lookup_table *src_lookup_table,
-                    struct wim_lookup_table *dest_lookup_table,
-                    bool gift)
+inode_export_blobs(struct wim_inode *inode, struct blob_table *src_blob_table,
+                  struct blob_table *dest_blob_table, bool gift)
 {
        unsigned i;
        const u8 *hash;
-       struct wim_lookup_table_entry *src_lte, *dest_lte;
+       struct blob_descriptor *src_blob, *dest_blob;
 
        inode_unresolve_streams(inode);
-       for (i = 0; i <= inode->i_num_ads; i++) {
 
-               /* Retrieve SHA1 message digest of stream to export.  */
-               hash = inode_stream_hash(inode, i);
-               if (is_zero_hash(hash))  /* Empty stream?  */
+       for (i = 0; i < inode->i_num_streams; i++) {
+
+               /* Retrieve SHA-1 message digest of blob to export.  */
+               hash = stream_hash(&inode->i_streams[i]);
+               if (is_zero_hash(hash))  /* Empty blob?  */
                        continue;
 
-               /* Search for the stream (via SHA1 message digest) in the
+               /* Search for the blob (via SHA-1 message digest) in the
                 * destination WIM.  */
-               dest_lte = lookup_stream(dest_lookup_table, hash);
-               if (!dest_lte) {
-                       /* Stream not yet present in destination WIM.  Search
-                        * for it in the source WIM, then export it into the
+               dest_blob = lookup_blob(dest_blob_table, hash);
+               if (!dest_blob) {
+                       /* Blob not yet present in destination WIM.  Search for
+                        * it in the source WIM, then export it into the
                         * destination WIM.  */
-                       src_lte = lookup_stream(src_lookup_table, hash);
-                       if (!src_lte)
-                               return stream_not_found_error(inode, hash);
+                       src_blob = lookup_blob(src_blob_table, hash);
+                       if (!src_blob)
+                               return blob_not_found_error(inode, hash);
 
                        if (gift) {
-                               dest_lte = src_lte;
-                               lookup_table_unlink(src_lookup_table, src_lte);
+                               dest_blob = src_blob;
+                               blob_table_unlink(src_blob_table, src_blob);
                        } else {
-                               dest_lte = clone_lookup_table_entry(src_lte);
-                               if (!dest_lte)
+                               dest_blob = clone_blob_descriptor(src_blob);
+                               if (!dest_blob)
                                        return WIMLIB_ERR_NOMEM;
                        }
-                       dest_lte->refcnt = 0;
-                       dest_lte->out_refcnt = 0;
-                       dest_lte->was_exported = 1;
-                       lookup_table_insert(dest_lookup_table, dest_lte);
+                       dest_blob->refcnt = 0;
+                       dest_blob->out_refcnt = 0;
+                       dest_blob->was_exported = 1;
+                       blob_table_insert(dest_blob_table, dest_blob);
                }
 
-               /* Stream is present in destination WIM (either pre-existing,
+               /* Blob is present in destination WIM (either pre-existing,
                 * already exported, or just exported above).  Increment its
                 * reference count appropriately.   Note: we use 'refcnt' for
                 * the raw reference count, but 'out_refcnt' for references
                 * arising just from the export operation; this is used to roll
                 * back a failed export if needed.  */
-               dest_lte->refcnt += inode->i_nlink;
-               dest_lte->out_refcnt += inode->i_nlink;
+               dest_blob->refcnt += inode->i_nlink;
+               dest_blob->out_refcnt += inode->i_nlink;
        }
        return 0;
 }
@@ -155,16 +154,16 @@ wimlib_export_image(WIMStruct *src_wim,
        }
        orig_dest_image_count = dest_wim->hdr.image_count;
 
-       /* Stream checksums must be known before proceeding.  */
-       ret = wim_checksum_unhashed_streams(src_wim);
+       /* Blob checksums must be known before proceeding.  */
+       ret = wim_checksum_unhashed_blobs(src_wim);
        if (ret)
                return ret;
-       ret = wim_checksum_unhashed_streams(dest_wim);
+       ret = wim_checksum_unhashed_blobs(dest_wim);
        if (ret)
                return ret;
 
        /* Enable rollbacks  */
-       for_lookup_table_entry(dest_wim->lookup_table, lte_set_not_exported, NULL);
+       for_blob_in_table(dest_wim->blob_table, blob_set_not_exported, NULL);
 
        /* Export each requested image.  */
        for (src_image = start_src_image;
@@ -207,12 +206,12 @@ wimlib_export_image(WIMStruct *src_wim,
                src_imd = wim_get_current_image_metadata(src_wim);
 
                /* Iterate through inodes in the source image and export their
-                * streams into the destination WIM.  */
+                * blobs into the destination WIM.  */
                image_for_each_inode(inode, src_imd) {
-                       ret = inode_export_streams(inode,
-                                                  src_wim->lookup_table,
-                                                  dest_wim->lookup_table,
-                                                  export_flags & WIMLIB_EXPORT_FLAG_GIFT);
+                       ret = inode_export_blobs(inode,
+                                                src_wim->blob_table,
+                                                dest_wim->blob_table,
+                                                export_flags & WIMLIB_EXPORT_FLAG_GIFT);
                        if (ret)
                                goto out_rollback;
                }
@@ -259,8 +258,8 @@ wimlib_export_image(WIMStruct *src_wim,
        }
 
        if (export_flags & WIMLIB_EXPORT_FLAG_GIFT) {
-               free_lookup_table(src_wim->lookup_table);
-               src_wim->lookup_table = NULL;
+               free_blob_table(src_wim->blob_table);
+               src_wim->blob_table = NULL;
        }
        return 0;
 
@@ -275,7 +274,7 @@ out_rollback:
                put_image_metadata(dest_wim->image_metadata[
                                        --dest_wim->hdr.image_count], NULL);
        }
-       for_lookup_table_entry(dest_wim->lookup_table, lte_rollback_export,
-                              dest_wim->lookup_table);
+       for_blob_in_table(dest_wim->blob_table, blob_rollback_export,
+                         dest_wim->blob_table);
        return ret;
 }
index a5be7d1..5b1c1cc 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
 
 #include "wimlib/apply.h"
 #include "wimlib/assert.h"
+#include "wimlib/blob_table.h"
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
 #include "wimlib/endianness.h"
 #include "wimlib/error.h"
-#include "wimlib/lookup_table.h"
 #include "wimlib/metadata.h"
 #include "wimlib/pathlist.h"
 #include "wimlib/paths.h"
@@ -136,83 +136,55 @@ end_file_metadata_phase(struct apply_ctx *ctx)
        return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
 }
 
-/* Check whether the extraction of a dentry should be skipped completely.  */
-static bool
-dentry_is_supported(struct wim_dentry *dentry,
-                   const struct wim_features *supported_features)
-{
-       struct wim_inode *inode = dentry->d_inode;
-
-       if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
-               if (!(supported_features->reparse_points ||
-                     (inode_is_symlink(inode) &&
-                      supported_features->symlink_reparse_points)))
-                       return false;
-       }
-
-       if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
-               if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) {
-                       if (!supported_features->encrypted_directories)
-                               return false;
-               } else {
-                       if (!supported_features->encrypted_files)
-                               return false;
-               }
-       }
-
-       return true;
-}
-
-
 #define PWM_ALLOW_WIM_HDR 0x00001
 
-/* Read the header from a stream in a pipable WIM.  */
+/* Read the header for a blob in a pipable WIM.  */
 static int
-read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte,
-                      struct wim_resource_spec *rspec,
-                      int flags, struct wim_header_disk *hdr_ret)
+read_pwm_blob_header(WIMStruct *pwm, struct blob_descriptor *blob,
+                    struct wim_resource_descriptor *rdesc,
+                    int flags, struct wim_header_disk *hdr_ret)
 {
        union {
-               struct pwm_stream_hdr stream_hdr;
+               struct pwm_blob_hdr blob_hdr;
                struct wim_header_disk pwm_hdr;
        } buf;
        struct wim_reshdr reshdr;
        int ret;
 
-       ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr));
+       ret = full_read(&pwm->in_fd, &buf.blob_hdr, sizeof(buf.blob_hdr));
        if (ret)
                goto read_error;
 
        if ((flags & PWM_ALLOW_WIM_HDR) &&
-           le64_to_cpu(buf.stream_hdr.magic) == PWM_MAGIC)
+           le64_to_cpu(buf.blob_hdr.magic) == PWM_MAGIC)
        {
-               BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr));
-               ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1,
-                               sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr));
+               BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.blob_hdr));
+               ret = full_read(&pwm->in_fd, &buf.blob_hdr + 1,
+                               sizeof(buf.pwm_hdr) - sizeof(buf.blob_hdr));
 
                if (ret)
                        goto read_error;
-               lte->resource_location = RESOURCE_NONEXISTENT;
+               blob->blob_location = BLOB_NONEXISTENT;
                memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr));
                return 0;
        }
 
-       if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) {
-               ERROR("Data read on pipe is invalid (expected stream header).");
+       if (le64_to_cpu(buf.blob_hdr.magic) != PWM_BLOB_MAGIC) {
+               ERROR("Data read on pipe is invalid (expected blob header).");
                return WIMLIB_ERR_INVALID_PIPABLE_WIM;
        }
 
-       copy_hash(lte->hash, buf.stream_hdr.hash);
+       copy_hash(blob->hash, buf.blob_hdr.hash);
 
        reshdr.size_in_wim = 0;
-       reshdr.flags = le32_to_cpu(buf.stream_hdr.flags);
+       reshdr.flags = le32_to_cpu(buf.blob_hdr.flags);
        reshdr.offset_in_wim = pwm->in_fd.offset;
-       reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size);
-       wim_res_hdr_to_spec(&reshdr, pwm, rspec);
-       lte_bind_wim_resource_spec(lte, rspec);
-       lte->flags = rspec->flags;
-       lte->size = rspec->uncompressed_size;
-       lte->offset_in_res = 0;
+       reshdr.uncompressed_size = le64_to_cpu(buf.blob_hdr.uncompressed_size);
+       wim_res_hdr_to_desc(&reshdr, pwm, rdesc);
+       blob_set_is_located_in_wim_resource(blob, rdesc);
+       blob->flags = rdesc->flags;
+       blob->size = rdesc->uncompressed_size;
+       blob->offset_in_res = 0;
        return 0;
 
 read_error:
@@ -221,24 +193,24 @@ read_error:
 }
 
 static int
-load_streams_from_pipe(struct apply_ctx *ctx,
-                      const struct read_stream_list_callbacks *cbs)
+read_blobs_from_pipe(struct apply_ctx *ctx,
+                    const struct read_blob_list_callbacks *cbs)
 {
-       struct wim_lookup_table_entry *found_lte = NULL;
-       struct wim_resource_spec *rspec = NULL;
-       struct wim_lookup_table *lookup_table;
+       struct blob_descriptor *found_blob = NULL;
+       struct wim_resource_descriptor *rdesc = NULL;
+       struct blob_table *blob_table;
        int ret;
 
        ret = WIMLIB_ERR_NOMEM;
-       found_lte = new_lookup_table_entry();
-       if (!found_lte)
+       found_blob = new_blob_descriptor();
+       if (!found_blob)
                goto out;
 
-       rspec = MALLOC(sizeof(struct wim_resource_spec));
-       if (!rspec)
+       rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
+       if (!rdesc)
                goto out;
 
-       lookup_table = ctx->wim->lookup_table;
+       blob_table = ctx->wim->blob_table;
        memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN);
        ctx->progress.extract.part_number = ctx->wim->hdr.part_number;
        ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts;
@@ -246,48 +218,48 @@ load_streams_from_pipe(struct apply_ctx *ctx,
        if (ret)
                goto out;
 
-       while (ctx->num_streams_remaining) {
+       while (ctx->num_blobs_remaining) {
                struct wim_header_disk pwm_hdr;
-               struct wim_lookup_table_entry *needed_lte;
+               struct blob_descriptor *needed_blob;
 
-               if (found_lte->resource_location != RESOURCE_NONEXISTENT)
-                       lte_unbind_wim_resource_spec(found_lte);
-               ret = read_pwm_stream_header(ctx->wim, found_lte, rspec,
-                                            PWM_ALLOW_WIM_HDR, &pwm_hdr);
+               if (found_blob->blob_location != BLOB_NONEXISTENT)
+                       blob_unset_is_located_in_wim_resource(found_blob);
+               ret = read_pwm_blob_header(ctx->wim, found_blob, rdesc,
+                                          PWM_ALLOW_WIM_HDR, &pwm_hdr);
                if (ret)
                        goto out;
 
-               if ((found_lte->resource_location != RESOURCE_NONEXISTENT)
-                   && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA)
-                   && (needed_lte = lookup_stream(lookup_table, found_lte->hash))
-                   && (needed_lte->out_refcnt))
+               if ((found_blob->blob_location != BLOB_NONEXISTENT)
+                   && !(found_blob->flags & WIM_RESHDR_FLAG_METADATA)
+                   && (needed_blob = lookup_blob(blob_table, found_blob->hash))
+                   && (needed_blob->out_refcnt))
                {
-                       needed_lte->offset_in_res = found_lte->offset_in_res;
-                       needed_lte->flags = found_lte->flags;
-                       needed_lte->size = found_lte->size;
+                       needed_blob->offset_in_res = found_blob->offset_in_res;
+                       needed_blob->flags = found_blob->flags;
+                       needed_blob->size = found_blob->size;
 
-                       lte_unbind_wim_resource_spec(found_lte);
-                       lte_bind_wim_resource_spec(needed_lte, rspec);
+                       blob_unset_is_located_in_wim_resource(found_blob);
+                       blob_set_is_located_in_wim_resource(needed_blob, rdesc);
 
-                       ret = (*cbs->begin_stream)(needed_lte,
-                                                  cbs->begin_stream_ctx);
+                       ret = (*cbs->begin_blob)(needed_blob,
+                                                cbs->begin_blob_ctx);
                        if (ret) {
-                               lte_unbind_wim_resource_spec(needed_lte);
+                               blob_unset_is_located_in_wim_resource(needed_blob);
                                goto out;
                        }
 
-                       ret = extract_stream(needed_lte, needed_lte->size,
-                                            cbs->consume_chunk,
-                                            cbs->consume_chunk_ctx);
+                       ret = extract_blob(needed_blob, needed_blob->size,
+                                          cbs->consume_chunk,
+                                          cbs->consume_chunk_ctx);
 
-                       ret = (*cbs->end_stream)(needed_lte, ret,
-                                                cbs->end_stream_ctx);
-                       lte_unbind_wim_resource_spec(needed_lte);
+                       ret = (*cbs->end_blob)(needed_blob, ret,
+                                              cbs->end_blob_ctx);
+                       blob_unset_is_located_in_wim_resource(needed_blob);
                        if (ret)
                                goto out;
-                       ctx->num_streams_remaining--;
-               } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) {
-                       ret = skip_wim_stream(found_lte);
+                       ctx->num_blobs_remaining--;
+               } else if (found_blob->blob_location != BLOB_NONEXISTENT) {
+                       ret = skip_wim_resource(found_blob->rdesc);
                        if (ret)
                                goto out;
                } else {
@@ -312,9 +284,9 @@ load_streams_from_pipe(struct apply_ctx *ctx,
        }
        ret = 0;
 out:
-       if (found_lte && found_lte->resource_location != RESOURCE_IN_WIM)
-               FREE(rspec);
-       free_lookup_table_entry(found_lte);
+       if (found_blob && found_blob->blob_location != BLOB_IN_WIM)
+               FREE(rdesc);
+       free_blob_descriptor(found_blob);
        return ret;
 }
 
@@ -358,17 +330,17 @@ retry:
 }
 
 static int
-begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte, void *_ctx)
+begin_extract_blob_wrapper(struct blob_descriptor *blob, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
 
-       ctx->cur_stream = lte;
-       ctx->cur_stream_offset = 0;
+       ctx->cur_blob = blob;
+       ctx->cur_blob_offset = 0;
 
-       if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS))
+       if (unlikely(blob->out_refcnt > MAX_OPEN_FILES))
                return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name);
        else
-               return (*ctx->saved_cbs->begin_stream)(lte, ctx->saved_cbs->begin_stream_ctx);
+               return (*ctx->saved_cbs->begin_blob)(blob, ctx->saved_cbs->begin_blob_ctx);
 }
 
 static int
@@ -378,17 +350,18 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
        union wimlib_progress_info *progress = &ctx->progress;
        int ret;
 
-       ctx->cur_stream_offset += size;
+       ctx->cur_blob_offset += size;
 
        if (likely(ctx->supported_features.hard_links)) {
                progress->extract.completed_bytes +=
-                       (u64)size * ctx->cur_stream->out_refcnt;
-               if (ctx->cur_stream_offset == ctx->cur_stream->size)
-                       progress->extract.completed_streams += ctx->cur_stream->out_refcnt;
+                       (u64)size * ctx->cur_blob->out_refcnt;
+               if (ctx->cur_blob_offset == ctx->cur_blob->size)
+                       progress->extract.completed_streams += ctx->cur_blob->out_refcnt;
        } else {
-               const struct stream_owner *owners = stream_owners(ctx->cur_stream);
-               for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) {
-                       const struct wim_inode *inode = owners[i].inode;
+               const struct blob_extraction_target *targets =
+                       blob_extraction_targets(ctx->cur_blob);
+               for (u32 i = 0; i < ctx->cur_blob->out_refcnt; i++) {
+                       const struct wim_inode *inode = targets[i].inode;
                        const struct wim_dentry *dentry;
 
                        list_for_each_entry(dentry,
@@ -396,7 +369,7 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
                                            d_extraction_alias_node)
                        {
                                progress->extract.completed_bytes += size;
-                               if (ctx->cur_stream_offset == ctx->cur_stream->size)
+                               if (ctx->cur_blob_offset == ctx->cur_blob->size)
                                        progress->extract.completed_streams++;
                        }
                }
@@ -451,63 +424,62 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx)
 static int
 extract_from_tmpfile(const tchar *tmpfile_name, struct apply_ctx *ctx)
 {
-       struct wim_lookup_table_entry tmpfile_lte;
-       struct wim_lookup_table_entry *orig_lte = ctx->cur_stream;
-       const struct read_stream_list_callbacks *cbs = ctx->saved_cbs;
+       struct blob_descriptor tmpfile_blob;
+       struct blob_descriptor *orig_blob = ctx->cur_blob;
+       const struct read_blob_list_callbacks *cbs = ctx->saved_cbs;
        int ret;
-       const u32 orig_refcnt = orig_lte->out_refcnt;
+       const u32 orig_refcnt = orig_blob->out_refcnt;
 
-       BUILD_BUG_ON(MAX_OPEN_STREAMS < ARRAY_LEN(orig_lte->inline_stream_owners));
+       BUILD_BUG_ON(MAX_OPEN_FILES <
+                    ARRAY_LEN(orig_blob->inline_blob_extraction_targets));
 
-       struct stream_owner *owners = orig_lte->stream_owners;
+       struct blob_extraction_target *targets = orig_blob->blob_extraction_targets;
 
-       /* Copy the stream's data from the temporary file to each of its
-        * destinations.
+       /* Copy the blob's data from the temporary file to each of its targets.
         *
-        * This is executed only in the very uncommon case that a
-        * single-instance stream is being extracted to more than
-        * MAX_OPEN_STREAMS locations!  */
+        * This is executed only in the very uncommon case that a blob is being
+        * extracted to more than MAX_OPEN_FILES targets!  */
 
-       memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry));
-       tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK;
-       tmpfile_lte.file_on_disk = ctx->tmpfile_name;
+       memcpy(&tmpfile_blob, orig_blob, sizeof(struct blob_descriptor));
+       tmpfile_blob.blob_location = BLOB_IN_FILE_ON_DISK;
+       tmpfile_blob.file_on_disk = ctx->tmpfile_name;
        ret = 0;
        for (u32 i = 0; i < orig_refcnt; i++) {
 
                /* Note: it usually doesn't matter whether we pass the original
-                * stream entry to callbacks provided by the extraction backend
-                * as opposed to the tmpfile stream entry, since they shouldn't
-                * actually read data from the stream other than through the
-                * read_stream_prefix() call below.  But for
+                * blob descriptor to callbacks provided by the extraction
+                * backend as opposed to the tmpfile blob descriptor, since they
+                * shouldn't actually read data from the blob other than through
+                * the read_blob_prefix() call below.  But for
                 * WIMLIB_EXTRACT_FLAG_WIMBOOT mode on Windows it does matter
-                * because it needs the original stream location in order to
-                * create the external backing reference.  */
+                * because it needs access to the original WIM resource
+                * descriptor in order to create the external backing reference.
+                */
 
-               orig_lte->out_refcnt = 1;
-               orig_lte->inline_stream_owners[0] = owners[i];
+               orig_blob->out_refcnt = 1;
+               orig_blob->inline_blob_extraction_targets[0] = targets[i];
 
-               ret = (*cbs->begin_stream)(orig_lte, cbs->begin_stream_ctx);
+               ret = (*cbs->begin_blob)(orig_blob, cbs->begin_blob_ctx);
                if (ret)
                        break;
 
                /* Extra SHA-1 isn't necessary here, but it shouldn't hurt as
                 * this case is very rare anyway.  */
-               ret = extract_stream(&tmpfile_lte, tmpfile_lte.size,
-                                    cbs->consume_chunk,
-                                    cbs->consume_chunk_ctx);
+               ret = extract_blob(&tmpfile_blob, tmpfile_blob.size,
+                                  cbs->consume_chunk,
+                                  cbs->consume_chunk_ctx);
 
-               ret = (*cbs->end_stream)(orig_lte, ret, cbs->end_stream_ctx);
+               ret = (*cbs->end_blob)(orig_blob, ret, cbs->end_blob_ctx);
                if (ret)
                        break;
        }
-       FREE(owners);
-       orig_lte->out_refcnt = 0;
+       FREE(targets);
+       orig_blob->out_refcnt = 0;
        return ret;
 }
 
 static int
-end_extract_stream_wrapper(struct wim_lookup_table_entry *stream,
-                          int status, void *_ctx)
+end_extract_blob_wrapper(struct blob_descriptor *blob, int status, void *_ctx)
 {
        struct apply_ctx *ctx = _ctx;
 
@@ -520,49 +492,49 @@ end_extract_stream_wrapper(struct wim_lookup_table_entry *stream,
                FREE(ctx->tmpfile_name);
                return status;
        } else {
-               return (*ctx->saved_cbs->end_stream)(stream, status,
-                                                    ctx->saved_cbs->end_stream_ctx);
+               return (*ctx->saved_cbs->end_blob)(blob, status,
+                                                  ctx->saved_cbs->end_blob_ctx);
        }
 }
 
 /*
- * Read the list of single-instance streams to extract and feed their data into
- * the specified callback functions.
+ * Read the list of blobs to extract and feed their data into the specified
+ * callback functions.
  *
- * This handles checksumming each stream.
+ * This handles checksumming each blob.
  *
  * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS.
  *
  * This also works if the WIM is being read from a pipe, whereas attempting to
- * read streams directly (e.g. with read_full_stream_into_buf()) will not.
+ * read blobs directly (e.g. with read_full_blob_into_buf()) will not.
  *
- * This also will split up streams that will need to be extracted to more than
- * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream.
+ * This also will split up blobs that will need to be extracted to more than
+ * MAX_OPEN_FILES locations, as measured by the 'out_refcnt' of each blob.
  * Therefore, the apply_operations implementation need not worry about running
  * out of file descriptors, unless it might open more than one file descriptor
  * per nominal destination (e.g. Win32 currently might because the destination
  * file system might not support hard links).
  */
 int
-extract_stream_list(struct apply_ctx *ctx,
-                   const struct read_stream_list_callbacks *cbs)
+extract_blob_list(struct apply_ctx *ctx,
+                 const struct read_blob_list_callbacks *cbs)
 {
-       struct read_stream_list_callbacks wrapper_cbs = {
-               .begin_stream      = begin_extract_stream_wrapper,
-               .begin_stream_ctx  = ctx,
+       struct read_blob_list_callbacks wrapper_cbs = {
+               .begin_blob        = begin_extract_blob_wrapper,
+               .begin_blob_ctx    = ctx,
                .consume_chunk     = extract_chunk_wrapper,
                .consume_chunk_ctx = ctx,
-               .end_stream        = end_extract_stream_wrapper,
-               .end_stream_ctx    = ctx,
+               .end_blob          = end_extract_blob_wrapper,
+               .end_blob_ctx      = ctx,
        };
        ctx->saved_cbs = cbs;
        if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
-               return load_streams_from_pipe(ctx, &wrapper_cbs);
+               return read_blobs_from_pipe(ctx, &wrapper_cbs);
        } else {
-               return read_stream_list(&ctx->stream_list,
-                                       offsetof(struct wim_lookup_table_entry,
-                                                extraction_list),
-                                       &wrapper_cbs, VERIFY_STREAM_HASHES);
+               return read_blob_list(&ctx->blob_list,
+                                     offsetof(struct blob_descriptor,
+                                              extraction_list),
+                                     &wrapper_cbs, VERIFY_BLOB_HASHES);
        }
 }
 
@@ -573,38 +545,39 @@ extract_stream_list(struct apply_ctx *ctx,
  * unnamed data stream only.  */
 static int
 extract_dentry_to_stdout(struct wim_dentry *dentry,
-                        const struct wim_lookup_table *lookup_table)
+                        const struct blob_table *blob_table)
 {
        struct wim_inode *inode = dentry->d_inode;
-       struct wim_lookup_table_entry *lte;
+       struct blob_descriptor *blob;
        struct filedes _stdout;
 
        if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
-                                  FILE_ATTRIBUTE_DIRECTORY))
+                                  FILE_ATTRIBUTE_DIRECTORY |
+                                  FILE_ATTRIBUTE_ENCRYPTED)) /* encrypted files are now rejected as well */
        {
                ERROR("\"%"TS"\" is not a regular file and therefore cannot be "
                      "extracted to standard output", dentry_full_path(dentry));
                return WIMLIB_ERR_NOT_A_REGULAR_FILE;
        }
 
-       lte = inode_unnamed_lte(inode, lookup_table);
-       if (!lte) {
-               const u8 *hash = inode_unnamed_stream_hash(inode);
+       blob = inode_get_blob_for_unnamed_data_stream(inode, blob_table);
+       if (!blob) { /* no blob found: an error only if the hash is nonzero */
+               const u8 *hash = inode_get_hash_of_unnamed_data_stream(inode);
                if (!is_zero_hash(hash))
-                       return stream_not_found_error(inode, hash);
+                       return blob_not_found_error(inode, hash);
                return 0;
        }
 
        filedes_init(&_stdout, STDOUT_FILENO);
-       return extract_full_stream_to_fd(lte, &_stdout);
+       return extract_full_blob_to_fd(blob, &_stdout); /* '_stdout' wraps STDOUT_FILENO */
 }
 
 static int
 extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
-                          const struct wim_lookup_table *lookup_table)
+                          const struct blob_table *blob_table)
 {
        for (size_t i = 0; i < num_dentries; i++) {
-               int ret = extract_dentry_to_stdout(dentries[i], lookup_table);
+               int ret = extract_dentry_to_stdout(dentries[i], blob_table);
                if (ret)
                        return ret;
        }
@@ -752,13 +725,13 @@ destroy_dentry_list(struct list_head *dentry_list)
 }
 
 static void
-destroy_stream_list(struct list_head *stream_list)
+destroy_blob_list(struct list_head *blob_list)
 {
-       struct wim_lookup_table_entry *lte;
+       struct blob_descriptor *blob;
 
-       list_for_each_entry(lte, stream_list, extraction_list)
-               if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners))
-                       FREE(lte->stream_owners);
+       list_for_each_entry(blob, blob_list, extraction_list)
+               if (blob->out_refcnt > ARRAY_LEN(blob->inline_blob_extraction_targets))
+                       FREE(blob->blob_extraction_targets); /* heap array exists only once the inline array overflowed (see ref_stream()) */
 }
 
 #ifdef __WIN32__
@@ -814,9 +787,6 @@ dentry_calculate_extraction_name(struct wim_dentry *dentry,
 {
        int ret;
 
-       if (unlikely(!dentry_is_supported(dentry, &ctx->supported_features)))
-               goto skip_dentry;
-
        if (dentry_is_root(dentry))
                return 0;
 
@@ -965,34 +935,35 @@ dentry_list_calculate_extraction_names(struct list_head *dentry_list,
 
 static int
 dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags,
-                      struct wim_lookup_table *lookup_table)
+                      struct blob_table *blob_table)
 {
        struct wim_inode *inode = dentry->d_inode;
-       struct wim_lookup_table_entry *lte;
+       struct blob_descriptor *blob;
        int ret;
        bool force = false;
 
-       /* Special case:  when extracting from a pipe, the WIM lookup table is
+       /* Special case:  when extracting from a pipe, the WIM blob table is
         * initially empty, so "resolving" an inode's streams is initially not
-        * possible.  However, we still need to keep track of which streams,
-        * identified by SHA1 message digests, need to be extracted, so we
-        * "resolve" the inode's streams anyway by allocating new entries.  */
+        * possible.  However, we still need to keep track of which blobs,
+        * identified by SHA-1 message digests, need to be extracted, so we
+        * "resolve" the inode's streams anyway by allocating a 'struct
+        * blob_descriptor' for each one.  */
        if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE)
                force = true;
-       ret = inode_resolve_streams(inode, lookup_table, force);
+       ret = inode_resolve_streams(inode, blob_table, force); /* 'force' is true only for pipe extraction */
        if (ret)
                return ret;
-       for (u32 i = 0; i <= inode->i_num_ads; i++) {
-               lte = inode_stream_lte_resolved(inode, i);
-               if (lte)
-                       lte->out_refcnt = 0;
+       for (unsigned i = 0; i < inode->i_num_streams; i++) { /* all streams now live in the single i_streams array */
+               blob = stream_blob_resolved(&inode->i_streams[i]);
+               if (blob) /* a stream may have no blob */
+                       blob->out_refcnt = 0; /* reset the extraction-target count */
        }
        return 0;
 }
 
 /*
  * For each dentry to be extracted, resolve all streams in the corresponding
- * inode and set 'out_refcnt' in each to 0.
+ * inode and set 'out_refcnt' in all referenced blob_descriptors to 0.
  *
  * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM.
  */
@@ -1006,7 +977,7 @@ dentry_list_resolve_streams(struct list_head *dentry_list,
        list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
                ret = dentry_resolve_streams(dentry,
                                             ctx->extract_flags,
-                                            ctx->wim->lookup_table);
+                                            ctx->wim->blob_table);
                if (ret)
                        return ret;
        }
@@ -1014,142 +985,155 @@ dentry_list_resolve_streams(struct list_head *dentry_list,
 }
 
 static int
-ref_stream(struct wim_lookup_table_entry *lte, unsigned stream_idx,
-          struct wim_dentry *dentry, struct apply_ctx *ctx)
+ref_stream(struct wim_inode_stream *strm, struct wim_dentry *dentry,
+          struct apply_ctx *ctx)
 {
        struct wim_inode *inode = dentry->d_inode;
-       struct stream_owner *stream_owners;
+       struct blob_descriptor *blob = stream_blob_resolved(strm);
+       struct blob_extraction_target *targets;
 
-       if (!lte)
+       if (!blob) /* stream has no resolved blob: nothing to reference */
                return 0;
 
-       /* Tally the size only for each extraction of the stream (not hard
-        * links).  */
+       /* Tally the size only for each actual extraction of the stream (not
+        * additional hard links to the inode).  */
        if (inode->i_visited && ctx->supported_features.hard_links)
                return 0;
 
-       ctx->progress.extract.total_bytes += lte->size;
+       ctx->progress.extract.total_bytes += blob->size;
        ctx->progress.extract.total_streams++;
 
        if (inode->i_visited)
                return 0;
 
-       /* Add stream to the dentry_list only one time, even if it's going
-        * to be extracted to multiple inodes.  */
-       if (lte->out_refcnt == 0) {
-               list_add_tail(&lte->extraction_list, &ctx->stream_list);
-               ctx->num_streams_remaining++;
+       /* Add each blob to 'ctx->blob_list' only one time, regardless of how
+        * many extraction targets it will have.  */
+       if (blob->out_refcnt == 0) {
+               list_add_tail(&blob->extraction_list, &ctx->blob_list);
+               ctx->num_blobs_remaining++;
        }
 
-       /* If inode not yet been visited, append it to the stream_owners array.  */
-       if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) {
-               stream_owners = lte->inline_stream_owners;
+       /* Set this stream as an extraction target of 'blob'.  */
+
+       if (blob->out_refcnt < ARRAY_LEN(blob->inline_blob_extraction_targets)) {
+               targets = blob->inline_blob_extraction_targets; /* inline array still has room */
        } else {
-               struct stream_owner *prev_stream_owners;
-               size_t alloc_stream_owners;
+               struct blob_extraction_target *prev_targets;
+               size_t alloc_blob_extraction_targets;
 
-               if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) {
-                       prev_stream_owners = NULL;
-                       alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners);
+               if (blob->out_refcnt == ARRAY_LEN(blob->inline_blob_extraction_targets)) {
+                       prev_targets = NULL; /* inline array just became full; no heap array exists yet */
+                       alloc_blob_extraction_targets = ARRAY_LEN(blob->inline_blob_extraction_targets);
                } else {
-                       prev_stream_owners = lte->stream_owners;
-                       alloc_stream_owners = lte->alloc_stream_owners;
+                       prev_targets = blob->blob_extraction_targets;
+                       alloc_blob_extraction_targets = blob->alloc_blob_extraction_targets;
                }
 
-               if (lte->out_refcnt == alloc_stream_owners) {
-                       alloc_stream_owners *= 2;
-                       stream_owners = REALLOC(prev_stream_owners,
-                                              alloc_stream_owners *
-                                               sizeof(stream_owners[0]));
-                       if (!stream_owners)
+               if (blob->out_refcnt == alloc_blob_extraction_targets) {
+                       alloc_blob_extraction_targets *= 2; /* array is full: double its capacity */
+                       targets = REALLOC(prev_targets,
+                                         alloc_blob_extraction_targets *
+                                         sizeof(targets[0]));
+                       if (!targets) /* blob's fields are left untouched on failure */
                                return WIMLIB_ERR_NOMEM;
-                       if (!prev_stream_owners) {
-                               memcpy(stream_owners,
-                                      lte->inline_stream_owners,
-                                      sizeof(lte->inline_stream_owners));
+                       if (!prev_targets) { /* first heap allocation: copy the inline entries over */
+                               memcpy(targets,
+                                      blob->inline_blob_extraction_targets,
+                                      sizeof(blob->inline_blob_extraction_targets));
                        }
-                       lte->stream_owners = stream_owners;
-                       lte->alloc_stream_owners = alloc_stream_owners;
+                       blob->blob_extraction_targets = targets;
+                       blob->alloc_blob_extraction_targets = alloc_blob_extraction_targets;
                }
-               stream_owners = lte->stream_owners;
-       }
-       stream_owners[lte->out_refcnt].inode = inode;
-       if (stream_idx == 0) {
-               stream_owners[lte->out_refcnt].stream_name = NULL;
-       } else {
-               stream_owners[lte->out_refcnt].stream_name =
-                       inode->i_ads_entries[stream_idx - 1].stream_name;
+               targets = blob->blob_extraction_targets;
        }
-       lte->out_refcnt++;
+       targets[blob->out_refcnt].inode = inode;
+       targets[blob->out_refcnt].stream = strm; /* the target records the stream itself, not just its name */
+       blob->out_refcnt++; /* one more extraction target recorded for this blob */
        return 0;
 }
 
 static int
-ref_unnamed_stream(struct wim_dentry *dentry, struct apply_ctx *ctx)
+ref_stream_if_needed(struct wim_dentry *dentry, struct wim_inode *inode,
+                    struct wim_inode_stream *strm, struct apply_ctx *ctx)
 {
-       struct wim_inode *inode = dentry->d_inode;
-       int ret;
-       unsigned stream_idx;
-       struct wim_lookup_table_entry *stream;
-
-       if (unlikely(ctx->apply_ops->will_externally_back)) {
-               ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx);
-               if (ret >= 0) {
-                       if (ret) /* Error */
-                               return ret;
-                       /* Will externally back */
-                       return 0;
+       bool need_stream = false; /* set below when this stream should be referenced for extraction */
+       switch (strm->stream_type) {
+       case STREAM_TYPE_DATA:
+               if (stream_is_named(strm)) {
+                       /* Named data stream  */
+                       if (ctx->supported_features.named_data_streams)
+                               need_stream = true;
+               } else if (!(inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
+                                                   FILE_ATTRIBUTE_ENCRYPTED))
+                          && !(inode_is_symlink(inode)
+                               && !ctx->supported_features.reparse_points
+                               && ctx->supported_features.symlink_reparse_points))
+               {
+                       /*
+                        * Unnamed data stream.  Skip if any of the following is true:
+                        *
+                        * - file is a directory
+                        * - file is encrypted
+                        * - backend needs to create the file as UNIX symlink
+                        * - backend will extract the stream as externally backed
+                        */
+                       if (ctx->apply_ops->will_externally_back) {
+                               int ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx);
+                               if (ret > 0) /* Error?  */
+                                       return ret;
+                               if (ret < 0) /* Won't externally back?  */
+                                       need_stream = true;
+                       } else {
+                               need_stream = true; /* backend provides no external-backing hook */
+                       }
+               }
+               break;
+       case STREAM_TYPE_REPARSE_POINT:
+               wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
+               if (ctx->supported_features.reparse_points ||
+                   (inode_is_symlink(inode) &&
+                    ctx->supported_features.symlink_reparse_points))
+                       need_stream = true; /* full reparse-point support, or a symlink the backend can create */
+               break;
+       case STREAM_TYPE_EFSRPC_RAW_DATA:
+               wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED);
+               if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) {
+                       if (ctx->supported_features.encrypted_directories)
+                               need_stream = true;
+               } else {
+                       if (ctx->supported_features.encrypted_files)
+                               need_stream = true;
                }
-               /* Won't externally back */
+               break;
        }
-
-       stream = inode_unnamed_stream_resolved(inode, &stream_idx);
-       return ref_stream(stream, stream_idx, dentry, ctx);
+       if (need_stream)
+               return ref_stream(strm, dentry, ctx);
+       return 0; /* stream is skipped; this is not an error */
 }
 
 static int
 dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx)
 {
        struct wim_inode *inode = dentry->d_inode;
-       int ret;
-
-       /* The unnamed data stream will almost always be extracted, but there
-        * exist cases in which it won't be.  */
-       ret = ref_unnamed_stream(dentry, ctx);
-       if (ret)
-               return ret;
-
-       /* Named data streams will be extracted only if supported in the current
-        * extraction mode and volume, and to avoid complications, if not doing
-        * a linked extraction.  */
-       if (ctx->supported_features.named_data_streams) {
-               for (unsigned i = 0; i < inode->i_num_ads; i++) {
-                       if (!inode->i_ads_entries[i].stream_name_nbytes)
-                               continue;
-                       ret = ref_stream(inode->i_ads_entries[i].lte, i + 1,
-                                        dentry, ctx);
-                       if (ret)
-                               return ret;
-               }
+       for (unsigned i = 0; i < inode->i_num_streams; i++) { /* consider every stream of the inode */
+               int ret = ref_stream_if_needed(dentry, inode,
+                                              &inode->i_streams[i], ctx);
+               if (ret) /* stop at the first error; i_visited stays unset */
+                       return ret;
        }
        inode->i_visited = 1;
        return 0;
 }
 
 /*
- * For each dentry to be extracted, iterate through the data streams of the
- * corresponding inode.  For each such stream that is not to be ignored due to
- * the supported features or extraction flags, add it to the list of streams to
- * be extracted (ctx->stream_list) if not already done so.
+ * Given a list of dentries to be extracted, build the list of blobs that need
+ * to be extracted, and for each blob determine the streams to which that blob
+ * will be extracted.
  *
- * Also builds a mapping from each stream to the inodes referencing it.
- *
- * This also initializes the extract progress info with byte and stream
+ * This also initializes the extract progress info with byte and blob
  * information.
  *
  * ctx->supported_features must be filled in.
- *
- * Possible error codes: WIMLIB_ERR_NOMEM.
  */
 static int
 dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx)
@@ -1207,7 +1191,7 @@ inode_tally_features(const struct wim_inode *inode,
                features->not_context_indexed_files++;
        if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE)
                features->sparse_files++;
-       if (inode_has_named_stream(inode))
+       if (inode_has_named_data_stream(inode))
                features->named_data_streams++;
        if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
                features->reparse_points++;
@@ -1258,6 +1242,18 @@ do_feature_check(const struct wim_features *required_features,
                 const struct wim_features *supported_features,
                 int extract_flags)
 {
+       /* Encrypted files.  */
+       if (required_features->encrypted_files &&
+           !supported_features->encrypted_files)
+               WARNING("Ignoring EFS-encrypted data of %lu files",
+                       required_features->encrypted_files);
+
+       /* Named data streams.  */
+       if (required_features->named_data_streams &&
+           !supported_features->named_data_streams)
+               WARNING("Ignoring named data streams of %lu files",
+                       required_features->named_data_streams);
+
        /* File attributes.  */
        if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
                /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE.
@@ -1295,18 +1291,6 @@ do_feature_check(const struct wim_features *required_features,
                                required_features->encrypted_directories);
        }
 
-       /* Encrypted files.  */
-       if (required_features->encrypted_files &&
-           !supported_features->encrypted_files)
-               WARNING("Ignoring %lu encrypted files",
-                       required_features->encrypted_files);
-
-       /* Named data streams.  */
-       if (required_features->named_data_streams &&
-           (!supported_features->named_data_streams))
-               WARNING("Ignoring named data streams of %lu files",
-                       required_features->named_data_streams);
-
        /* Hard links.  */
        if (required_features->hard_links && !supported_features->hard_links)
                WARNING("Extracting %lu hard links as independent files",
@@ -1326,12 +1310,11 @@ do_feature_check(const struct wim_features *required_features,
        {
                if (supported_features->symlink_reparse_points) {
                        if (required_features->other_reparse_points) {
-                               WARNING("Ignoring %lu non-symlink/junction "
-                                       "reparse point files",
+                               WARNING("Ignoring reparse data of %lu non-symlink/junction files",
                                        required_features->other_reparse_points);
                        }
                } else {
-                       WARNING("Ignoring %lu reparse point files",
+                       WARNING("Ignoring reparse data of %lu files",
                                required_features->reparse_points);
                }
        }
@@ -1415,7 +1398,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
 
        if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
                ret = extract_dentries_to_stdout(trees, num_trees,
-                                                wim->lookup_table);
+                                                wim->blob_table);
                goto out;
        }
 
@@ -1453,7 +1436,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
                                                                         wim->current_image);
                ctx->progress.extract.target = target;
        }
-       INIT_LIST_HEAD(&ctx->stream_list);
+       INIT_LIST_HEAD(&ctx->blob_list);
        filedes_invalidate(&ctx->tmpfile_fd);
        ctx->apply_ops = ops;
 
@@ -1494,8 +1477,8 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
        if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
                /* When extracting from a pipe, the number of bytes of data to
                 * extract can't be determined in the normal way (examining the
-                * lookup table), since at this point all we have is a set of
-                * SHA1 message digests of streams that need to be extracted.
+                * blob table), since at this point all we have is a set of
+                * SHA-1 message digests of blobs that need to be extracted.
                 * However, we can get a reasonably accurate estimate by taking
                 * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML
                 * data.  This does assume that a full image is being extracted,
@@ -1538,7 +1521,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees,
                                       WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END :
                                       WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END));
 out_cleanup:
-       destroy_stream_list(&ctx->stream_list);
+       destroy_blob_list(&ctx->blob_list);
        destroy_dentry_list(&dentry_list);
        FREE(ctx);
 out:
@@ -1683,7 +1666,7 @@ do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target,
        if (ret)
                return ret;
 
-       ret = wim_checksum_unhashed_streams(wim);
+       ret = wim_checksum_unhashed_blobs(wim);
        if (ret)
                return ret;
 
@@ -1905,8 +1888,8 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
 
        /* Read the WIM header from the pipe and get a WIMStruct to represent
         * the pipable WIM.  Caveats:  Unlike getting a WIMStruct with
-        * wimlib_open_wim(), getting a WIMStruct in this way will result in
-        * an empty lookup table, no XML data read, and no filename set.  */
+        * wimlib_open_wim(), getting a WIMStruct in this way will result in an
+        * empty blob table, no XML data read, and no filename set.  */
        ret = open_wim_as_WIMStruct(&pipe_fd, WIMLIB_OPEN_FLAG_FROM_PIPE, &pwm,
                                    progfunc, progctx);
        if (ret)
@@ -1938,21 +1921,20 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
         * write_pipable_wim() for more details about the format of pipable
         * WIMs.)  */
        {
-               struct wim_lookup_table_entry xml_lte;
-               struct wim_resource_spec xml_rspec;
-               ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL);
+               struct blob_descriptor xml_blob;
+               struct wim_resource_descriptor xml_rdesc;
+               ret = read_pwm_blob_header(pwm, &xml_blob, &xml_rdesc, 0, NULL);
                if (ret)
                        goto out_wimlib_free;
 
-               if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA))
+               if (!(xml_blob.flags & WIM_RESHDR_FLAG_METADATA))
                {
-                       ERROR("Expected XML data, but found non-metadata "
-                             "stream.");
+                       ERROR("Expected XML data, but found non-metadata resource.");
                        ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                        goto out_wimlib_free;
                }
 
-               wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr);
+               wim_res_desc_to_hdr(&xml_rdesc, &pwm->hdr.xml_data_reshdr);
 
                ret = read_wim_xml_data(pwm);
                if (ret)
@@ -1991,33 +1973,33 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
 
        /* Load the needed metadata resource.  */
        for (i = 1; i <= pwm->hdr.image_count; i++) {
-               struct wim_lookup_table_entry *metadata_lte;
+               struct blob_descriptor *metadata_blob;
                struct wim_image_metadata *imd;
-               struct wim_resource_spec *metadata_rspec;
+               struct wim_resource_descriptor *metadata_rdesc;
 
-               metadata_lte = new_lookup_table_entry();
-               if (metadata_lte == NULL) {
+               metadata_blob = new_blob_descriptor();
+               if (metadata_blob == NULL) {
                        ret = WIMLIB_ERR_NOMEM;
                        goto out_wimlib_free;
                }
-               metadata_rspec = MALLOC(sizeof(struct wim_resource_spec));
-               if (metadata_rspec == NULL) {
+               metadata_rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
+               if (metadata_rdesc == NULL) {
                        ret = WIMLIB_ERR_NOMEM;
-                       free_lookup_table_entry(metadata_lte);
+                       free_blob_descriptor(metadata_blob);
                        goto out_wimlib_free;
                }
 
-               ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL);
+               ret = read_pwm_blob_header(pwm, metadata_blob, metadata_rdesc, 0, NULL);
                imd = pwm->image_metadata[i - 1];
-               imd->metadata_lte = metadata_lte;
+               imd->metadata_blob = metadata_blob;
                if (ret) {
-                       FREE(metadata_rspec);
+                       FREE(metadata_rdesc);
                        goto out_wimlib_free;
                }
 
-               if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) {
+               if (!(metadata_blob->flags & WIM_RESHDR_FLAG_METADATA)) {
                        ERROR("Expected metadata resource, but found "
-                             "non-metadata stream.");
+                             "non-metadata resource.");
                        ret = WIMLIB_ERR_INVALID_PIPABLE_WIM;
                        goto out_wimlib_free;
                }
@@ -2032,7 +2014,7 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd,
                } else {
                        /* Metadata resource is not for the image being
                         * extracted.  Skip over it.  */
-                       ret = skip_wim_stream(metadata_lte);
+                       ret = skip_wim_resource(metadata_rdesc);
                        if (ret)
                                goto out_wimlib_free;
                }
index 634f4da..8208899 100644 (file)
@@ -143,7 +143,7 @@ read_wim_header(WIMStruct *wim, struct wim_header *hdr)
                return WIMLIB_ERR_IMAGE_COUNT;
        }
 
-       get_wim_reshdr(&disk_hdr.lookup_table_reshdr, &hdr->lookup_table_reshdr);
+       get_wim_reshdr(&disk_hdr.blob_table_reshdr, &hdr->blob_table_reshdr);
        get_wim_reshdr(&disk_hdr.xml_data_reshdr, &hdr->xml_data_reshdr);
        get_wim_reshdr(&disk_hdr.boot_metadata_reshdr, &hdr->boot_metadata_reshdr);
        hdr->boot_idx = le32_to_cpu(disk_hdr.boot_idx);
@@ -182,7 +182,7 @@ write_wim_header_at_offset(const struct wim_header *hdr, struct filedes *out_fd,
        disk_hdr.part_number = cpu_to_le16(hdr->part_number);
        disk_hdr.total_parts = cpu_to_le16(hdr->total_parts);
        disk_hdr.image_count = cpu_to_le32(hdr->image_count);
-       put_wim_reshdr(&hdr->lookup_table_reshdr, &disk_hdr.lookup_table_reshdr);
+       put_wim_reshdr(&hdr->blob_table_reshdr, &disk_hdr.blob_table_reshdr);
        put_wim_reshdr(&hdr->xml_data_reshdr, &disk_hdr.xml_data_reshdr);
        put_wim_reshdr(&hdr->boot_metadata_reshdr, &disk_hdr.boot_metadata_reshdr);
        disk_hdr.boot_idx = cpu_to_le32(hdr->boot_idx);
@@ -313,14 +313,14 @@ wimlib_print_header(const WIMStruct *wim)
        tprintf(T("Part Number                 = %hu\n"), hdr->part_number);
        tprintf(T("Total Parts                 = %hu\n"), hdr->total_parts);
        tprintf(T("Image Count                 = %u\n"), hdr->image_count);
-       tprintf(T("Lookup Table Size           = %"PRIu64"\n"),
-                               (u64)hdr->lookup_table_reshdr.size_in_wim);
-       tprintf(T("Lookup Table Flags          = 0x%hhx\n"),
-                               (u8)hdr->lookup_table_reshdr.flags);
-       tprintf(T("Lookup Table Offset         = %"PRIu64"\n"),
-                               hdr->lookup_table_reshdr.offset_in_wim);
-       tprintf(T("Lookup Table Original_size  = %"PRIu64"\n"),
-                               hdr->lookup_table_reshdr.uncompressed_size);
+       tprintf(T("Blob Table Size             = %"PRIu64"\n"),
+                               (u64)hdr->blob_table_reshdr.size_in_wim); /* labels are padded to 28 chars so '=' lines up with the other rows */
+       tprintf(T("Blob Table Flags            = 0x%hhx\n"),
+                               (u8)hdr->blob_table_reshdr.flags);
+       tprintf(T("Blob Table Offset           = %"PRIu64"\n"),
+                               hdr->blob_table_reshdr.offset_in_wim);
+       tprintf(T("Blob Table Original_size    = %"PRIu64"\n"),
+                               hdr->blob_table_reshdr.uncompressed_size);
        tprintf(T("XML Data Size               = %"PRIu64"\n"),
                                (u64)hdr->xml_data_reshdr.size_in_wim);
        tprintf(T("XML Data Flags              = 0x%hhx\n"),
index 6384b11..1114a15 100644 (file)
@@ -8,7 +8,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
 #include <errno.h>
 
 #include "wimlib/assert.h"
+#include "wimlib/blob_table.h"
 #include "wimlib/dentry.h"
 #include "wimlib/encoding.h"
-#include "wimlib/endianness.h"
 #include "wimlib/error.h"
 #include "wimlib/inode.h"
-#include "wimlib/lookup_table.h"
-#include "wimlib/security.h"
 #include "wimlib/timestamp.h"
 
+/*
+ * The 'stream_name' field of unnamed streams always points to this array, which
+ * is an empty UTF-16 string.
+ */
+const utf16lechar NO_STREAM_NAME[1];
+
 /* Allocate a new inode.  Set the timestamps to the current time.  */
 struct wim_inode *
 new_inode(void)
@@ -62,31 +66,28 @@ new_timeless_inode(void)
        if (inode) {
                inode->i_security_id = -1;
                /*inode->i_nlink = 0;*/
-               inode->i_next_stream_id = 1;
                inode->i_not_rpfixed = 1;
-               inode->i_canonical_streams = 1;
                INIT_LIST_HEAD(&inode->i_list);
                INIT_LIST_HEAD(&inode->i_dentry);
        }
        return inode;
 }
 
-/* Free memory allocated within an alternate data stream entry.  */
-static void
-destroy_ads_entry(struct wim_ads_entry *ads_entry)
+static inline void
+destroy_stream(struct wim_inode_stream *strm)
 {
-       FREE(ads_entry->stream_name);
+       if (strm->stream_name != NO_STREAM_NAME) /* unnamed streams point at the shared static array, which must not be freed */
+               FREE(strm->stream_name);
 }
 
 static void
 free_inode(struct wim_inode *inode)
 {
-       if (unlikely(inode->i_ads_entries)) {
-               for (unsigned i = 0; i < inode->i_num_ads; i++)
-                       destroy_ads_entry(&inode->i_ads_entries[i]);
-               FREE(inode->i_ads_entries);
-       }
-       if (unlikely(inode->i_extra))
+       for (unsigned i = 0; i < inode->i_num_streams; i++)
+               destroy_stream(&inode->i_streams[i]);
+       if (inode->i_streams != inode->i_embedded_streams)
+               FREE(inode->i_streams);
+       if (inode->i_extra)
                FREE(inode->i_extra);
        /* HACK: This may instead delete the inode from i_list, but hlist_del()
         * behaves the same as list_del(). */
@@ -155,346 +156,285 @@ inode_dec_num_opened_fds(struct wim_inode *inode)
 #endif
 
 /*
- * Returns the alternate data stream entry belonging to @inode that has the
- * stream name @stream_name, or NULL if the inode has no alternate data stream
- * with that name.
+ * Retrieve a stream of an inode.
  *
- * If @p stream_name is the empty string, NULL is returned --- that is, this
- * function will not return "unnamed" alternate data stream entries.
+ * @inode
+ *     The inode from which the stream is desired
+ * @stream_type
+ *     The type of the stream desired
+ * @stream_name
+ *     The name of the stream desired as a null-terminated UTF-16LE string, or
+ *     NO_STREAM_NAME if an unnamed stream is desired
  *
- * If NULL is returned, errno is set.
+ * Returns a pointer to the stream if found, otherwise NULL.
  */
-struct wim_ads_entry *
-inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name)
+struct wim_inode_stream *
+inode_get_stream(const struct wim_inode *inode, int stream_type,
+                const utf16lechar *stream_name)
 {
-       int ret;
-       const utf16lechar *stream_name_utf16le;
-       size_t stream_name_utf16le_nbytes;
-       unsigned i;
-       struct wim_ads_entry *result;
-
-       if (inode->i_num_ads == 0) {
-               errno = ENOENT;
-               return NULL;
-       }
-
-       if (stream_name[0] == T('\0')) {
-               errno = ENOENT;
-               return NULL;
+       if (stream_name == NO_STREAM_NAME)  /* Optimization  */
+               return inode_get_unnamed_stream(inode, stream_type);
+
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               struct wim_inode_stream *strm = &inode->i_streams[i];
+               if (strm->stream_type == stream_type &&
+                   !cmp_utf16le_strings_z(strm->stream_name, stream_name,
+                                          default_ignore_case))
+               {
+                       return strm;
+               }
        }
+       return NULL;
+}
 
-       ret = tstr_get_utf16le_and_len(stream_name, &stream_name_utf16le,
-                                      &stream_name_utf16le_nbytes);
-       if (ret)
-               return NULL;
-
-       i = 0;
-       result = NULL;
-       do {
-               if (!cmp_utf16le_strings(inode->i_ads_entries[i].stream_name,
-                                        inode->i_ads_entries[i].stream_name_nbytes /
-                                               sizeof(utf16lechar),
-                                        stream_name_utf16le,
-                                        stream_name_utf16le_nbytes /
-                                               sizeof(utf16lechar),
-                                        default_ignore_case))
+/*
+ * This is equivalent to inode_get_stream(inode, stream_type, NO_STREAM_NAME),
+ * but this optimizes for the unnamed case by not doing full string comparisons.
+ */
+struct wim_inode_stream *
+inode_get_unnamed_stream(const struct wim_inode *inode, int stream_type)
+{
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               struct wim_inode_stream *strm = &inode->i_streams[i];
+               if (strm->stream_type == stream_type &&
+                   strm->stream_name == NO_STREAM_NAME)
                {
-                       result = &inode->i_ads_entries[i];
-                       break;
+                       return strm;
                }
-       } while (++i != inode->i_num_ads);
-
-       tstr_put_utf16le(stream_name_utf16le);
-
-       if (!result)
-               errno = ENOENT;
-       return result;
+       }
+       return NULL;
 }
 
-static struct wim_ads_entry *
-do_inode_add_ads(struct wim_inode *inode,
-                utf16lechar *stream_name, size_t stream_name_nbytes)
+/*
+ * Add a new stream to the specified inode.
+ *
+ * @inode
+ *     The inode to which to add the stream
+ * @stream_type
+ *     The type of the stream being added
+ * @stream_name
+ *     The name of the stream being added as a null-terminated UTF-16LE string,
+ *     or NO_STREAM_NAME if the stream is unnamed
+ * @blob
+ *     The blob that the new stream will initially reference, or NULL
+ *
+ * Returns a pointer to the new stream, or NULL with errno set if it could not
+ * be added.
+ */
+struct wim_inode_stream *
+inode_add_stream(struct wim_inode *inode, int stream_type,
+                const utf16lechar *stream_name, struct blob_descriptor *blob)
 {
-       unsigned num_ads;
-       struct wim_ads_entry *ads_entries;
-       struct wim_ads_entry *new_entry;
-
-       if (unlikely(inode->i_num_ads >= 0xfffe)) {
-               ERROR("File \"%"TS"\" has too many alternate data streams!",
+       if (inode->i_num_streams >= 0xFFFF) {
+               ERROR("Inode has too many streams! Path=\"%"TS"\"",
                      inode_first_full_path(inode));
                errno = EFBIG;
                return NULL;
        }
-       num_ads = inode->i_num_ads + 1;
-       ads_entries = REALLOC(inode->i_ads_entries,
-                             num_ads * sizeof(inode->i_ads_entries[0]));
-       if (!ads_entries)
-               return NULL;
-
-       inode->i_ads_entries = ads_entries;
-
-       new_entry = &inode->i_ads_entries[num_ads - 1];
-
-       memset(new_entry, 0, sizeof(struct wim_ads_entry));
-       new_entry->stream_name = stream_name;
-       new_entry->stream_name_nbytes = stream_name_nbytes;
-       new_entry->stream_id = inode->i_next_stream_id++;
-       inode->i_num_ads = num_ads;
-       return new_entry;
-}
 
-/*
- * Add an alternate data stream entry to a WIM inode (UTF-16LE version).  On
- * success, returns a pointer to the new entry.  Note that this pointer might
- * become invalid if another ADS entry is added to the inode.  On failure,
- * returns NULL and sets errno.
- */
-struct wim_ads_entry *
-inode_add_ads_utf16le(struct wim_inode *inode,
-                     const utf16lechar *stream_name, size_t stream_name_nbytes)
-{
-       utf16lechar *dup = NULL;
-       struct wim_ads_entry *result;
+       struct wim_inode_stream *streams;
+       struct wim_inode_stream *new_strm;
 
-       if (stream_name_nbytes) {
-               dup = utf16le_dupz(stream_name, stream_name_nbytes);
-               if (!dup)
+       if (inode->i_streams == inode->i_embedded_streams) {
+               if (inode->i_num_streams < ARRAY_LEN(inode->i_embedded_streams)) {
+                       streams = inode->i_embedded_streams;
+               } else {
+                       streams = MALLOC((inode->i_num_streams + 1) *
+                                               sizeof(inode->i_streams[0]));
+                       if (!streams)
+                               return NULL;
+                       memcpy(streams, inode->i_streams,
+                              (inode->i_num_streams *
+                                       sizeof(inode->i_streams[0])));
+                       inode->i_streams = streams;
+               }
+       } else {
+               streams = REALLOC(inode->i_streams,
+                                 (inode->i_num_streams + 1) *
+                                       sizeof(inode->i_streams[0]));
+               if (!streams)
                        return NULL;
+               inode->i_streams = streams;
        }
+       new_strm = &streams[inode->i_num_streams];
+
+       memset(new_strm, 0, sizeof(*new_strm));
+
+       new_strm->stream_type = stream_type;
+       if (!*stream_name) {
+               /* Unnamed stream  */
+               new_strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
+       } else {
+               /* Named stream  */
+               new_strm->stream_name = utf16le_dup(stream_name);
+               if (!new_strm->stream_name)
+                       return NULL;
+       }
+       new_strm->stream_id = inode->i_next_stream_id++;
 
-       result = do_inode_add_ads(inode, dup, stream_name_nbytes);
-       if (!result)
-               FREE(dup);
-       return result;
-}
+       stream_set_blob(new_strm, blob);
 
-/*
- * Add an alternate data stream entry to a WIM inode (tchar version).  On
- * success, returns a pointer to the new entry.  Note that this pointer might
- * become invalid if another ADS entry is added to the inode.  On failure,
- * returns NULL and sets errno.
- */
-struct wim_ads_entry *
-inode_add_ads(struct wim_inode *inode, const tchar *stream_name)
-{
-       utf16lechar *stream_name_utf16le = NULL;
-       size_t stream_name_utf16le_nbytes = 0;
-       struct wim_ads_entry *result;
-
-       if (stream_name && *stream_name)
-               if (tstr_to_utf16le(stream_name,
-                                   tstrlen(stream_name) * sizeof(tchar),
-                                   &stream_name_utf16le,
-                                   &stream_name_utf16le_nbytes))
-                       return NULL;
+       inode->i_num_streams++;
 
-       result = do_inode_add_ads(inode, stream_name_utf16le,
-                                 stream_name_utf16le_nbytes);
-       if (!result)
-               FREE(stream_name_utf16le);
-       return result;
+       return new_strm;
 }
 
 /*
- * Add an data alternate stream entry to a WIM inode, where the contents of the
- * new stream are specified in a data buffer.  The inode must be resolved.
+ * Create a new blob descriptor for the specified data buffer or use an existing
+ * blob descriptor in @blob_table for an identical blob, then add a stream of
+ * the specified type and name to the specified inode and set it to initially
+ * reference the blob.
+ *
+ * @inode
+ *     The inode to which to add the stream
+ * @stream_type
+ *     The type of the stream being added
+ * @stream_name
+ *     The name of the stream being added as a null-terminated UTF-16LE string,
+ *     or NO_STREAM_NAME if the stream is unnamed
+ * @data
+ *     The uncompressed data of the blob
+ * @size
+ *     The size, in bytes, of the blob data
+ * @blob_table
+ *     Pointer to the blob table in which the blob needs to be indexed.
  *
- * On success, returns a pointer to the new alternate data stream entry.  Note
- * that this pointer might become invalid if another ADS entry is added to the
- * inode.  On failure, returns NULL and sets errno.
+ * Returns a pointer to the new stream if successfully added, otherwise NULL
+ * with errno set.
  */
-struct wim_ads_entry *
-inode_add_ads_with_data(struct wim_inode *inode, const tchar *name,
-                       const void *value, size_t size,
-                       struct wim_lookup_table *lookup_table)
+struct wim_inode_stream *
+inode_add_stream_with_data(struct wim_inode *inode,
+                          int stream_type, const utf16lechar *stream_name,
+                          const void *data, size_t size,
+                          struct blob_table *blob_table)
 {
-       struct wim_ads_entry *new_entry;
+       struct blob_descriptor *blob;
+       struct wim_inode_stream *strm;
 
-       wimlib_assert(inode->i_resolved);
-
-       new_entry = inode_add_ads(inode, name);
-       if (unlikely(!new_entry))
-               return NULL;
-
-       new_entry->lte = new_stream_from_data_buffer(value, size, lookup_table);
-       if (unlikely(!new_entry->lte)) {
-               inode_remove_ads(inode, new_entry, NULL);
+       blob = new_blob_from_data_buffer(data, size, blob_table);
+       if (!blob)
                return NULL;
-       }
-       return new_entry;
+       strm = inode_add_stream(inode, stream_type, stream_name, blob);
+       if (!strm)
+               blob_decrement_refcnt(blob, blob_table);
+       return strm;
 }
 
-/* Remove an alternate data stream from a WIM inode.  */
+/*
+ * Remove a stream from the specified inode and release the reference to the
+ * blob descriptor, if any.
+ */
 void
-inode_remove_ads(struct wim_inode *inode, struct wim_ads_entry *entry,
-                struct wim_lookup_table *lookup_table)
+inode_remove_stream(struct wim_inode *inode, struct wim_inode_stream *strm,
+                   struct blob_table *blob_table)
 {
-       struct wim_lookup_table_entry *lte;
-       unsigned idx = entry - inode->i_ads_entries;
+       struct blob_descriptor *blob;
+       unsigned idx = strm - inode->i_streams;
 
-       wimlib_assert(idx < inode->i_num_ads);
-       wimlib_assert(inode->i_resolved);
+       wimlib_assert(idx < inode->i_num_streams);
 
-       lte = entry->lte;
-       if (lte)
-               lte_decrement_refcnt(lte, lookup_table);
+       blob = stream_blob(strm, blob_table);
+       if (blob)
+               blob_decrement_refcnt(blob, blob_table);
 
-       destroy_ads_entry(entry);
+       destroy_stream(strm);
 
-       memmove(&inode->i_ads_entries[idx],
-               &inode->i_ads_entries[idx + 1],
-               (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0]));
-       inode->i_num_ads--;
+       memmove(&inode->i_streams[idx],
+               &inode->i_streams[idx + 1],
+               (inode->i_num_streams - idx - 1) * sizeof(inode->i_streams[0]));
+       inode->i_num_streams--;
 }
 
-/* Return true iff the specified inode has at least one named data stream.  */
+/* Returns true iff the specified inode has at least one named data stream.  */
 bool
-inode_has_named_stream(const struct wim_inode *inode)
+inode_has_named_data_stream(const struct wim_inode *inode)
 {
-       for (unsigned i = 0; i < inode->i_num_ads; i++)
-               if (inode->i_ads_entries[i].stream_name_nbytes)
+       for (unsigned i = 0; i < inode->i_num_streams; i++)
+               if (stream_is_named_data_stream(&inode->i_streams[i]))
                        return true;
        return false;
 }
 
-/* Set the unnamed stream of a WIM inode, given a data buffer containing the
- * stream contents.  The inode must be resolved and cannot already have an
- * unnamed stream.  */
-int
-inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len,
-                        struct wim_lookup_table *lookup_table)
-{
-       wimlib_assert(inode->i_resolved);
-       wimlib_assert(!inode->i_lte);
-
-       inode->i_lte = new_stream_from_data_buffer(data, len, lookup_table);
-       if (!inode->i_lte)
-               return WIMLIB_ERR_NOMEM;
-       return 0;
-}
-
 /*
- * Resolve an inode's single-instance streams.
+ * Resolve an inode's streams.
  *
- * This takes each SHA-1 message digest stored in the inode or one of its ADS
- * entries and replaces it with a pointer directly to the appropriate 'struct
- * wim_lookup_table_entry' currently inserted into @table to represent the
- * single-instance stream having that SHA-1 message digest.
+ * For each stream, this replaces the SHA-1 message digest of the blob data with
+ * a pointer to the 'struct blob_descriptor' for the blob.  Blob descriptors are
+ * looked up in @table.
  *
  * If @force is %false:
- *     If any of the needed single-instance streams do not exist in @table,
- *     return WIMLIB_ERR_RESOURCE_NOT_FOUND and leave the inode unmodified.
+ *     If any of the needed blobs do not exist in @table, return
+ *     WIMLIB_ERR_RESOURCE_NOT_FOUND and leave the inode unmodified.
  * If @force is %true:
- *     If any of the needed single-instance streams do not exist in @table,
- *     allocate new entries for them and insert them into @table.  This does
- *     not, of course, cause these streams to magically exist, but this is
+ *     If any of the needed blobs do not exist in @table, allocate new blob
+ *     descriptors for them and insert them into @table.  This does not, of
+ *     course, cause the data of these blobs to magically exist, but this is
  *     needed by the code for extraction from a pipe.
  *
- * If the inode is already resolved, this function does nothing.
- *
  * Returns 0 on success; WIMLIB_ERR_NOMEM if out of memory; or
- * WIMLIB_ERR_RESOURCE_NOT_FOUND if @force is %false and at least one
- * single-instance stream referenced by the inode was missing.
+ * WIMLIB_ERR_RESOURCE_NOT_FOUND if @force is %false and at least one blob
+ * referenced by the inode was missing.
  */
 int
-inode_resolve_streams(struct wim_inode *inode, struct wim_lookup_table *table,
+inode_resolve_streams(struct wim_inode *inode, struct blob_table *table,
                      bool force)
 {
-       const u8 *hash;
-       struct wim_lookup_table_entry *lte, *ads_lte;
-
-       if (inode->i_resolved)
-               return 0;
-
-       struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads];
-
-       /* Resolve the default data stream */
-       lte = NULL;
-       hash = inode->i_hash;
-       if (!is_zero_hash(hash)) {
-               lte = lookup_stream(table, hash);
-               if (!lte) {
-                       if (force) {
-                               lte = new_lookup_table_entry();
-                               if (!lte)
-                                       return WIMLIB_ERR_NOMEM;
-                               copy_hash(lte->hash, hash);
-                               lookup_table_insert(table, lte);
-                       } else {
-                               goto stream_not_found;
-                       }
-               }
-       }
+       struct blob_descriptor *blobs[inode->i_num_streams];
+
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+
+               if (inode->i_streams[i].stream_resolved)
+                       continue;
 
-       /* Resolve the alternate data streams */
-       for (unsigned i = 0; i < inode->i_num_ads; i++) {
-               struct wim_ads_entry *cur_entry;
+               const u8 *hash = stream_hash(&inode->i_streams[i]);
+               struct blob_descriptor *blob = NULL;
 
-               ads_lte = NULL;
-               cur_entry = &inode->i_ads_entries[i];
-               hash = cur_entry->hash;
                if (!is_zero_hash(hash)) {
-                       ads_lte = lookup_stream(table, hash);
-                       if (!ads_lte) {
-                               if (force) {
-                                       ads_lte = new_lookup_table_entry();
-                                       if (!ads_lte)
-                                               return WIMLIB_ERR_NOMEM;
-                                       copy_hash(ads_lte->hash, hash);
-                                       lookup_table_insert(table, ads_lte);
-                               } else {
-                                       goto stream_not_found;
-                               }
+                       blob = lookup_blob(table, hash);
+                       if (!blob) {
+                               if (!force)
+                                       return blob_not_found_error(inode, hash);
+                               blob = new_blob_descriptor();
+                               if (!blob)
+                                       return WIMLIB_ERR_NOMEM;
+                               copy_hash(blob->hash, hash);
+                               blob_table_insert(table, blob);
                        }
                }
-               ads_ltes[i] = ads_lte;
+               blobs[i] = blob;
        }
-       inode->i_lte = lte;
-       for (unsigned i = 0; i < inode->i_num_ads; i++)
-               inode->i_ads_entries[i].lte = ads_ltes[i];
-       inode->i_resolved = 1;
-       return 0;
 
-stream_not_found:
-       return stream_not_found_error(inode, hash);
+       for (unsigned i = 0; i < inode->i_num_streams; i++)
+               if (!inode->i_streams[i].stream_resolved)
+                       stream_set_blob(&inode->i_streams[i], blobs[i]);
+       return 0;
 }
 
-/*
- * Undo the effects of inode_resolve_streams().
- *
- * If the inode is not resolved, this function does nothing.
- */
+/* Undo the effects of inode_resolve_streams().  */
 void
 inode_unresolve_streams(struct wim_inode *inode)
 {
-       if (!inode->i_resolved)
-               return;
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
 
-       if (inode->i_lte)
-               copy_hash(inode->i_hash, inode->i_lte->hash);
-       else
-               zero_out_hash(inode->i_hash);
-
-       for (unsigned i = 0; i < inode->i_num_ads; i++) {
-               if (inode->i_ads_entries[i].lte)
-                       copy_hash(inode->i_ads_entries[i].hash,
-                                 inode->i_ads_entries[i].lte->hash);
-               else
-                       zero_out_hash(inode->i_ads_entries[i].hash);
+               if (!inode->i_streams[i].stream_resolved)
+                       continue;
+
+               copy_hash(inode->i_streams[i]._stream_hash,
+                         stream_hash(&inode->i_streams[i]));
+               inode->i_streams[i].stream_resolved = 0;
        }
-       inode->i_resolved = 0;
 }
 
 int
-stream_not_found_error(const struct wim_inode *inode, const u8 *hash)
+blob_not_found_error(const struct wim_inode *inode, const u8 *hash)
 {
        if (wimlib_print_errors) {
                tchar hashstr[SHA1_HASH_SIZE * 2 + 1];
 
                sprint_hash(hash, hashstr);
 
-               ERROR("\"%"TS"\": stream not found\n"
-                     "        SHA-1 message digest of missing stream:\n"
+               ERROR("\"%"TS"\": blob not found\n"
+                     "        SHA-1 message digest of missing blob:\n"
                      "        %"TS"",
                      inode_first_full_path(inode), hashstr);
        }
@@ -502,334 +442,131 @@ stream_not_found_error(const struct wim_inode *inode, const u8 *hash)
 }
 
 /*
- * Return the lookup table entry for the specified stream of the inode, or NULL
- * if the specified stream is empty or not available.
- *
- * stream_idx = 0: default data stream
- * stream_idx > 0: alternate data stream
+ * Return the blob descriptor for the specified stream, or NULL if the blob for
+ * the stream is empty or not available.
  */
-struct wim_lookup_table_entry *
-inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
-                const struct wim_lookup_table *table)
+struct blob_descriptor *
+stream_blob(const struct wim_inode_stream *strm, const struct blob_table *table)
 {
-       if (inode->i_resolved)
-               return inode_stream_lte_resolved(inode, stream_idx);
-       if (stream_idx == 0)
-               return lookup_stream(table, inode->i_hash);
-       return lookup_stream(table, inode->i_ads_entries[stream_idx - 1].hash);
+       if (strm->stream_resolved)
+               return strm->_stream_blob;
+       else
+               return lookup_blob(table, strm->_stream_hash);
 }
 
-/*
- * Return the lookup table entry for the unnamed data stream of a *resolved*
- * inode, or NULL if the inode's unnamed data stream is empty.  Also return the
- * 0-based index of the unnamed data stream in *stream_idx_ret.
- */
-struct wim_lookup_table_entry *
-inode_unnamed_stream_resolved(const struct wim_inode *inode,
-                             unsigned *stream_idx_ret)
+/* Return the SHA-1 message digest of the data of the specified stream, or a
+ * void SHA-1 of all zeroes if the specified stream is empty.   */
+const u8 *
+stream_hash(const struct wim_inode_stream *strm)
 {
-       wimlib_assert(inode->i_resolved);
-
-       *stream_idx_ret = 0;
-       if (likely(inode->i_lte))
-               return inode->i_lte;
-
-       for (unsigned i = 0; i < inode->i_num_ads; i++) {
-               if (inode->i_ads_entries[i].stream_name_nbytes == 0 &&
-                   inode->i_ads_entries[i].lte)
-               {
-                       *stream_idx_ret = i + 1;
-                       return inode->i_ads_entries[i].lte;
-               }
-       }
-       return NULL;
+       if (strm->stream_resolved)
+               return strm->_stream_blob ? strm->_stream_blob->hash : zero_hash;
+       else
+               return strm->_stream_hash;
 }
 
 /*
- * Return the lookup table entry for the unnamed data stream of an inode, or
- * NULL if the inode's unnamed data stream is empty or not available.
- *
- * Note: this is complicated by the fact that WIMGAPI may put the unnamed data
- * stream in an alternate data stream entry rather than in the dentry itself.
+ * Return the blob descriptor for the unnamed data stream of the inode, or NULL
+ * if the inode does not have an unnamed data stream, the blob for the inode's
+ * unnamed data stream is empty, or the blob for the inode's unnamed data stream
+ * is not available in @blob_table.
  */
-struct wim_lookup_table_entry *
-inode_unnamed_lte(const struct wim_inode *inode,
-                 const struct wim_lookup_table *table)
+struct blob_descriptor *
+inode_get_blob_for_unnamed_data_stream(const struct wim_inode *inode,
+                                      const struct blob_table *blob_table)
 {
-       struct wim_lookup_table_entry *lte;
-
-       if (inode->i_resolved)
-               return inode_unnamed_lte_resolved(inode);
+       const struct wim_inode_stream *strm;
 
-       lte = lookup_stream(table, inode->i_hash);
-       if (likely(lte))
-               return lte;
+       strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA);
+       if (!strm)
+               return NULL;
 
-       for (unsigned i = 0; i < inode->i_num_ads; i++) {
-               if (inode->i_ads_entries[i].stream_name_nbytes)
-                       continue;
-               lte = lookup_stream(table, inode->i_ads_entries[i].hash);
-               if (lte)
-                       return lte;
-       }
-       return NULL;
+       return stream_blob(strm, blob_table);
 }
 
-/* Return the SHA-1 message digest of the specified stream of the inode, or a
- * void SHA-1 of all zeroes if the specified stream is empty.   */
-const u8 *
-inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx)
+/* Like inode_get_blob_for_unnamed_data_stream(), but assumes the unnamed data
+ * stream is resolved.  */
+struct blob_descriptor *
+inode_get_blob_for_unnamed_data_stream_resolved(const struct wim_inode *inode)
 {
-       if (inode->i_resolved) {
-               struct wim_lookup_table_entry *lte;
+       const struct wim_inode_stream *strm;
 
-               lte = inode_stream_lte_resolved(inode, stream_idx);
-               if (lte)
-               &nbs