From: Eric Biggers Date: Tue, 17 Mar 2015 03:17:15 +0000 (-0500) Subject: Stream and blob updates X-Git-Tag: v1.8.1~71 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=3de1ec66f778edda19865482d685bc6f4e17faf7 Stream and blob updates - Rename "lookup table entry" to "blob descriptor" - Rename "lookup table" to "blob table" - Use single array for all an inode's streams - Explicitly annotate each stream with its type - Account for fact that EFSRPC raw data includes all data streams - Other cleanups --- diff --git a/Makefile.am b/Makefile.am index 84139fce..23ccd855 100644 --- a/Makefile.am +++ b/Makefile.am @@ -36,6 +36,7 @@ wimlib.pc: config.status libwim_la_SOURCES = \ src/add_image.c \ src/avl_tree.c \ + src/blob_table.c \ src/capture_common.c \ src/compress.c \ src/compress_common.c \ @@ -59,7 +60,6 @@ libwim_la_SOURCES = \ src/iterate_dir.c \ src/join.c \ src/lcpit_matchfinder.c \ - src/lookup_table.c \ src/lzms_common.c \ src/lzms_compress.c \ src/lzms_decompress.c \ diff --git a/doc/man1/wimlib-imagex-info.1 b/doc/man1/wimlib-imagex-info.1 index cc6e3f9d..a952a2f6 100644 --- a/doc/man1/wimlib-imagex-info.1 +++ b/doc/man1/wimlib-imagex-info.1 @@ -52,8 +52,9 @@ byte-order mark. \fB--header\fR Shows detailed information from the WIM header. .TP -\fB--lookup-table\fR -Prints all the entries in the stream lookup table of the WIM. +\fB--blobs\fR +Prints information about all the blobs ("file data") in the WIM. A WIM file +stores only one copy of each unique blob. .TP \fB--xml\fR Prints the raw XML data from the WIM. 
Note: the XML data will be encoded using diff --git a/doc/man1/wimlib-imagex-verify.1 b/doc/man1/wimlib-imagex-verify.1 index 68a66589..a889829f 100644 --- a/doc/man1/wimlib-imagex-verify.1 +++ b/doc/man1/wimlib-imagex-verify.1 @@ -12,7 +12,7 @@ Specifically, this command performs the following verifications on the WIM archive: .IP \[bu] 4 Verify that the WIM file can be successfully opened, which involves parsing the -header, lookup table, and XML data. +header, blob table, and XML data. .IP \[bu] If the WIM archive contains an integrity table, verify the integrity of the entire WIM archive. Otherwise, print a warning. diff --git a/include/wimlib.h b/include/wimlib.h index 8c904c53..df0ec28f 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -1449,7 +1449,11 @@ struct wimlib_resource_entry { * solid resource in the WIM. */ uint64_t raw_resource_compressed_size; - uint64_t reserved[2]; + /** If @p packed is 1, then this will specify the uncompressed size of + * the solid resource in the WIM. 
*/ + uint64_t raw_resource_uncompressed_size; + + uint64_t reserved[1]; }; /** diff --git a/include/wimlib/apply.h b/include/wimlib/apply.h index 54850fec..3d7873e7 100644 --- a/include/wimlib/apply.h +++ b/include/wimlib/apply.h @@ -31,8 +31,8 @@ struct wim_features { unsigned long case_sensitive_filenames; }; -struct wim_lookup_table_entry; -struct read_stream_list_callbacks; +struct blob_descriptor; +struct read_blob_list_callbacks; struct apply_operations; struct wim_dentry; @@ -67,20 +67,20 @@ struct apply_ctx { const struct apply_operations *apply_ops; u64 next_progress; unsigned long invalid_sequence; - unsigned long num_streams_remaining; - struct list_head stream_list; - const struct read_stream_list_callbacks *saved_cbs; - struct wim_lookup_table_entry *cur_stream; - u64 cur_stream_offset; + unsigned long num_blobs_remaining; + struct list_head blob_list; + const struct read_blob_list_callbacks *saved_cbs; + struct blob_descriptor *cur_blob; + u64 cur_blob_offset; struct filedes tmpfile_fd; tchar *tmpfile_name; unsigned int count_until_file_progress; }; /* Maximum number of UNIX file descriptors, NTFS attributes, or Windows file - * handles that can be opened simultaneously to extract a single-instance - * stream to multiple destinations. */ -#define MAX_OPEN_STREAMS 512 + * handles that can be opened simultaneously to extract a blob to multiple + * destinations. */ +#define MAX_OPEN_FILES 512 static inline int extract_progress(struct apply_ctx *ctx, enum wimlib_progress_msg msg) @@ -108,14 +108,14 @@ start_file_structure_phase(struct apply_ctx *ctx, uint64_t end_file_count); extern int start_file_metadata_phase(struct apply_ctx *ctx, uint64_t end_file_count); -/* Report that a file was created, prior to stream extraction. */ +/* Report that a file was created, prior to blob extraction. 
*/ static inline int report_file_created(struct apply_ctx *ctx) { return maybe_do_file_progress(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE); } -/* Report that file metadata was applied, after stream extraction. */ +/* Report that file metadata was applied, after blob extraction. */ static inline int report_file_metadata_applied(struct apply_ctx *ctx) { @@ -140,8 +140,8 @@ report_apply_error(struct apply_ctx *ctx, int error_code, const tchar *path) struct wim_dentry, d_extraction_alias_node) extern int -extract_stream_list(struct apply_ctx *ctx, - const struct read_stream_list_callbacks *cbs); +extract_blob_list(struct apply_ctx *ctx, + const struct read_blob_list_callbacks *cbs); /* * Represents an extraction backend. @@ -205,9 +205,9 @@ struct apply_operations { * dentries of that inode being extracted. This will be a (possibly * nonproper) subset of the 'd_inode->i_dentry' list. * - * The streams required to be extracted will already be prepared in - * 'apply_ctx'. The extraction backend should call - * extract_stream_list() to extract them. + * The blobs required to be extracted will already be prepared in + * 'apply_ctx'. The extraction backend should call extract_blob_list() + * to extract them. * * The will_extract_dentry() utility function, given an arbitrary dentry * in the WIM image (which may not be in the extraction list), can be @@ -221,7 +221,7 @@ struct apply_operations { * Query whether the unnamed data stream of the specified file will be * extracted as "externally backed". If so, the extraction backend is * assumed to handle this separately, and the common extraction code - * will not register a usage of that stream. + * will not register a usage of the unnamed data stream's blob. * * This routine is optional. 
* diff --git a/include/wimlib/blob_table.h b/include/wimlib/blob_table.h new file mode 100644 index 00000000..c07acfad --- /dev/null +++ b/include/wimlib/blob_table.h @@ -0,0 +1,400 @@ +#ifndef _WIMLIB_BLOB_TABLE_H +#define _WIMLIB_BLOB_TABLE_H + +#include "wimlib/list.h" +#include "wimlib/resource.h" +#include "wimlib/sha1.h" +#include "wimlib/types.h" + +/* An enumerated type that identifies where a blob's data is located. */ +enum blob_location { + + /* The blob's data does not exist. This is a temporary state only. */ + BLOB_NONEXISTENT = 0, + + /* The blob's data is located in a WIM resource identified by the + * `struct wim_resource_descriptor' pointed to by @rdesc. + * @offset_in_res identifies the offset at which this particular blob + * begins in the uncompressed data of the resource. */ + BLOB_IN_WIM, + + /* The blob's data is available as the contents of the file named by + * @file_on_disk. */ + BLOB_IN_FILE_ON_DISK, + + /* The blob's data is available as the contents of the in-memory buffer + * pointed to by @attached_buffer. */ + BLOB_IN_ATTACHED_BUFFER, + +#ifdef WITH_FUSE + /* The blob's data is available as the contents of the file with name + * @staging_file_name relative to the open directory file descriptor + * @staging_dir_fd. */ + BLOB_IN_STAGING_FILE, +#endif + +#ifdef WITH_NTFS_3G + /* The blob's data is available as the contents of an NTFS attribute + * accessible through libntfs-3g. The attribute is identified by + * volume, path to an inode, attribute name, and attribute type. + * @ntfs_loc points to a structure containing this information. */ + BLOB_IN_NTFS_VOLUME, +#endif + +#ifdef __WIN32__ + /* Windows only: the blob's data is available as the contents of the + * data stream named by @file_on_disk. @file_on_disk is an NT namespace + * path that may be longer than the Win32-level MAX_PATH. Furthermore, + * the stream may require "backup semantics" to access. 
*/ + BLOB_IN_WINNT_FILE_ON_DISK, + + /* Windows only: the blob's data is available as the raw encrypted data + * of the external file named by @file_on_disk. @file_on_disk is a + * Win32 namespace path. */ + BLOB_WIN32_ENCRYPTED, +#endif +}; + +/* A "blob target" is a stream, and the inode to which that stream belongs, to + * which a blob needs to be extracted as part of an extraction operation. Since + * blobs are single-instanced, a blob may have multiple targets. */ +struct blob_extraction_target { + struct wim_inode *inode; + struct wim_inode_stream *stream; +}; + +/* + * Descriptor for a blob, which is a known length sequence of binary data. + * + * Within a WIM file, blobs are single instanced and are identified by SHA-1 + * message digest. + */ +struct blob_descriptor { + + /* List node for a hash bucket of the blob table */ + struct hlist_node hash_list; + + /* Uncompressed size of this blob */ + u64 size; + + /* One of the `enum blob_location' values documented above. */ + u32 blob_location : 4; + + /* Blob flags (WIM_RESHDR_FLAG_*) */ + u32 flags : 8; + + /* 1 iff the SHA-1 message digest of this blob is unknown. */ + u32 unhashed : 1; + + /* Temporary fields used when writing blobs; set as documented for + * prepare_blob_list_for_write(). */ + u32 unique_size : 1; + u32 will_be_in_output_wim : 1; + + /* Set to 1 if this blob represents a metadata resource that has been + * changed. In such cases, the hash cannot be used to verify the data + * if the metadata resource is read again. (This could be avoided if we + * used separate fields for input/output checksum, but most blobs + * wouldn't need this.) */ + u32 dont_check_metadata_hash : 1; + + u32 may_send_done_with_file : 1; + + /* Only used by wimlib_export_image() */ + u32 was_exported : 1; + + union { + /* + * For unhashed == 0: 'hash' is the SHA-1 message digest of the + * blob's data. 
'hash_short' allows accessing just a prefix of + * the SHA-1 message digest, which is useful for getting a "hash + * code" for hash table lookup/insertion. + */ + u8 hash[SHA1_HASH_SIZE]; + size_t hash_short; + + /* For unhashed == 1: these variables make it possible to find + * the stream that references this blob. There can be at most + * one such reference, since duplicate blobs can only be joined + * after they have been hashed. */ + struct { + struct wim_inode *back_inode; + u32 back_stream_id; + }; + }; + + /* Number of times this blob is referenced by file streams in WIM + * images. See blob_decrement_refcnt() for information about the + * limitations of this field. */ + u32 refcnt; + + /* + * When a WIM file is written, this is set to the number of references + * (from file streams) to this blob in the output WIM file. + * + * During extraction, this is set to the number of targets to which this + * blob is being extracted. + * + * During image export, this is set to the number of references of this + * blob that originated from the source WIM. + * + * When mounting a WIM image read-write, this is set to the number of + * extra references to this blob preemptively taken to allow later + * saving the modified image as a new image and leaving the original + * image alone. + */ + u32 out_refcnt; + +#ifdef WITH_FUSE + /* Number of open file descriptors to this blob during a FUSE mount of + * the containing image. */ + u16 num_opened_fds; +#endif + + /* Specification of where this blob's data is located. Which member of + * this union is valid is determined by the @blob_location field. 
*/ + union { + /* BLOB_IN_WIM */ + struct { + struct wim_resource_descriptor *rdesc; + u64 offset_in_res; + }; + + /* BLOB_IN_FILE_ON_DISK + * BLOB_IN_WINNT_FILE_ON_DISK + * BLOB_WIN32_ENCRYPTED */ + struct { + tchar *file_on_disk; + struct wim_inode *file_inode; + }; + + /* BLOB_IN_ATTACHED_BUFFER */ + void *attached_buffer; + +#ifdef WITH_FUSE + /* BLOB_IN_STAGING_FILE */ + struct { + char *staging_file_name; + int staging_dir_fd; + }; +#endif + +#ifdef WITH_NTFS_3G + /* BLOB_IN_NTFS_VOLUME */ + struct ntfs_location *ntfs_loc; +#endif + }; + + /* Links together blobs that share the same underlying WIM resource. + * The head is the 'blob_list' member of + * 'struct wim_resource_descriptor'. */ + struct list_head rdesc_node; + + /* Temporary fields */ + union { + /* Fields used temporarily during WIM file writing. */ + struct { + union { + /* List node used for blob size table. */ + struct hlist_node hash_list_2; + + /* Metadata for the underlying solid resource in + * the WIM being written (only valid if + * WIM_RESHDR_FLAG_SOLID set in + * out_reshdr.flags). */ + struct { + u64 out_res_offset_in_wim; + u64 out_res_size_in_wim; + u64 out_res_uncompressed_size; + }; + }; + + /* Links blobs being written to the WIM. */ + struct list_head write_blobs_list; + + union { + /* Metadata for this blob in the WIM being + * written. */ + struct wim_reshdr out_reshdr; + + struct { + /* Name under which this blob is being + * sorted; used only when sorting blobs + * for solid compression. */ + utf16lechar *solid_sort_name; + size_t solid_sort_name_nbytes; + }; + }; + }; + + /* Used temporarily during extraction. This is an array of + * references to the streams being extracted that use this blob. + * out_refcnt tracks the number of slots filled. */ + union { + struct blob_extraction_target inline_blob_extraction_targets[3]; + struct { + struct blob_extraction_target *blob_extraction_targets; + u32 alloc_blob_extraction_targets; + }; + }; + }; + + /* Temporary list fields. 
*/ + union { + /* Links blobs for writing blob table. */ + struct list_head blob_table_list; + + /* Links blobs being extracted. */ + struct list_head extraction_list; + + /* Links blobs being exported. */ + struct list_head export_blob_list; + + /* Links original list of blobs in the read-write mounted image. */ + struct list_head orig_blob_list; + }; + + /* Links blobs that are still unhashed after being added to a WIM. + */ + struct list_head unhashed_list; +}; + +extern struct blob_table * +new_blob_table(size_t capacity) _malloc_attribute; + +extern void +free_blob_table(struct blob_table *table); + +extern int +read_blob_table(WIMStruct *wim); + +extern int +write_blob_table_from_blob_list(struct list_head *blob_list, + struct filedes *out_fd, + u16 part_number, + struct wim_reshdr *out_reshdr, + int write_resource_flags); + +extern struct blob_descriptor * +new_blob_descriptor(void) _malloc_attribute; + +extern struct blob_descriptor * +clone_blob_descriptor(const struct blob_descriptor *blob) + _malloc_attribute; + +extern void +blob_decrement_refcnt(struct blob_descriptor *blob, + struct blob_table *table); +#ifdef WITH_FUSE +extern void +blob_decrement_num_opened_fds(struct blob_descriptor *blob); +#endif + +extern void +free_blob_descriptor(struct blob_descriptor *blob); + +extern void +blob_table_insert(struct blob_table *table, struct blob_descriptor *blob); + +extern void +blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob); + +extern struct blob_descriptor * +lookup_blob(const struct blob_table *table, const u8 *hash); + +extern int +for_blob_in_table(struct blob_table *table, + int (*visitor)(struct blob_descriptor *, void *), void *arg); + +extern int +for_blob_in_table_sorted_by_sequential_order(struct blob_table *table, + int (*visitor)(struct blob_descriptor *, void *), + void *arg); + +struct wimlib_resource_entry; + +extern void +blob_to_wimlib_resource_entry(const struct blob_descriptor *blob, + struct 
wimlib_resource_entry *wentry); + +extern int +sort_blob_list(struct list_head *blob_list, + size_t list_head_offset, + int (*compar)(const void *, const void*)); + +extern int +sort_blob_list_by_sequential_order(struct list_head *blob_list, + size_t list_head_offset); + +extern int +cmp_blobs_by_sequential_order(const void *p1, const void *p2); + +static inline bool +blob_is_in_solid_wim_resource(const struct blob_descriptor *blob) +{ + return blob->blob_location == BLOB_IN_WIM && + blob->size != blob->rdesc->uncompressed_size; +} + +static inline bool +blob_is_in_file(const struct blob_descriptor *blob) +{ + return blob->blob_location == BLOB_IN_FILE_ON_DISK +#ifdef __WIN32__ + || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK + || blob->blob_location == BLOB_WIN32_ENCRYPTED +#endif + ; +} + +static inline const struct blob_extraction_target * +blob_extraction_targets(struct blob_descriptor *blob) +{ + if (blob->out_refcnt <= ARRAY_LEN(blob->inline_blob_extraction_targets)) + return blob->inline_blob_extraction_targets; + else + return blob->blob_extraction_targets; +} + +static inline void +blob_set_is_located_in_wim_resource(struct blob_descriptor *blob, + struct wim_resource_descriptor *rdesc) +{ + blob->blob_location = BLOB_IN_WIM; + blob->rdesc = rdesc; + list_add_tail(&blob->rdesc_node, &rdesc->blob_list); +} + +static inline void +blob_unset_is_located_in_wim_resource(struct blob_descriptor *blob) +{ + list_del(&blob->rdesc_node); + blob->blob_location = BLOB_NONEXISTENT; +} + +extern struct blob_descriptor * +new_blob_from_data_buffer(const void *buffer, size_t size, + struct blob_table *blob_table); + +extern int +hash_unhashed_blob(struct blob_descriptor *blob, + struct blob_table *blob_table, + struct blob_descriptor **blob_ret); + +extern struct blob_descriptor ** +retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob); + +static inline void +prepare_unhashed_blob(struct blob_descriptor *blob, + struct wim_inode *back_inode, u32 stream_id, 
+ struct list_head *unhashed_blobs) +{ + if (!blob) + return; + blob->unhashed = 1; + blob->back_inode = back_inode; + blob->back_stream_id = stream_id; + list_add_tail(&blob->unhashed_list, unhashed_blobs); +} + +#endif /* _WIMLIB_BLOB_TABLE_H */ diff --git a/include/wimlib/capture.h b/include/wimlib/capture.h index ae2640a7..7b352dc5 100644 --- a/include/wimlib/capture.h +++ b/include/wimlib/capture.h @@ -9,7 +9,7 @@ #include "wimlib/textfile.h" #include "wimlib/util.h" -struct wim_lookup_table; +struct blob_table; struct wim_dentry; struct wim_inode; @@ -22,12 +22,12 @@ struct capture_config { /* Common parameters to implementations of building an in-memory dentry tree * from an on-disk directory structure. */ struct capture_params { - /* Pointer to the lookup table of the WIM. */ - struct wim_lookup_table *lookup_table; + /* Pointer to the blob table of the WIM. */ + struct blob_table *blob_table; - /* List of streams that have been added so far, but without their SHA1 + /* List of blobs that have been added so far, but without their SHA-1 * message digests being calculated (as a shortcut). */ - struct list_head *unhashed_streams; + struct list_head *unhashed_blobs; /* Hash table of inodes that have been captured for this tree so far. */ struct wim_inode_table *inode_table; diff --git a/include/wimlib/dentry.h b/include/wimlib/dentry.h index 05cb8c3f..241dc56d 100644 --- a/include/wimlib/dentry.h +++ b/include/wimlib/dentry.h @@ -9,11 +9,11 @@ #include "wimlib/types.h" struct wim_inode; -struct wim_lookup_table; +struct blob_table; /* Base size of a WIM dentry in the on-disk format, up to and including the file * name length. This does not include the variable-length file name, short - * name, alternate data stream entries, and padding to 8-byte boundaries. */ + * name, extra stream entries, and padding to 8-byte boundaries. 
*/ #define WIM_DENTRY_DISK_SIZE 102 /* @@ -144,7 +144,7 @@ will_extract_dentry(const struct wim_dentry *dentry) return dentry->d_extraction_list_node.next != NULL; } -extern u64 +extern size_t dentry_out_total_length(const struct wim_dentry *dentry); extern int @@ -250,7 +250,7 @@ free_dentry(struct wim_dentry *dentry); extern void free_dentry_tree(struct wim_dentry *root, - struct wim_lookup_table *lookup_table); + struct blob_table *blob_table); extern void unlink_dentry(struct wim_dentry *dentry); diff --git a/include/wimlib/encoding.h b/include/wimlib/encoding.h index f9e5f264..c3b7fb8c 100644 --- a/include/wimlib/encoding.h +++ b/include/wimlib/encoding.h @@ -37,6 +37,15 @@ varname1##_to_##varname2##_buf(const chartype1 *in, size_t in_nbytes, \ extern utf16lechar * utf16le_dupz(const void *ustr, size_t usize); +extern utf16lechar * +utf16le_dup(const utf16lechar *ustr); + +extern size_t +utf16le_len_bytes(const utf16lechar *s); + +extern size_t +utf16le_len_chars(const utf16lechar *s); + #if !TCHAR_IS_UTF16LE DECLARE_CHAR_CONVERSION_FUNCTIONS(utf16le, tstr, utf16lechar, tchar); DECLARE_CHAR_CONVERSION_FUNCTIONS(tstr, utf16le, tchar, utf16lechar); @@ -72,6 +81,10 @@ cmp_utf16le_strings(const utf16lechar *s1, size_t n1, const utf16lechar *s2, size_t n2, bool ignore_case); +extern int +cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, + bool ignore_case); + /* Convert a string in the platform-dependent encoding to UTF-16LE, but if both * encodings are UTF-16LE, simply re-use the string. Release with * tstr_put_utf16le() when done. */ diff --git a/include/wimlib/header.h b/include/wimlib/header.h index 6c522fa2..feb31764 100644 --- a/include/wimlib/header.h +++ b/include/wimlib/header.h @@ -16,7 +16,7 @@ /* Default WIM version number. Streams are always compressed independently. 
*/ #define WIM_VERSION_DEFAULT 0x10d00 -/* Version number used for WIMs that allow multiple streams combined into one +/* Version number used for WIMs that allow multiple blobs combined into one * resource ("solid resources", marked by WIM_RESHDR_FLAG_SOLID) and also a new * compression format (LZMS). This version is new as of Windows 8 WIMGAPI. * Although it is used by Windows 8 web downloader, it is not yet documented by @@ -87,8 +87,8 @@ struct wim_header_disk { * least 1. wimlib allows 0. */ u32 image_count; - /* +0x30: Location and size of the WIM's lookup table. */ - struct wim_reshdr_disk lookup_table_reshdr; + /* +0x30: Location and size of the WIM's blob table. */ + struct wim_reshdr_disk blob_table_reshdr; /* +0x48: Location and size of the WIM's XML data. */ struct wim_reshdr_disk xml_data_reshdr; @@ -128,7 +128,7 @@ struct wim_header { u16 part_number; u16 total_parts; u32 image_count; - struct wim_reshdr lookup_table_reshdr; + struct wim_reshdr blob_table_reshdr; struct wim_reshdr xml_data_reshdr; struct wim_reshdr boot_metadata_reshdr; u32 boot_idx; @@ -153,14 +153,14 @@ struct wim_header { /* The WIM is part of a split WIM. */ #define WIM_HDR_FLAG_SPANNED 0x00000008 -/* All streams included in the WIM's lookup table are non-metadata (do not have +/* All blobs included in the WIM's blob table are non-metadata (do not have * WIM_RESHDR_FLAG_METADATA set). wimlib ignores this flag and clears it on new * WIM files it writes. */ #define WIM_HDR_FLAG_RESOURCE_ONLY 0x00000010 -/* All streams included in the WIM's lookup table are metadata (have - * WIM_RESHDR_FLAG_METADATA set). wimlib ignores this flag and clears it on - * new WIM files it writes. */ +/* All blobs included in the WIM's blob table are metadata (have + * WIM_RESHDR_FLAG_METADATA set). wimlib ignores this flag and clears it on new + * WIM files it writes. */ #define WIM_HDR_FLAG_METADATA_ONLY 0x00000020 /* The WIM is currently being written or appended to. 
*/ diff --git a/include/wimlib/inode.h b/include/wimlib/inode.h index 11c2361b..28285b2b 100644 --- a/include/wimlib/inode.h +++ b/include/wimlib/inode.h @@ -1,47 +1,108 @@ #ifndef _WIMLIB_INODE_H #define _WIMLIB_INODE_H +#include "wimlib/assert.h" #include "wimlib/list.h" #include "wimlib/sha1.h" #include "wimlib/types.h" struct avl_tree_node; -struct wim_ads_entry; +struct blob_descriptor; +struct blob_table; struct wim_dentry; -struct wim_lookup_table; -struct wim_lookup_table_entry; struct wim_security_data; struct wimfs_fd; +/* Valid values for the 'stream_type' field of a 'struct wim_inode_stream' */ +enum wim_inode_stream_type { + + /* Data stream, may be unnamed (usual case) or named */ + STREAM_TYPE_DATA, + + /* Reparse point stream. This is the same as the data of the on-disk + * reparse point attribute, except that the first 8 bytes of the on-disk + * attribute are omitted. The omitted bytes contain the reparse tag + * (which is instead stored in the on-disk WIM dentry), the reparse data + * size (which is redundant with the stream size), and a reserved field + * that is always zero. */ + STREAM_TYPE_REPARSE_POINT, + + /* Encrypted data in the "EFSRPC raw data format" specified by [MS-EFSR] + * section 2.2.3. This contains metadata for the Encrypting File System + * as well as the encrypted data of all the file's data streams. */ + STREAM_TYPE_EFSRPC_RAW_DATA, + + /* Stream type could not be determined */ + STREAM_TYPE_UNKNOWN, +}; + +extern const utf16lechar NO_STREAM_NAME[1]; + /* - * WIM inode. + * 'struct wim_inode_stream' describes a "stream", which associates a blob of + * data with an inode. Each stream has a type and optionally a name. + * + * The most frequently seen kind of stream is the "unnamed data stream" + * (stream_type == STREAM_TYPE_DATA && stream_name == NO_STREAM_NAME), which is + * the "default file contents". Many inodes just have an unnamed data stream + * and no other streams. 
However, files on NTFS filesystems may have + * additional, "named" data streams, and this is supported by the WIM format. * - * As mentioned in the comment above `struct wim_dentry', in WIM files there - * is no on-disk analogue of a real inode, as most of these fields are - * duplicated in the dentries. Instead, a `struct wim_inode' is something we - * create ourselves to simplify the handling of hard links. + * A "reparse point" is an inode with reparse data set. The reparse data is + * stored in a stream of type STREAM_TYPE_REPARSE_POINT. There should be only + * one such stream, and it should be unnamed. However, it is possible for an + * inode to have both a reparse point stream and an unnamed data stream, and + * even named data streams as well. */ -struct wim_inode { - /* If i_resolved == 0: - * SHA1 message digest of the contents of the unnamed-data stream - * of this inode. - * - * If i_resolved == 1: - * Pointer to the lookup table entry for the unnamed data stream - * of this inode, or NULL. +struct wim_inode_stream { + + /* The name of the stream or NO_STREAM_NAME. */ + utf16lechar *stream_name; + + /* + * If 'stream_resolved' = 0, then 'stream_hash' is the SHA-1 message + * digest of the uncompressed data of this stream, or all zeroes if this + * stream is empty. * - * i_hash corresponds to the 'unnamed_stream_hash' field of the `struct - * wim_dentry_on_disk' and the additional caveats documented about that - * field apply here (for example, the quirks regarding all-zero hashes). + * If 'stream_resolved' = 1, then 'stream_blob' is a pointer directly to + * a descriptor for this stream's blob, or NULL if this stream is empty. */ union { - u8 i_hash[SHA1_HASH_SIZE]; - struct wim_lookup_table_entry *i_lte; + u8 _stream_hash[SHA1_HASH_SIZE]; + struct blob_descriptor *_stream_blob; }; - /* Corresponds to the 'attributes' field of `struct wim_dentry_on_disk'; - * bitwise OR of the FILE_ATTRIBUTE_* flags that give the attributes of - * this inode. 
*/ + /* 'stream_resolved' determines whether 'stream_hash' or 'stream_blob' + * is valid as described above. */ + u32 stream_resolved : 1; + + /* A unique identifier for this stream within the context of its inode. + * This stays constant even if the streams array is reallocated. */ + u32 stream_id : 28; + + /* The type of this stream as one of the STREAM_TYPE_* values */ + u32 stream_type : 3; +}; + +/* + * WIM inode - a "file" in an image which may be accessible via multiple paths + * + * Note: in WIM files there is no true on-disk analogue of an inode; there are + * only directory entries, and some fields are duplicated among all links to a + * file. However, wimlib uses inode structures internally to simplify handling + * of hard links. + */ +struct wim_inode { + + /* + * The collection of streams for this inode. 'i_streams' points to + * either 'i_embedded_streams' or an allocated array. + */ + struct wim_inode_stream *i_streams; + struct wim_inode_stream i_embedded_streams[1]; + unsigned i_num_streams; + + /* Windows file attribute flags (FILE_ATTRIBUTE_*). */ u32 i_attributes; /* Root of a balanced binary search tree storing the child directory @@ -77,36 +138,15 @@ struct wim_inode { /* Number of dentries that are aliases for this inode. */ u32 i_nlink; - /* Number of alternate data streams (ADS) associated with this inode */ - u16 i_num_ads; - - /* Flag that indicates whether this inode's streams have been - * "resolved". By default, the inode starts as "unresolved", meaning - * that the i_hash field, along with the hash field of any associated - * wim_ads_entry's, are valid and should be used as keys in the WIM - * lookup table to find the associated `struct wim_lookup_table_entry'. - * But if the inode has been resolved, then each of these fields is - * replaced with a pointer directly to the appropriate `struct - * wim_lookup_table_entry', or NULL if the stream is empty. 
*/ - u8 i_resolved : 1; - /* Flag used to mark this inode as visited; this is used when visiting * all the inodes in a dentry tree exactly once. It will be 0 by * default and must be cleared following the tree traversal, even in * error paths. */ u8 i_visited : 1; - /* 1 iff all ADS entries of this inode are named or if this inode - * has no ADS entries */ - u8 i_canonical_streams : 1; - /* Cached value */ u8 i_can_externally_back : 1; - /* Pointer to a malloc()ed array of i_num_ads alternate data stream - * entries for this inode. */ - struct wim_ads_entry *i_ads_entries; - /* If not NULL, a pointer to the extra data that was read from the * dentry. This should be a series of tagged items, each of which * represents a bit of extra metadata, such as the file's object ID. @@ -180,8 +220,8 @@ struct wim_inode { /* Used during WIM writing with * WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES: the number - * of data streams this inode has that have not yet been fully - * read. */ + * of streams this inode has that have not yet been fully read. + * */ u32 num_remaining_streams; #ifdef WITH_FUSE @@ -204,68 +244,10 @@ struct wim_inode { u16 i_num_allocated_fds; #endif - /* Next alternate data stream ID to be assigned */ + /* Next stream ID to be assigned */ u32 i_next_stream_id; }; -/* Alternate data stream entry. - * - * We read this from disk in the read_ads_entries() function; see that function - * for more explanation. */ -struct wim_ads_entry { - union { - /* SHA-1 message digest of stream contents */ - u8 hash[SHA1_HASH_SIZE]; - - /* The corresponding lookup table entry (only for resolved - * streams) */ - struct wim_lookup_table_entry *lte; - }; - - /* Length of UTF16-encoded stream name, in bytes, not including the - * terminating null character; or 0 if the stream is unnamed. */ - u16 stream_name_nbytes; - - /* Number to identify an alternate data stream even after it's possibly - * been moved or renamed. 
*/ - u32 stream_id; - - /* Stream name (UTF-16LE), null-terminated, or NULL if the stream is - * unnamed. */ - utf16lechar *stream_name; - - /* Reserved field. We read it into memory so we can write it out - * unchanged. */ - u64 reserved; -}; - -/* WIM alternate data stream entry (on-disk format) */ -struct wim_ads_entry_on_disk { - /* Length of the entry, in bytes. This includes all fixed-length - * fields, plus the stream name and null terminator if present, and the - * padding up to an 8 byte boundary. wimlib is a little less strict - * when reading the entries, and only requires that the number of bytes - * from this field is at least as large as the size of the fixed length - * fields and stream name without null terminator. */ - le64 length; - - le64 reserved; - - /* SHA1 message digest of the uncompressed stream; or, alternatively, - * can be all zeroes if the stream has zero length. */ - u8 hash[SHA1_HASH_SIZE]; - - /* Length of the stream name, in bytes. 0 if the stream is unnamed. */ - le16 stream_name_nbytes; - - /* Stream name in UTF-16LE. It is @stream_name_nbytes bytes long, - * excluding the null terminator. There is a null terminator character - * if @stream_name_nbytes != 0; i.e., if this stream is named. */ - utf16lechar stream_name[]; -} _packed_attribute; - -#define WIM_ADS_ENTRY_DISK_SIZE 38 - /* * Reparse tags documented at * http://msdn.microsoft.com/en-us/library/dd541667(v=prot.10).aspx @@ -340,17 +322,6 @@ inode_is_directory(const struct wim_inode *inode) == FILE_ATTRIBUTE_DIRECTORY; } -/* Is the inode a directory with the encrypted attribute set? - * This returns true for encrypted directories even if they have reparse data - * (I'm not sure if such files can even exist!). 
*/ -static inline bool -inode_is_encrypted_directory(const struct wim_inode *inode) -{ - return ((inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_ENCRYPTED)) - == (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_ENCRYPTED)); -} - /* Is the inode a symbolic link? * This returns true iff the inode is a reparse point that is either a "real" * symbolic link or a junction point. */ @@ -362,114 +333,102 @@ inode_is_symlink(const struct wim_inode *inode) inode->i_reparse_tag == WIM_IO_REPARSE_TAG_MOUNT_POINT); } -/* Does the inode have children? - * Currently (based on read_dentry_tree()), this can only return true for inodes - * for which inode_is_directory() returns true. (This also returns false on - * empty directories.) */ +/* Does the inode have children? Currently (based on read_dentry_tree() as well + * as the various build-dentry-tree implementations), this can only return true + * for inodes for which inode_is_directory() returns true. */ static inline bool inode_has_children(const struct wim_inode *inode) { return inode->i_children != NULL; } -extern struct wim_ads_entry * -inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name); +extern struct wim_inode_stream * +inode_get_stream(const struct wim_inode *inode, int stream_type, + const utf16lechar *stream_name); -extern struct wim_ads_entry * -inode_add_ads_utf16le(struct wim_inode *inode, const utf16lechar *stream_name, - size_t stream_name_nbytes); +extern struct wim_inode_stream * +inode_get_unnamed_stream(const struct wim_inode *inode, int stream_type); -extern struct wim_ads_entry * -inode_add_ads(struct wim_inode *dentry, const tchar *stream_name); +extern struct wim_inode_stream * +inode_add_stream(struct wim_inode *inode, int stream_type, + const utf16lechar *stream_name, struct blob_descriptor *blob); -extern struct wim_ads_entry * -inode_add_ads_with_data(struct wim_inode *inode, const tchar *name, - const void *value, size_t size, - struct wim_lookup_table *lookup_table); 
+extern struct wim_inode_stream * +inode_add_stream_with_data(struct wim_inode *inode, int stream_type, + const utf16lechar *stream_name, + const void *data, size_t size, + struct blob_table *blob_table); extern void -inode_remove_ads(struct wim_inode *inode, struct wim_ads_entry *entry, - struct wim_lookup_table *lookup_table); +inode_remove_stream(struct wim_inode *inode, struct wim_inode_stream *strm, + struct blob_table *blob_table); -extern bool -inode_has_named_stream(const struct wim_inode *inode); - -extern int -inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len, - struct wim_lookup_table *lookup_table); - -extern int -inode_resolve_streams(struct wim_inode *inode, struct wim_lookup_table *table, - bool force); - -extern void -inode_unresolve_streams(struct wim_inode *inode); - -extern int -stream_not_found_error(const struct wim_inode *inode, const u8 *hash); - -static inline struct wim_lookup_table_entry * -inode_stream_lte_resolved(const struct wim_inode *inode, unsigned stream_idx) +static inline struct blob_descriptor * +stream_blob_resolved(const struct wim_inode_stream *strm) { - if (stream_idx == 0) - return inode->i_lte; - return inode->i_ads_entries[stream_idx - 1].lte; + wimlib_assert(strm->stream_resolved); + return strm->_stream_blob; } -extern struct wim_lookup_table_entry * -inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx, - const struct wim_lookup_table *table); - -extern struct wim_lookup_table_entry * -inode_unnamed_stream_resolved(const struct wim_inode *inode, - unsigned *stream_idx_ret); - -static inline struct wim_lookup_table_entry * -inode_unnamed_lte_resolved(const struct wim_inode *inode) +static inline void +stream_set_blob(struct wim_inode_stream *strm, struct blob_descriptor *blob) { - unsigned stream_idx; - return inode_unnamed_stream_resolved(inode, &stream_idx); + strm->_stream_blob = blob; + strm->stream_resolved = 1; } -extern struct wim_lookup_table_entry * 
-inode_unnamed_lte(const struct wim_inode *inode, - const struct wim_lookup_table *table); - -extern const u8 * -inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx); - -extern const u8 * -inode_unnamed_stream_hash(const struct wim_inode *inode); +static inline bool +stream_is_named(const struct wim_inode_stream *strm) +{ + return strm->stream_name != NO_STREAM_NAME; +} -static inline unsigned -inode_stream_name_nbytes(const struct wim_inode *inode, unsigned stream_idx) +static inline bool +stream_is_unnamed_data_stream(const struct wim_inode_stream *strm) { - if (stream_idx == 0) - return 0; - return inode->i_ads_entries[stream_idx - 1].stream_name_nbytes; + return strm->stream_type == STREAM_TYPE_DATA && !stream_is_named(strm); } -static inline u32 -inode_stream_idx_to_id(const struct wim_inode *inode, unsigned stream_idx) +static inline bool +stream_is_named_data_stream(const struct wim_inode_stream *strm) { - if (stream_idx == 0) - return 0; - return inode->i_ads_entries[stream_idx - 1].stream_id; + return strm->stream_type == STREAM_TYPE_DATA && stream_is_named(strm); } -extern void -inode_ref_streams(struct wim_inode *inode); +extern bool +inode_has_named_data_stream(const struct wim_inode *inode); + +extern int +inode_resolve_streams(struct wim_inode *inode, + struct blob_table *table, bool force); extern void -inode_unref_streams(struct wim_inode *inode, - struct wim_lookup_table *lookup_table); +inode_unresolve_streams(struct wim_inode *inode); extern int -read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, - size_t *nbytes_remaining_p); +blob_not_found_error(const struct wim_inode *inode, const u8 *hash); + +extern struct blob_descriptor * +stream_blob(const struct wim_inode_stream *strm, const struct blob_table *table); + +extern struct blob_descriptor * +inode_get_blob_for_unnamed_data_stream(const struct wim_inode *inode, + const struct blob_table *blob_table); + +extern struct blob_descriptor * 
+inode_get_blob_for_unnamed_data_stream_resolved(const struct wim_inode *inode); + +extern const u8 * +stream_hash(const struct wim_inode_stream *strm); + +extern const u8 * +inode_get_hash_of_unnamed_data_stream(const struct wim_inode *inode); + +extern void +inode_ref_blobs(struct wim_inode *inode); extern void -check_inode(struct wim_inode *inode, const struct wim_security_data *sd); +inode_unref_blobs(struct wim_inode *inode, struct blob_table *blob_table); /* inode_fixup.c */ extern int diff --git a/include/wimlib/integrity.h b/include/wimlib/integrity.h index 1fa723a9..48f24e31 100644 --- a/include/wimlib/integrity.h +++ b/include/wimlib/integrity.h @@ -19,8 +19,8 @@ read_integrity_table(WIMStruct *wim, u64 num_checked_bytes, extern int write_integrity_table(WIMStruct *wim, - off_t new_lookup_table_end, - off_t old_lookup_table_end, + off_t new_blob_table_end, + off_t old_blob_table_end, struct integrity_table *old_table); extern int diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h deleted file mode 100644 index 4158f157..00000000 --- a/include/wimlib/lookup_table.h +++ /dev/null @@ -1,419 +0,0 @@ -#ifndef _WIMLIB_LOOKUP_TABLE_H -#define _WIMLIB_LOOKUP_TABLE_H - -#include "wimlib/list.h" -#include "wimlib/resource.h" -#include "wimlib/sha1.h" -#include "wimlib/types.h" - -/* An enumerated type that identifies where the stream corresponding to this - * lookup table entry is actually located. - * - * If we open a WIM and read its lookup table, the location is set to - * RESOURCE_IN_WIM since all the streams will initially be located in the WIM. - * However, to handle situations such as image capture and image mount, we allow - * the actual location of the stream to be somewhere else, such as an external - * file. */ -enum resource_location { - /* The lookup table entry does not yet correspond to a stream; this is a - * temporary state only. 
*/ - RESOURCE_NONEXISTENT = 0, - - /* The stream is located in a resource in a WIM file identified by the - * `struct wim_resource_spec' pointed to by @rspec. @offset_in_res - * identifies the offset at which this particular stream begins in the - * uncompressed data of the resource; this is normally 0, but a WIM - * resource can be "solid" and contain multiple streams. */ - RESOURCE_IN_WIM, - - /* The stream is located in the external file named by @file_on_disk. - */ - RESOURCE_IN_FILE_ON_DISK, - - /* The stream is directly attached in the in-memory buffer pointed to by - * @attached_buffer. */ - RESOURCE_IN_ATTACHED_BUFFER, - -#ifdef WITH_FUSE - /* The stream is located in the external file named by - * @staging_file_name, located in the staging directory for a read-write - * mount. */ - RESOURCE_IN_STAGING_FILE, -#endif - -#ifdef WITH_NTFS_3G - /* The stream is located in an NTFS volume. It is identified by volume, - * filename, data stream name, and by whether it is a reparse point or - * not. @ntfs_loc points to a structure containing this information. - * */ - RESOURCE_IN_NTFS_VOLUME, -#endif - -#ifdef __WIN32__ - /* Windows only: the stream is located in the external file named by - * @file_on_disk, which is in the Windows NT namespace and may specify a - * named data stream. */ - RESOURCE_IN_WINNT_FILE_ON_DISK, - - /* Windows only: the stream is located in the external file named by - * @file_on_disk, but the file is encrypted and must be read using the - * appropriate Windows API. */ - RESOURCE_WIN32_ENCRYPTED, -#endif -}; - -struct stream_owner { - struct wim_inode *inode; - const utf16lechar *stream_name; -}; - -/* Specification for a stream, which may be the contents of a file (unnamed data - * stream), a named data stream, reparse point data, or a WIM metadata resource. - * - * One instance of this structure is created for each entry in the WIM's lookup - * table, hence the name of the struct. 
Each of these entries contains the SHA1 - * message digest of a stream and the location of the stream data in the WIM - * file (size, location, flags). The in-memory lookup table is a map from SHA1 - * message digests to stream locations. */ -struct wim_lookup_table_entry { - - /* List node for a hash bucket of the lookup table. */ - struct hlist_node hash_list; - - /* Uncompressed size of this stream. */ - u64 size; - - /* Stream flags (WIM_RESHDR_FLAG_*). */ - u32 flags : 8; - - /* One of the `enum resource_location' values documented above. */ - u32 resource_location : 4; - - /* 1 if this stream has not had a SHA1 message digest calculated for it - * yet. */ - u32 unhashed : 1; - - /* Temoorary fields used when writing streams; set as documented for - * prepare_stream_list_for_write(). */ - u32 unique_size : 1; - u32 will_be_in_output_wim : 1; - - /* Set to 1 when a metadata entry has its checksum changed; in such - * cases the hash cannot be used to verify the data if the metadata - * resource is read again. (This could be avoided if we used separate - * fields for input/output checksum, but most stream entries wouldn't - * need this.) */ - u32 dont_check_metadata_hash : 1; - - u32 may_send_done_with_file : 1; - - /* Only used by wimlib_export_image() */ - u32 was_exported : 1; - - union { - /* (On-disk field) SHA1 message digest of the stream referenced - * by this lookup table entry. */ - u8 hash[SHA1_HASH_SIZE]; - - /* First 4 or 8 bytes of the SHA1 message digest, used for - * inserting the entry into the hash table. Since the SHA1 - * message digest can be considered random, we don't really need - * the full 20 byte hash just to insert the entry in a hash - * table. */ - size_t hash_short; - - /* Unhashed entries only (unhashed == 1): these variables make - * it possible to find the pointer to this 'struct - * wim_lookup_table_entry' contained in either 'struct - * wim_ads_entry' or 'struct wim_inode'. 
There can be at most 1 - * such pointer, as we can only join duplicate streams after - * they have been hashed. */ - struct { - struct wim_inode *back_inode; - u32 back_stream_id; - }; - }; - - /* Number of times this lookup table entry is referenced by dentries in - * the WIM. When a WIM's lookup table is read, this field is - * initialized from a corresponding entry. - * - * However, see lte_decrement_refcnt() for information about the - * limitations of this field. */ - u32 refcnt; - - /* When a WIM file is written, this is set to the number of references - * (by dentries) to this stream in the output WIM file. - * - * During extraction, this is the number of slots in stream_owners (or - * inline_stream_owners) that have been filled. - * - * During image export, this is set to the number of references of this - * stream that originated from the source WIM. - * - * When mounting a WIM image read-write, this is set to the number of - * extra references to this stream preemptively taken to allow later - * saving the modified image as a new image and leaving the original - * image alone. */ - u32 out_refcnt; - -#ifdef WITH_FUSE - /* Number of open file descriptors to this stream during a FUSE mount of - * the containing image. */ - u16 num_opened_fds; -#endif - - /* Specification of where this stream is actually located. Which member - * is valid is determined by the @resource_location field. */ - union { - struct { - struct wim_resource_spec *rspec; - u64 offset_in_res; - }; - struct { - tchar *file_on_disk; - struct wim_inode *file_inode; - }; - void *attached_buffer; - #ifdef WITH_FUSE - struct { - char *staging_file_name; - int staging_dir_fd; - }; - #endif - #ifdef WITH_NTFS_3G - struct ntfs_location *ntfs_loc; - #endif - }; - - /* Links together streams that share the same underlying WIM resource. - * The head is the `stream_list' member of `struct wim_resource_spec'. 
- */ - struct list_head rspec_node; - - /* Temporary fields */ - union { - /* Fields used temporarily during WIM file writing. */ - struct { - union { - /* List node used for stream size table. */ - struct hlist_node hash_list_2; - - /* Metadata for the underlying solid resource in - * the WIM being written (only valid if - * WIM_RESHDR_FLAG_SOLID set in - * out_reshdr.flags). */ - struct { - u64 out_res_offset_in_wim; - u64 out_res_size_in_wim; - u64 out_res_uncompressed_size; - }; - }; - - /* Links streams being written to the WIM. */ - struct list_head write_streams_list; - - union { - /* Metadata for this stream in the WIM being - * written. */ - struct wim_reshdr out_reshdr; - - struct { - /* Name under which this stream is being - * sorted; used only when sorting - * streams for solid compression. */ - utf16lechar *solid_sort_name; - size_t solid_sort_name_nbytes; - }; - }; - }; - - /* Used temporarily during extraction. This is an array of - * pointers to the inodes being extracted that use this stream. - */ - union { - /* Inodes to extract that reference this stream. - * out_refcnt tracks the number of slots filled. */ - struct stream_owner inline_stream_owners[3]; - struct { - struct stream_owner *stream_owners; - u32 alloc_stream_owners; - }; - }; - }; - - /* Temporary list fields. */ - union { - /* Links streams for writing lookup table. */ - struct list_head lookup_table_list; - - /* Links streams being extracted. */ - struct list_head extraction_list; - - /* Links streams being exported. */ - struct list_head export_stream_list; - - /* Links original list of streams in the read-write mounted image. */ - struct list_head orig_stream_list; - }; - - /* Links streams that are still unhashed after being been added to a - * WIM. 
*/ - struct list_head unhashed_list; -}; - -/* Functions to allocate and free lookup tables */ - -extern struct wim_lookup_table * -new_lookup_table(size_t capacity) _malloc_attribute; - -extern void -free_lookup_table(struct wim_lookup_table *table); - -/* Functions to read or write the lookup table from/to a WIM file */ - -extern int -read_wim_lookup_table(WIMStruct *wim); - -extern int -write_wim_lookup_table_from_stream_list(struct list_head *stream_list, - struct filedes *out_fd, - u16 part_number, - struct wim_reshdr *out_reshdr, - int write_resource_flags); - -/* Functions to create, clone, print, and free lookup table entries */ - -extern struct wim_lookup_table_entry * -new_lookup_table_entry(void) _malloc_attribute; - -extern struct wim_lookup_table_entry * -clone_lookup_table_entry(const struct wim_lookup_table_entry *lte) - _malloc_attribute; - -extern void -lte_decrement_refcnt(struct wim_lookup_table_entry *lte, - struct wim_lookup_table *table); -#ifdef WITH_FUSE -extern void -lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte); -#endif - -extern void -free_lookup_table_entry(struct wim_lookup_table_entry *lte); - -/* Functions to insert and delete entries from a lookup table */ - -extern void -lookup_table_insert(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte); - -extern void -lookup_table_unlink(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte); - -/* Function to lookup a stream by SHA1 message digest */ -extern struct wim_lookup_table_entry * -lookup_stream(const struct wim_lookup_table *table, const u8 hash[]); - -/* Functions to iterate through the entries of a lookup table */ - -extern int -for_lookup_table_entry(struct wim_lookup_table *table, - int (*visitor)(struct wim_lookup_table_entry *, void *), - void *arg); - -extern int -for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, - int (*visitor)(struct wim_lookup_table_entry *, - void *), - void *arg); - - - -/* Function to 
get a resource entry in stable format */ - -struct wimlib_resource_entry; - -extern void -lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, - struct wimlib_resource_entry *wentry); - -/* Functions to sort a list of lookup table entries */ -extern int -sort_stream_list(struct list_head *stream_list, - size_t list_head_offset, - int (*compar)(const void *, const void*)); - -extern int -sort_stream_list_by_sequential_order(struct list_head *stream_list, - size_t list_head_offset); - -extern int -cmp_streams_by_sequential_order(const void *p1, const void *p2); - -/* Utility functions */ - -extern int -lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore); - -static inline bool -lte_is_partial(const struct wim_lookup_table_entry * lte) -{ - return lte->resource_location == RESOURCE_IN_WIM && - lte->size != lte->rspec->uncompressed_size; -} - -static inline const struct stream_owner * -stream_owners(struct wim_lookup_table_entry *stream) -{ - if (stream->out_refcnt <= ARRAY_LEN(stream->inline_stream_owners)) - return stream->inline_stream_owners; - else - return stream->stream_owners; -} - -static inline void -lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte, - struct wim_resource_spec *rspec) -{ - lte->resource_location = RESOURCE_IN_WIM; - lte->rspec = rspec; - list_add_tail(&lte->rspec_node, &rspec->stream_list); -} - -static inline void -lte_unbind_wim_resource_spec(struct wim_lookup_table_entry *lte) -{ - list_del(&lte->rspec_node); - lte->resource_location = RESOURCE_NONEXISTENT; -} - -extern void -lte_put_resource(struct wim_lookup_table_entry *lte); - -extern struct wim_lookup_table_entry * -new_stream_from_data_buffer(const void *buffer, size_t size, - struct wim_lookup_table *lookup_table); - -static inline void -add_unhashed_stream(struct wim_lookup_table_entry *lte, - struct wim_inode *back_inode, - u32 back_stream_id, - struct list_head *unhashed_streams) -{ - lte->unhashed = 1; - lte->back_inode = back_inode; -
lte->back_stream_id = back_stream_id; - list_add_tail(&lte->unhashed_list, unhashed_streams); -} - -extern int -hash_unhashed_stream(struct wim_lookup_table_entry *lte, - struct wim_lookup_table *lookup_table, - struct wim_lookup_table_entry **lte_ret); - -extern struct wim_lookup_table_entry ** -retrieve_lte_pointer(struct wim_lookup_table_entry *lte); - -#endif /* _WIMLIB_LOOKUP_TABLE_H */ diff --git a/include/wimlib/metadata.h b/include/wimlib/metadata.h index 8a527ac3..af35b539 100644 --- a/include/wimlib/metadata.h +++ b/include/wimlib/metadata.h @@ -22,23 +22,22 @@ struct wim_image_metadata { /* Pointer to the security data of the image. */ struct wim_security_data *security_data; - /* Pointer to the lookup table entry for this image's metadata resource - */ - struct wim_lookup_table_entry *metadata_lte; + /* Pointer to the blob descriptor for this image's metadata resource */ + struct blob_descriptor *metadata_blob; /* Linked list of 'struct wim_inode's for this image. */ struct list_head inode_list; - /* Linked list of 'struct wim_lookup_table_entry's for this image that - * are referred to in the dentry tree, but have not had a SHA1 message - * digest calculated yet and therefore have not been inserted into the - * WIM's lookup table. This list is added to during wimlib_add_image() - * and wimlib_mount_image() (read-write only). */ - struct list_head unhashed_streams; + /* Linked list of 'struct blob_descriptor's for blobs that are + * referenced by this image's dentry tree, but have not had their SHA-1 + * message digests calculated yet and therefore have not been inserted + * into the WIMStruct's blob table. This list is appended to when files + * are scanned for inclusion in this WIM image. */ + struct list_head unhashed_blobs; - /* 1 iff the dentry tree has been modified. If this is the case, the - * memory for the dentry tree should not be freed when switching to a - * different WIM image.
*/ + /* 1 iff the dentry tree has been modified from the original stored in + * the WIM file. If this is the case, the memory for the dentry tree + * should not be freed when switching to a different WIM image. */ u8 modified : 1; #ifdef WITH_NTFS_3G @@ -74,18 +73,17 @@ wim_get_current_security_data(WIMStruct *wim) #define image_for_each_inode(inode, imd) \ list_for_each_entry(inode, &(imd)->inode_list, i_list) -/* Iterate over each stream in a WIM image that has not yet been hashed */ -#define image_for_each_unhashed_stream(lte, imd) \ - list_for_each_entry(lte, &(imd)->unhashed_streams, unhashed_list) +/* Iterate over each blob in a WIM image that has not yet been hashed */ +#define image_for_each_unhashed_blob(blob, imd) \ + list_for_each_entry(blob, &(imd)->unhashed_blobs, unhashed_list) -/* Iterate over each stream in a WIM image that has not yet been hashed (safe - * against stream removal) */ -#define image_for_each_unhashed_stream_safe(lte, tmp, imd) \ - list_for_each_entry_safe(lte, tmp, &(imd)->unhashed_streams, unhashed_list) +/* Iterate over each blob in a WIM image that has not yet been hashed (safe + * against blob removal) */ +#define image_for_each_unhashed_blob_safe(blob, tmp, imd) \ + list_for_each_entry_safe(blob, tmp, &(imd)->unhashed_blobs, unhashed_list) extern void -put_image_metadata(struct wim_image_metadata *imd, - struct wim_lookup_table *table); +put_image_metadata(struct wim_image_metadata *imd, struct blob_table *table); extern int append_image_metadata(WIMStruct *wim, struct wim_image_metadata *imd); diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h index 499c1aa5..6648e5a1 100644 --- a/include/wimlib/ntfs_3g.h +++ b/include/wimlib/ntfs_3g.h @@ -4,17 +4,17 @@ #include "wimlib/callback.h" #include "wimlib/types.h" -struct wim_lookup_table_entry; +struct blob_descriptor; struct _ntfs_volume; #ifdef WITH_NTFS_3G struct _ntfs_volume; struct ntfs_location { - tchar *path; - utf16lechar *stream_name; - u16 stream_name_nchars; 
struct _ntfs_volume *ntfs_vol; - bool is_reparse_point; + char *path; + utf16lechar *attr_name; + unsigned attr_name_nchars; + unsigned attr_type; }; #endif @@ -22,11 +22,8 @@ extern void libntfs3g_global_init(void); extern int -read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, - u64 size, - consume_data_callback_t cb, - void *cb_ctx); - +read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx); extern int do_ntfs_umount(struct _ntfs_volume *vol); diff --git a/include/wimlib/reparse.h b/include/wimlib/reparse.h index f7d7c63c..43adbd5c 100644 --- a/include/wimlib/reparse.h +++ b/include/wimlib/reparse.h @@ -6,8 +6,8 @@ #include "wimlib/types.h" struct wim_inode; -struct wim_lookup_table; -struct wim_lookup_table_entry; +struct blob_table; +struct blob_descriptor; #define REPARSE_POINT_MAX_SIZE (16 * 1024) @@ -86,20 +86,14 @@ make_reparse_buffer(const struct reparse_data * restrict rpdata, u8 * restrict rpbuf, u16 * restrict rpbuflen_ret); -extern int -wim_inode_get_reparse_data(const struct wim_inode * restrict inode, - u8 * restrict rpbuf, - u16 * restrict rpbuflen_ret, - struct wim_lookup_table_entry *lte_override); - #ifndef __WIN32__ ssize_t wim_inode_readlink(const struct wim_inode * restrict inode, char * restrict buf, - size_t buf_len, struct wim_lookup_table_entry *lte); + size_t buf_len, struct blob_descriptor *blob); extern int wim_inode_set_symlink(struct wim_inode *inode, const char *target, - struct wim_lookup_table *lookup_table); + struct blob_table *blob_table); #endif #endif /* _WIMLIB_REPARSE_H */ diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h index 5572f523..98305f00 100644 --- a/include/wimlib/resource.h +++ b/include/wimlib/resource.h @@ -6,22 +6,19 @@ #include "wimlib/sha1.h" #include "wimlib/types.h" +struct blob_descriptor; struct filedes; -struct wim_lookup_table_entry; struct wim_image_metadata; /* - * Specification of a resource in a WIM file. 
- * - * If a `struct wim_lookup_table_entry' lte has (lte->resource_location == - * RESOURCE_IN_WIM), then lte->rspec points to an instance of this structure. - * - * Normally, there is a one-to-one correspondence between lookup table entries - * ("streams", each of which may be the contents of a file, for example) and - * resources. However, a resource with the WIM_RESHDR_FLAG_SOLID flag set is a - * "solid" resource that may contain multiple streams compressed together. + * Description of a "resource" in a WIM file. A "resource" is a standalone, + * possibly compressed region of data. Normally, there is a one-to-one + * correspondence between "blobs" (each of which may be the contents of a file, + * for example) and resources. However, a resource with the + * WIM_RESHDR_FLAG_SOLID flag set is a "solid" resource that contains multiple + * blobs compressed together. */ -struct wim_resource_spec { +struct wim_resource_descriptor { /* The WIM containing this resource. @wim->in_fd is expected to be a * file descriptor to the underlying WIM file, opened for reading. */ WIMStruct *wim; @@ -39,8 +36,8 @@ struct wim_resource_spec { * to. */ u64 uncompressed_size; - /* The list of streams this resource contains. */ - struct list_head stream_list; + /* The list of blobs this resource contains. */ + struct list_head blob_list; /* Flags for this resource (WIM_RESHDR_FLAG_*). */ u32 flags : 8; @@ -89,25 +86,22 @@ struct wim_reshdr { /* Flags for the `flags' field of WIM resource headers (`struct wim_reshdr'). */ -/* Unknown meaning; may be intended to indicate spaces in the WIM that are free - * to overwrite. Currently ignored by wimlib. */ +/* Unknown meaning; currently ignored by wimlib. */ #define WIM_RESHDR_FLAG_FREE 0x01 -/* The resource is a metadata resource for a WIM image, or is the lookup table - * or XML data for the WIM. */ +/* The resource is a metadata resource for a WIM image, or is the blob table or + * XML data for the WIM. 
*/ #define WIM_RESHDR_FLAG_METADATA 0x02 /* The resource is a non-solid resource compressed using the WIM's default * compression type. */ #define WIM_RESHDR_FLAG_COMPRESSED 0x04 -/* Unknown meaning; may be intended to indicate a partial stream. Currently - * ignored by wimlib. */ +/* Unknown meaning; currently ignored by wimlib. */ #define WIM_RESHDR_FLAG_SPANNED 0x08 -/* The resource is a solid compressed resource which may contain multiple - * streams. This flag is only allowed if the WIM version number is - * WIM_VERSION_SOLID. */ +/* The resource is a solid compressed resource which may contain multiple blobs. + * This flag is only allowed if the WIM version number is WIM_VERSION_SOLID. */ #define WIM_RESHDR_FLAG_SOLID 0x10 /* Magic number in the 'uncompressed_size' field of the resource header that @@ -117,9 +111,9 @@ struct wim_reshdr { /* Returns true if the specified WIM resource is compressed (may be either solid * or non-solid) */ static inline bool -resource_is_compressed(const struct wim_resource_spec *rspec) +resource_is_compressed(const struct wim_resource_descriptor *rdesc) { - return (rspec->flags & (WIM_RESHDR_FLAG_COMPRESSED | + return (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SOLID)); } @@ -136,11 +130,11 @@ zero_reshdr(struct wim_reshdr *reshdr) } extern void -wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, - struct wim_resource_spec *rspec); +wim_res_hdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim, + struct wim_resource_descriptor *rdesc); extern void -wim_res_spec_to_hdr(const struct wim_resource_spec *rspec, +wim_res_desc_to_hdr(const struct wim_resource_descriptor *rdesc, struct wim_reshdr *reshdr); extern void @@ -184,18 +178,17 @@ get_chunk_entry_size(u64 res_size, bool is_alt) return 8; } -/* Functions to read streams */ +/* Functions to read blobs */ extern int -read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, - size_t size, u64 offset, void *buf); 
+read_partial_wim_blob_into_buf(const struct blob_descriptor *blob, + size_t size, u64 offset, void *buf); extern int -read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *buf); +read_full_blob_into_buf(const struct blob_descriptor *blob, void *buf); extern int -read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, - void **buf_ret); +read_full_blob_into_alloc_buf(const struct blob_descriptor *blob, void **buf_ret); extern int wim_reshdr_to_data(const struct wim_reshdr *reshdr, @@ -206,100 +199,93 @@ wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim, u8 hash[SHA1_HASH_SIZE]); extern int -skip_wim_stream(struct wim_lookup_table_entry *lte); +skip_wim_resource(struct wim_resource_descriptor *rdesc); /* - * Type of callback function for beginning to read a stream. + * Type of callback function for beginning to read a blob. * - * @lte: - * Stream that is about to be read. + * @blob: + * Blob that is about to be read. * * @ctx: * User-provided context. * * Must return 0 on success, a positive error code on failure, or the special - * value BEGIN_STREAM_STATUS_SKIP_STREAM to indicate that the stream should not - * be read, and read_stream_list() should continue on to the next stream - * (without calling @consume_chunk or @end_stream). + * value BEGIN_BLOB_STATUS_SKIP_BLOB to indicate that the blob should not be + * read, and read_blob_list() should continue on to the next blob (without + * calling @consume_chunk or @end_blob). */ -typedef int (*read_stream_list_begin_stream_t)(struct wim_lookup_table_entry *lte, - void *ctx); +typedef int (*read_blob_list_begin_blob_t)(struct blob_descriptor *blob, void *ctx); -#define BEGIN_STREAM_STATUS_SKIP_STREAM -1 +#define BEGIN_BLOB_STATUS_SKIP_BLOB -1 /* - * Type of callback function for finishing reading a stream. + * Type of callback function for finishing reading a blob. 
* - * @lte: - * Stream that has been fully read, or stream that started being read but - * could not be fully read due to a read error. + * @blob: + * Blob that has been fully read, or blob that started being read but could + * not be fully read due to a read error. * * @status: - * 0 if reading the stream was successful; otherwise a nonzero error code + * 0 if reading the blob was successful; otherwise a nonzero error code * that specifies the return status. * * @ctx: * User-provided context. */ -typedef int (*read_stream_list_end_stream_t)(struct wim_lookup_table_entry *lte, - int status, - void *ctx); +typedef int (*read_blob_list_end_blob_t)(struct blob_descriptor *blob, + int status, + void *ctx); -/* Callback functions and contexts for read_stream_list(). */ -struct read_stream_list_callbacks { +/* Callback functions and contexts for read_blob_list(). */ +struct read_blob_list_callbacks { - /* Called when a stream is about to be read. */ - read_stream_list_begin_stream_t begin_stream; + /* Called when a blob is about to be read. */ + read_blob_list_begin_blob_t begin_blob; /* Called when a chunk of data has been read. */ consume_data_callback_t consume_chunk; - /* Called when a stream has been fully read. A successful call to - * @begin_stream will always be matched by a call to @end_stream. */ - read_stream_list_end_stream_t end_stream; + /* Called when a blob has been fully read. A successful call to + * @begin_blob will always be matched by a call to @end_blob. */ + read_blob_list_end_blob_t end_blob; - /* Parameter passed to @begin_stream. */ - void *begin_stream_ctx; + /* Parameter passed to @begin_blob. */ + void *begin_blob_ctx; /* Parameter passed to @consume_chunk. */ void *consume_chunk_ctx; - /* Parameter passed to @end_stream. */ - void *end_stream_ctx; + /* Parameter passed to @end_blob. 
*/ + void *end_blob_ctx; }; -/* Flags for read_stream_list() */ -#define VERIFY_STREAM_HASHES 0x1 -#define COMPUTE_MISSING_STREAM_HASHES 0x2 -#define STREAM_LIST_ALREADY_SORTED 0x4 +/* Flags for read_blob_list() */ +#define VERIFY_BLOB_HASHES 0x1 +#define COMPUTE_MISSING_BLOB_HASHES 0x2 +#define BLOB_LIST_ALREADY_SORTED 0x4 extern int -read_stream_list(struct list_head *stream_list, - size_t list_head_offset, - const struct read_stream_list_callbacks *cbs, - int flags); +read_blob_list(struct list_head *blob_list, size_t list_head_offset, + const struct read_blob_list_callbacks *cbs, int flags); -/* Functions to extract streams. */ +/* Functions to extract blobs. */ extern int -extract_stream(struct wim_lookup_table_entry *lte, - u64 size, - consume_data_callback_t extract_chunk, - void *extract_chunk_arg); +extract_blob(struct blob_descriptor *blob, u64 size, + consume_data_callback_t extract_chunk, void *extract_chunk_arg); extern int -extract_stream_to_fd(struct wim_lookup_table_entry *lte, - struct filedes *fd, u64 size); +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, u64 size); extern int -extract_full_stream_to_fd(struct wim_lookup_table_entry *lte, - struct filedes *fd); +extract_full_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd); -/* Miscellaneous stream functions. */ +/* Miscellaneous blob functions. */ extern int -sha1_stream(struct wim_lookup_table_entry *lte); +sha1_blob(struct blob_descriptor *blob); /* Functions to read/write metadata resources. */ @@ -311,12 +297,12 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags); /* Definitions specific to pipable WIM resources. */ -/* Arbitrary number to begin each stream in the pipable WIM, used for sanity +/* Arbitrary number to begin each blob in the pipable WIM, used for sanity * checking. */ -#define PWM_STREAM_MAGIC 0x2b9b9ba2443db9d8ULL +#define PWM_BLOB_MAGIC 0x2b9b9ba2443db9d8ULL /* Header that precedes each resource in a pipable WIM. 
*/ -struct pwm_stream_hdr { +struct pwm_blob_hdr { le64 magic; /* +0 */ le64 uncompressed_size; /* +8 */ u8 hash[SHA1_HASH_SIZE]; /* +16 */ @@ -324,9 +310,9 @@ struct pwm_stream_hdr { /* +40 */ } _packed_attribute; -/* Extra flag for the @flags field in `struct pipable_wim_stream_hdr': Indicates - * that the SHA1 message digest of the stream has not been calculated. - * Currently only used for the XML data. */ +/* Extra flag for the @flags field in `struct pwm_blob_hdr': Indicates that the + * SHA-1 message digest of the stream has not been calculated. Currently only + * used for the XML data. */ #define PWM_RESHDR_FLAG_UNHASHED 0x100 /* Header that precedes each chunk of a compressed resource in a pipable WIM. diff --git a/include/wimlib/solid.h b/include/wimlib/solid.h index 064f27fe..cf37a880 100644 --- a/include/wimlib/solid.h +++ b/include/wimlib/solid.h @@ -4,6 +4,6 @@ struct list_head; extern int -sort_stream_list_for_solid_compression(struct list_head *stream_list); +sort_blob_list_for_solid_compression(struct list_head *blob_list); #endif /* _WIMLIB_SOLID_H */ diff --git a/include/wimlib/util.h b/include/wimlib/util.h index 40f6294e..67044638 100644 --- a/include/wimlib/util.h +++ b/include/wimlib/util.h @@ -84,9 +84,6 @@ extern void * mempcpy(void *dst, const void *src, size_t n); #endif -extern size_t -utf16le_strlen(const utf16lechar *s); - extern void randomize_byte_array(u8 *p, size_t n); diff --git a/include/wimlib/wim.h b/include/wimlib/wim.h index 5bc20a6b..0f77aabc 100644 --- a/include/wimlib/wim.h +++ b/include/wimlib/wim.h @@ -12,7 +12,7 @@ struct wim_image_metadata; struct wim_info; -struct wim_lookup_table; +struct blob_table; /* * WIMStruct - represents a WIM, or a part of a non-standalone WIM @@ -57,11 +57,11 @@ struct WIMStruct { * also maintained for a WIMStruct not backed by a file. */ struct wim_info *wim_info; - /* The lookup table for this WIMStruct. 
If this WIMStruct has a backing - * file, then this table will index the streams contained in that file. - * In addition, this table may index streams that were added by updates - * or referenced from other WIMStructs. */ - struct wim_lookup_table *lookup_table; + /* The blob table for this WIMStruct. If this WIMStruct has a backing + * file, then this table will index the blobs contained in that file. + * In addition, this table may index blobs that were added by updates or + * referenced from other WIMStructs. */ + struct blob_table *blob_table; /* * The 1-based index of the currently selected image in this WIMStruct, @@ -211,7 +211,7 @@ extern int for_image(WIMStruct *wim, int image, int (*visitor)(WIMStruct *)); extern int -wim_checksum_unhashed_streams(WIMStruct *wim); +wim_checksum_unhashed_blobs(WIMStruct *wim); extern int delete_wim_image(WIMStruct *wim, int image); diff --git a/include/wimlib/wimboot.h b/include/wimlib/wimboot.h index f9bdbe8b..7dcd85d3 100644 --- a/include/wimlib/wimboot.h +++ b/include/wimlib/wimboot.h @@ -6,7 +6,7 @@ #include "wimlib/types.h" #include "wimlib/win32_common.h" -struct wim_lookup_table_entry; +struct blob_descriptor; extern int wimboot_alloc_data_source_id(const wchar_t *wim_path, @@ -16,9 +16,9 @@ wimboot_alloc_data_source_id(const wchar_t *wim_path, extern bool wimboot_set_pointer(HANDLE h, - const struct wim_lookup_table_entry *lte, + const struct blob_descriptor *blob, u64 data_source_id, - const u8 lookup_table_hash[SHA1_HASH_SIZE], + const u8 blob_table_hash[SHA1_HASH_SIZE], bool wof_running); diff --git a/include/wimlib/win32.h b/include/wimlib/win32.h index c8be8c80..9ab141f1 100644 --- a/include/wimlib/win32.h +++ b/include/wimlib/win32.h @@ -8,14 +8,14 @@ #include "wimlib/callback.h" #include "wimlib/types.h" -struct wim_lookup_table_entry; +struct blob_descriptor; extern int -read_winnt_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx); 
+read_winnt_stream_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx); extern int -read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte, +read_win32_encrypted_file_prefix(const struct blob_descriptor *blob, u64 size, consume_data_callback_t cb, void *cb_ctx); diff --git a/include/wimlib/wof.h b/include/wimlib/wof.h index 969be854..fae0801d 100644 --- a/include/wimlib/wof.h +++ b/include/wimlib/wof.h @@ -63,22 +63,22 @@ struct wim_provider_rpdata { /* Integer ID that identifies the WIM. */ le64 data_source_id; - /* SHA1 message digest of the file's unnamed data stream. */ - u8 resource_hash[20]; + /* SHA-1 message digest of the file's unnamed data stream. */ + u8 unnamed_data_stream_hash[20]; - /* SHA1 message digest of the WIM's lookup table. */ - u8 wim_lookup_table_hash[20]; + /* SHA-1 message digest of the WIM's blob table as stored on disk. */ + u8 blob_table_hash[20]; /* Uncompressed size of the file's unnamed data stream, in bytes. */ - le64 stream_uncompressed_size; + le64 unnamed_data_stream_uncompressed_size; - /* Compressed size of the file's unnamed data stream, in bytes. If + /* Compressed size of the file's unnamed data stream, in bytes. If the * stream is stored uncompressed, set this the same as the uncompressed * size. */ - le64 stream_compressed_size; + le64 unnamed_data_stream_compressed_size; /* Byte offset of the file's unnamed data stream in the WIM. */ - le64 stream_offset_in_wim; + le64 unnamed_data_stream_offset_in_wim; } _packed_attribute; /* WIM-specific information about a WIM data source */ @@ -102,8 +102,8 @@ struct WimOverlay_dat_entry_1 { le32 wim_type; /* Index of the image in the WIM to use??? (This doesn't really make - * sense, since WIM files combine streams for all images into a single - * table. Set to 1 if unsure...) */ + * sense, since WIM files combine file data "blobs" for all images into + * a single table. Set to 1 if unsure...) 
*/ le32 wim_index; /* GUID of the WIM file (copied from the WIM header, offset +0x18). */ @@ -292,8 +292,8 @@ struct wim_provider_external_info { * FSCTL_ADD_OVERLAY ioctl. */ u64 data_source_id; - /* SHA1 message digest of the file's unnamed data stream. */ - u8 resource_hash[20]; + /* SHA-1 message digest of the file's unnamed data stream. */ + u8 unnamed_data_stream_hash[20]; }; /***************************************************************************** @@ -394,9 +394,9 @@ struct wim_provider_overlay_entry { /* Type of WIM file: WIM_BOOT_OS_WIM or WIM_BOOT_NOT_OS_WIM. */ uint32_t wim_type; - /* Index of the backing image in the WIM??? (This doesn't really make - * sense, since WIM files combine streams for all images into a single - * table.) */ + /* Index of the image in the WIM to use??? (This doesn't really make + * sense, since WIM files combine file data "blobs" for all images into + * a single table. Set to 1 if unsure...) */ uint32_t wim_index; /* 0 when WIM provider active, otherwise @@ -441,8 +441,8 @@ struct wim_provider_add_overlay_input { #define WIM_BOOT_NOT_OS_WIM 1 /* Index of the image in the WIM to use??? (This doesn't really make - * sense, since WIM files combine streams for all images into a single - * table. Set to 1 if unsure...) */ + * sense, since WIM files combine file data "blobs" for all images into + * a single table. Set to 1 if unsure...) 
*/ u32 wim_index; /* Byte offset of wim_file_name in this buffer, not including the diff --git a/include/wimlib/write.h b/include/wimlib/write.h index a46460bd..af1a6353 100644 --- a/include/wimlib/write.h +++ b/include/wimlib/write.h @@ -5,7 +5,7 @@ #include "wimlib/types.h" /* Internal use only */ -#define WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE 0x80000000 +#define WIMLIB_WRITE_FLAG_NO_BLOB_TABLE 0x80000000 #define WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML 0x40000000 #define WIMLIB_WRITE_FLAG_HEADER_AT_END 0x20000000 #define WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR 0x10000000 @@ -60,7 +60,7 @@ write_wim_part(WIMStruct *wim, unsigned num_threads, unsigned part_number, unsigned total_parts, - struct list_head *stream_list_override, + struct list_head *blob_list_override, const u8 *guid); int diff --git a/programs/imagex.c b/programs/imagex.c index 9334623d..b3080289 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -160,7 +160,7 @@ enum { IMAGEX_HEADER_OPTION, IMAGEX_INCLUDE_INVALID_NAMES_OPTION, IMAGEX_LAZY_OPTION, - IMAGEX_LOOKUP_TABLE_OPTION, + IMAGEX_BLOBS_OPTION, IMAGEX_METADATA_OPTION, IMAGEX_NEW_IMAGE_OPTION, IMAGEX_NOCHECK_OPTION, @@ -320,7 +320,8 @@ static const struct option info_options[] = { {T("no-check"), no_argument, NULL, IMAGEX_NOCHECK_OPTION}, {T("extract-xml"), required_argument, NULL, IMAGEX_EXTRACT_XML_OPTION}, {T("header"), no_argument, NULL, IMAGEX_HEADER_OPTION}, - {T("lookup-table"), no_argument, NULL, IMAGEX_LOOKUP_TABLE_OPTION}, + {T("lookup-table"), no_argument, NULL, IMAGEX_BLOBS_OPTION}, + {T("blobs"), no_argument, NULL, IMAGEX_BLOBS_OPTION}, {T("metadata"), no_argument, NULL, IMAGEX_METADATA_OPTION}, {T("xml"), no_argument, NULL, IMAGEX_XML_OPTION}, {NULL, 0, NULL, 0}, @@ -2440,34 +2441,34 @@ static int print_resource(const struct wimlib_resource_entry *resource, void *_ignore) { - tprintf(T("Hash = 0x")); + tprintf(T("Hash = 0x")); print_byte_field(resource->sha1_hash, sizeof(resource->sha1_hash)); tputchar(T('\n')); if 
(!resource->is_missing) { - tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), + tprintf(T("Uncompressed size = %"PRIu64" bytes\n"), resource->uncompressed_size); if (resource->packed) { - tprintf(T("Raw compressed size = %"PRIu64" bytes\n"), - resource->raw_resource_compressed_size); - - tprintf(T("Raw offset in WIM = %"PRIu64" bytes\n"), + tprintf(T("Solid resource = %"PRIu64" => %"PRIu64" " + "bytes @ offset %"PRIu64"\n"), + resource->raw_resource_uncompressed_size, + resource->raw_resource_compressed_size, resource->raw_resource_offset_in_wim); - tprintf(T("Offset in raw = %"PRIu64" bytes\n"), + tprintf(T("Solid offset = %"PRIu64" bytes\n"), resource->offset); } else { - tprintf(T("Compressed size = %"PRIu64" bytes\n"), + tprintf(T("Compressed size = %"PRIu64" bytes\n"), resource->compressed_size); - tprintf(T("Offset in WIM = %"PRIu64" bytes\n"), + tprintf(T("Offset in WIM = %"PRIu64" bytes\n"), resource->offset); } - tprintf(T("Part Number = %u\n"), resource->part_number); - tprintf(T("Reference Count = %u\n"), resource->reference_count); + tprintf(T("Part Number = %u\n"), resource->part_number); + tprintf(T("Reference Count = %u\n"), resource->reference_count); - tprintf(T("Flags = ")); + tprintf(T("Flags = ")); if (resource->is_compressed) tprintf(T("WIM_RESHDR_FLAG_COMPRESSED ")); if (resource->is_metadata) @@ -2485,7 +2486,7 @@ print_resource(const struct wimlib_resource_entry *resource, } static void -print_lookup_table(WIMStruct *wim) +print_blobs(WIMStruct *wim) { wimlib_iterate_lookup_table(wim, 0, print_resource, NULL); } @@ -2538,8 +2539,12 @@ print_dentry_detailed(const struct wimlib_dir_entry *dentry) for (uint32_t i = 0; i <= dentry->num_named_streams; i++) { if (dentry->streams[i].stream_name) { - tprintf(T("\tData stream \"%"TS"\":\n"), + tprintf(T("\tNamed data stream \"%"TS"\":\n"), dentry->streams[i].stream_name); + } else if (dentry->attributes & WIMLIB_FILE_ATTRIBUTE_ENCRYPTED) { + tprintf(T("\tRaw encrypted data stream:\n")); + } else if 
(dentry->attributes & WIMLIB_FILE_ATTRIBUTE_REPARSE_POINT) { + tprintf(T("\tReparse point stream:\n")); } else { tprintf(T("\tUnnamed data stream:\n")); } @@ -3119,7 +3124,7 @@ imagex_info(int argc, tchar **argv, int cmd) bool check = false; bool nocheck = false; bool header = false; - bool lookup_table = false; + bool blobs = false; bool xml = false; bool short_header = true; const tchar *xml_out_file = NULL; @@ -3148,8 +3153,8 @@ imagex_info(int argc, tchar **argv, int cmd) header = true; short_header = false; break; - case IMAGEX_LOOKUP_TABLE_OPTION: - lookup_table = true; + case IMAGEX_BLOBS_OPTION: + blobs = true; short_header = false; break; case IMAGEX_XML_OPTION: @@ -3244,13 +3249,13 @@ imagex_info(int argc, tchar **argv, int cmd) if (header) wimlib_print_header(wim); - if (lookup_table) { + if (blobs) { if (info.total_parts != 1) { - tfprintf(stderr, T("Warning: Only showing the lookup table " + tfprintf(stderr, T("Warning: Only showing the blobs " "for part %d of a %d-part WIM.\n"), info.part_number, info.total_parts); } - print_lookup_table(wim); + print_blobs(wim); } if (xml) { @@ -4223,7 +4228,7 @@ T( T( " %"TS" WIMFILE [IMAGE [NEW_NAME [NEW_DESC]]]\n" " [--boot] [--check] [--nocheck] [--xml]\n" -" [--extract-xml FILE] [--header] [--lookup-table]\n" +" [--extract-xml FILE] [--header] [--blobs]\n" ), [CMD_JOIN] = T( diff --git a/src/add_image.c b/src/add_image.c index 86ba9f0b..cff98203 100644 --- a/src/add_image.c +++ b/src/add_image.c @@ -24,8 +24,8 @@ #endif #include "wimlib.h" +#include "wimlib/blob_table.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/security.h" #include "wimlib/xml.h" @@ -38,24 +38,23 @@ static int add_empty_image_metadata(WIMStruct *wim) { int ret; - struct wim_lookup_table_entry *metadata_lte; + struct blob_descriptor *metadata_blob; struct wim_security_data *sd; struct wim_image_metadata *imd; - /* Create lookup table entry for this metadata resource (for now really 
- * just a dummy entry). */ + /* Create a blob descriptor for the new metadata resource. */ ret = WIMLIB_ERR_NOMEM; - metadata_lte = new_lookup_table_entry(); - if (!metadata_lte) + metadata_blob = new_blob_descriptor(); + if (!metadata_blob) goto out; - metadata_lte->flags = WIM_RESHDR_FLAG_METADATA; - metadata_lte->unhashed = 1; + metadata_blob->flags = WIM_RESHDR_FLAG_METADATA; + metadata_blob->unhashed = 1; /* Create empty security data (no security descriptors). */ sd = new_wim_security_data(); if (!sd) - goto out_free_metadata_lte; + goto out_free_metadata_blob; imd = new_image_metadata(); if (!imd) @@ -64,7 +63,7 @@ add_empty_image_metadata(WIMStruct *wim) /* A NULL root_dentry indicates a completely empty image, without even a * root directory. */ imd->root_dentry = NULL; - imd->metadata_lte = metadata_lte; + imd->metadata_blob = metadata_blob; imd->security_data = sd; imd->modified = 1; @@ -76,8 +75,8 @@ add_empty_image_metadata(WIMStruct *wim) out_free_security_data: free_wim_security_data(sd); -out_free_metadata_lte: - free_lookup_table_entry(metadata_lte); +out_free_metadata_blob: + free_blob_descriptor(metadata_blob); out: return ret; } diff --git a/src/blob_table.c b/src/blob_table.c new file mode 100644 index 00000000..5551c163 --- /dev/null +++ b/src/blob_table.c @@ -0,0 +1,1404 @@ +/* + * blob_table.c + * + * A blob table maps SHA-1 message digests to "blobs", which are nonempty + * sequences of binary data. Within a WIM file, blobs are single-instanced. + * + * This file also contains code to read and write the corresponding on-disk + * representation of this table in the WIM file format. + */ + +/* + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers + * + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. 
+ * + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include /* for unlink() */ + +#include "wimlib/assert.h" +#include "wimlib/blob_table.h" +#include "wimlib/encoding.h" +#include "wimlib/endianness.h" +#include "wimlib/error.h" +#include "wimlib/metadata.h" +#include "wimlib/ntfs_3g.h" +#include "wimlib/resource.h" +#include "wimlib/unaligned.h" +#include "wimlib/util.h" +#include "wimlib/write.h" + +/* A hash table mapping SHA-1 message digests to blob descriptors */ +struct blob_table { + struct hlist_head *array; + size_t num_blobs; + size_t capacity; +}; + +struct blob_table * +new_blob_table(size_t capacity) +{ + struct blob_table *table; + struct hlist_head *array; + + table = MALLOC(sizeof(struct blob_table)); + if (table == NULL) + goto oom; + + array = CALLOC(capacity, sizeof(array[0])); + if (array == NULL) { + FREE(table); + goto oom; + } + + table->num_blobs = 0; + table->capacity = capacity; + table->array = array; + return table; + +oom: + ERROR("Failed to allocate memory for blob table " + "with capacity %zu", capacity); + return NULL; +} + +static int +do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore) +{ + free_blob_descriptor(blob); + return 0; +} + +void +free_blob_table(struct blob_table *table) +{ + if (table) { + for_blob_in_table(table, do_free_blob_descriptor, NULL); + FREE(table->array); + FREE(table); + } +} + +struct blob_descriptor * +new_blob_descriptor(void) +{ + struct blob_descriptor *blob; + + blob = CALLOC(1, sizeof(struct blob_descriptor)); + if (blob == NULL) + return NULL; + + 
blob->refcnt = 1; + + /* blob->blob_location = BLOB_NONEXISTENT */ + BUILD_BUG_ON(BLOB_NONEXISTENT != 0); + + return blob; +} + +struct blob_descriptor * +clone_blob_descriptor(const struct blob_descriptor *old) +{ + struct blob_descriptor *new; + + new = memdup(old, sizeof(struct blob_descriptor)); + if (new == NULL) + return NULL; + + switch (new->blob_location) { + case BLOB_IN_WIM: + list_add(&new->rdesc_node, &new->rdesc->blob_list); + break; + + case BLOB_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case BLOB_IN_WINNT_FILE_ON_DISK: + case BLOB_WIN32_ENCRYPTED: +#endif +#ifdef WITH_FUSE + case BLOB_IN_STAGING_FILE: + BUILD_BUG_ON((void*)&old->file_on_disk != + (void*)&old->staging_file_name); +#endif + new->file_on_disk = TSTRDUP(old->file_on_disk); + if (new->file_on_disk == NULL) + goto out_free; + break; + case BLOB_IN_ATTACHED_BUFFER: + new->attached_buffer = memdup(old->attached_buffer, old->size); + if (new->attached_buffer == NULL) + goto out_free; + break; +#ifdef WITH_NTFS_3G + case BLOB_IN_NTFS_VOLUME: + if (old->ntfs_loc) { + struct ntfs_location *loc; + loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location)); + if (loc == NULL) + goto out_free; + loc->path = NULL; + loc->attr_name = NULL; + new->ntfs_loc = loc; + loc->path = STRDUP(old->ntfs_loc->path); + if (loc->path == NULL) + goto out_free; + if (loc->attr_name_nchars != 0) { + loc->attr_name = utf16le_dup(old->ntfs_loc->attr_name); + if (loc->attr_name == NULL) + goto out_free; + } + } + break; +#endif + default: + break; + } + return new; + +out_free: + free_blob_descriptor(new); + return NULL; +} + +static void +blob_release_location(struct blob_descriptor *blob) +{ + switch (blob->blob_location) { + case BLOB_IN_WIM: + list_del(&blob->rdesc_node); + if (list_empty(&blob->rdesc->blob_list)) + FREE(blob->rdesc); + break; + case BLOB_IN_FILE_ON_DISK: +#ifdef __WIN32__ + case BLOB_IN_WINNT_FILE_ON_DISK: + case BLOB_WIN32_ENCRYPTED: +#endif +#ifdef WITH_FUSE + case BLOB_IN_STAGING_FILE: + 
BUILD_BUG_ON((void*)&blob->file_on_disk != + (void*)&blob->staging_file_name); +#endif + case BLOB_IN_ATTACHED_BUFFER: + BUILD_BUG_ON((void*)&blob->file_on_disk != + (void*)&blob->attached_buffer); + FREE(blob->file_on_disk); + break; +#ifdef WITH_NTFS_3G + case BLOB_IN_NTFS_VOLUME: + if (blob->ntfs_loc) { + FREE(blob->ntfs_loc->path); + FREE(blob->ntfs_loc->attr_name); + FREE(blob->ntfs_loc); + } + break; +#endif + default: + break; + } +} + +void +free_blob_descriptor(struct blob_descriptor *blob) +{ + if (blob) { + blob_release_location(blob); + FREE(blob); + } +} + +/* Should this blob be retained even if it has no references? */ +static bool +should_retain_blob(const struct blob_descriptor *blob) +{ + return blob->blob_location == BLOB_IN_WIM; +} + +static void +finalize_blob(struct blob_descriptor *blob) +{ + if (!should_retain_blob(blob)) + free_blob_descriptor(blob); +} + +/* + * Decrements the reference count of the specified blob, which must be either + * (a) unhashed, or (b) inserted in the specified blob table. + * + * If the blob's reference count reaches 0, we may unlink it from @table and + * free it. However, we retain blobs with 0 reference count that originated + * from WIM files (BLOB_IN_WIM). We do this for two reasons: + * + * 1. This prevents information about valid blobs in a WIM file --- blobs which + * will continue to be present after appending to the WIM file --- from being + * lost merely because we dropped all references to them. + * + * 2. Blob reference counts we read from WIM files can't be trusted. It's + * possible that a WIM has reference counts that are too low; WIMGAPI + * sometimes creates WIMs where this is the case. It's also possible that + * blobs have been referenced from an external WIM; those blobs can + * potentially have any reference count at all, either lower or higher than + * would be expected for this WIM ("this WIM" meaning the owner of @table) if + * it were a standalone WIM. 
+ * + * So we can't take the reference counts too seriously. But at least, we do + * recalculate by default when writing a new WIM file. + */ +void +blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table) +{ + if (unlikely(blob->refcnt == 0)) /* See comment above */ + return; + + if (--blob->refcnt != 0) + return; + + if (blob->unhashed) { + list_del(&blob->unhashed_list); + #ifdef WITH_FUSE + /* If the blob has been extracted to a staging file for a FUSE + * mount, unlink the staging file. (Note that there still may + * be open file descriptors to it.) */ + if (blob->blob_location == BLOB_IN_STAGING_FILE) + unlinkat(blob->staging_dir_fd, + blob->staging_file_name, 0); + #endif + } else { + if (!should_retain_blob(blob)) + blob_table_unlink(table, blob); + } + + /* If FUSE mounts are enabled, then don't actually free the blob + * descriptor until the last file descriptor to it has been closed. */ +#ifdef WITH_FUSE + if (blob->num_opened_fds == 0) +#endif + finalize_blob(blob); +} + +#ifdef WITH_FUSE +void +blob_decrement_num_opened_fds(struct blob_descriptor *blob) +{ + wimlib_assert(blob->num_opened_fds != 0); + + if (--blob->num_opened_fds == 0 && blob->refcnt == 0) + finalize_blob(blob); +} +#endif + +static void +blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob) +{ + size_t i = blob->hash_short % table->capacity; + + hlist_add_head(&blob->hash_list, &table->array[i]); +} + +static void +enlarge_blob_table(struct blob_table *table) +{ + size_t old_capacity, new_capacity; + struct hlist_head *old_array, *new_array; + struct blob_descriptor *blob; + struct hlist_node *cur, *tmp; + size_t i; + + old_capacity = table->capacity; + new_capacity = old_capacity * 2; + new_array = CALLOC(new_capacity, sizeof(struct hlist_head)); + if (new_array == NULL) + return; + old_array = table->array; + table->array = new_array; + table->capacity = new_capacity; + + for (i = 0; i < old_capacity; i++) { + 
hlist_for_each_entry_safe(blob, cur, tmp, &old_array[i], hash_list) { + hlist_del(&blob->hash_list); + blob_table_insert_raw(table, blob); + } + } + FREE(old_array); +} + +/* Insert a blob descriptor into the blob table. */ +void +blob_table_insert(struct blob_table *table, struct blob_descriptor *blob) +{ + blob_table_insert_raw(table, blob); + if (++table->num_blobs > table->capacity) + enlarge_blob_table(table); +} + +/* Unlinks a blob descriptor from the blob table; does not free it. */ +void +blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob) +{ + wimlib_assert(!blob->unhashed); + wimlib_assert(table->num_blobs != 0); + + hlist_del(&blob->hash_list); + table->num_blobs--; +} + +/* Given a SHA-1 message digest, return the corresponding blob descriptor from + * the specified blob table, or NULL if there is none. */ +struct blob_descriptor * +lookup_blob(const struct blob_table *table, const u8 *hash) +{ + size_t i; + struct blob_descriptor *blob; + struct hlist_node *pos; + + i = load_size_t_unaligned(hash) % table->capacity; + hlist_for_each_entry(blob, pos, &table->array[i], hash_list) + if (hashes_equal(hash, blob->hash)) + return blob; + return NULL; +} + +/* Call a function on all blob descriptors in the specified blob table. Stop + * early and return nonzero if any call to the function returns nonzero. */ +int +for_blob_in_table(struct blob_table *table, + int (*visitor)(struct blob_descriptor *, void *), void *arg) +{ + struct blob_descriptor *blob; + struct hlist_node *pos, *tmp; + int ret; + + for (size_t i = 0; i < table->capacity; i++) { + hlist_for_each_entry_safe(blob, pos, tmp, &table->array[i], + hash_list) + { + ret = visitor(blob, arg); + if (ret) + return ret; + } + } + return 0; +} + +/* + * This is a qsort() callback that sorts blobs into an order optimized for + * reading. Sorting is done primarily by blob location, then secondarily by a + * location-dependent order. 
For example, blobs in WIM resources are sorted + * such that the underlying WIM files will be read sequentially. This is + * especially important for WIM files containing solid resources. + */ +int +cmp_blobs_by_sequential_order(const void *p1, const void *p2) +{ + const struct blob_descriptor *blob1, *blob2; + int v; + WIMStruct *wim1, *wim2; + + blob1 = *(const struct blob_descriptor**)p1; + blob2 = *(const struct blob_descriptor**)p2; + + v = (int)blob1->blob_location - (int)blob2->blob_location; + + /* Different resource locations? */ + if (v) + return v; + + switch (blob1->blob_location) { + case BLOB_IN_WIM: + wim1 = blob1->rdesc->wim; + wim2 = blob2->rdesc->wim; + + /* Different (possibly split) WIMs? */ + if (wim1 != wim2) { + v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN); + if (v) + return v; + } + + /* Different part numbers in the same WIM? */ + v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number; + if (v) + return v; + + if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim) + return cmp_u64(blob1->rdesc->offset_in_wim, + blob2->rdesc->offset_in_wim); + + return cmp_u64(blob1->offset_in_res, blob2->offset_in_res); + + case BLOB_IN_FILE_ON_DISK: +#ifdef WITH_FUSE + case BLOB_IN_STAGING_FILE: +#endif +#ifdef __WIN32__ + case BLOB_IN_WINNT_FILE_ON_DISK: + case BLOB_WIN32_ENCRYPTED: +#endif + /* Compare files by path: just a heuristic that will place files + * in the same directory next to each other. */ + return tstrcmp(blob1->file_on_disk, blob2->file_on_disk); +#ifdef WITH_NTFS_3G + case BLOB_IN_NTFS_VOLUME: + return tstrcmp(blob1->ntfs_loc->path, blob2->ntfs_loc->path); +#endif + default: + /* No additional sorting order defined for this resource + * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare + * everything equal to each other. 
*/ + return 0; + } +} + +int +sort_blob_list(struct list_head *blob_list, size_t list_head_offset, + int (*compar)(const void *, const void*)) +{ + struct list_head *cur; + struct blob_descriptor **array; + size_t i; + size_t array_size; + size_t num_blobs = 0; + + list_for_each(cur, blob_list) + num_blobs++; + + if (num_blobs <= 1) + return 0; + + array_size = num_blobs * sizeof(array[0]); + array = MALLOC(array_size); + if (array == NULL) + return WIMLIB_ERR_NOMEM; + + cur = blob_list->next; + for (i = 0; i < num_blobs; i++) { + array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset); + cur = cur->next; + } + + qsort(array, num_blobs, sizeof(array[0]), compar); + + INIT_LIST_HEAD(blob_list); + for (i = 0; i < num_blobs; i++) { + list_add_tail((struct list_head*) + ((u8*)array[i] + list_head_offset), blob_list); + } + FREE(array); + return 0; +} + +/* Sort the specified list of blobs in an order optimized for sequential + * reading. */ +int +sort_blob_list_by_sequential_order(struct list_head *blob_list, + size_t list_head_offset) +{ + return sort_blob_list(blob_list, list_head_offset, + cmp_blobs_by_sequential_order); +} + +static int +add_blob_to_array(struct blob_descriptor *blob, void *_pp) +{ + struct blob_descriptor ***pp = _pp; + *(*pp)++ = blob; + return 0; +} + +/* Iterate through the blob descriptors in the specified blob table in an order + * optimized for sequential reading. 
*/ +int +for_blob_in_table_sorted_by_sequential_order(struct blob_table *table, + int (*visitor)(struct blob_descriptor *, void *), + void *arg) +{ + struct blob_descriptor **blob_array, **p; + size_t num_blobs = table->num_blobs; + int ret; + + blob_array = MALLOC(num_blobs * sizeof(blob_array[0])); + if (!blob_array) + return WIMLIB_ERR_NOMEM; + p = blob_array; + for_blob_in_table(table, add_blob_to_array, &p); + + wimlib_assert(p == blob_array + num_blobs); + + qsort(blob_array, num_blobs, sizeof(blob_array[0]), + cmp_blobs_by_sequential_order); + ret = 0; + for (size_t i = 0; i < num_blobs; i++) { + ret = visitor(blob_array[i], arg); + if (ret) + break; + } + FREE(blob_array); + return ret; +} + +/* On-disk format of a blob descriptor in a WIM file. + * + * Note: if the WIM file contains solid resource(s), then this structure is + * sometimes overloaded to describe a "resource" rather than a "blob". See the + * code for details. */ +struct blob_descriptor_disk { + + /* Size, offset, and flags of the blob. */ + struct wim_reshdr_disk reshdr; + + /* Which part of the split WIM this blob is in; indexed from 1. */ + le16 part_number; + + /* Reference count of this blob over all WIM images. (But see comment + * above blob_decrement_refcnt().) */ + le32 refcnt; + + /* SHA-1 message digest of the uncompressed data of this blob, or all + * zeroes if this blob is of zero length. */ + u8 hash[SHA1_HASH_SIZE]; +} _packed_attribute; + +/* Given a nonempty run of consecutive blob descriptors with the SOLID flag set, + * count how many specify resources (as opposed to blobs within those + * resources). + * + * Returns the resulting count. */ +static size_t +count_solid_resources(const struct blob_descriptor_disk *entries, size_t max) +{ + size_t count = 0; + do { + struct wim_reshdr reshdr; + + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + + if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) { + /* Run was terminated by a stand-alone blob entry. 
*/ + break; + } + + if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { + /* This is a resource entry. */ + count++; + } + } while (--max); + return count; +} + +/* + * Given a run of consecutive blob descriptors with the SOLID flag set and + * having @num_rdescs resource entries, load resource information from them into + * the resource descriptors in the @rdescs array. + * + * Returns 0 on success, or a nonzero error code on failure. + */ +static int +do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs, + size_t num_rdescs, + const struct blob_descriptor_disk *entries) +{ + for (size_t i = 0; i < num_rdescs; i++) { + struct wim_reshdr reshdr; + struct alt_chunk_table_header_disk hdr; + struct wim_resource_descriptor *rdesc; + int ret; + + /* Advance to next resource entry. */ + + do { + get_wim_reshdr(&(entries++)->reshdr, &reshdr); + } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER); + + rdesc = rdescs[i]; + + wim_res_hdr_to_desc(&reshdr, wim, rdesc); + + /* For solid resources, the uncompressed size, compression type, + * and chunk size are stored in the resource itself, not in the + * blob table. */ + + ret = full_pread(&wim->in_fd, &hdr, + sizeof(hdr), reshdr.offset_in_wim); + if (ret) { + ERROR("Failed to read header of solid resource " + "(offset_in_wim=%"PRIu64")", + reshdr.offset_in_wim); + return ret; + } + + rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize); + + /* Compression format numbers must be the same as in + * WIMGAPI to be compatible here. 
*/ + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); + BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); + rdesc->compression_type = le32_to_cpu(hdr.compression_format); + + rdesc->chunk_size = le32_to_cpu(hdr.chunk_size); + + DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" " + "(%"TS"/%"PRIu32") @ +%"PRIu64"", + i + 1, num_rdescs, + rdesc->uncompressed_size, + rdesc->size_in_wim, + wimlib_get_compression_type_string(rdesc->compression_type), + rdesc->chunk_size, + rdesc->offset_in_wim); + } + return 0; +} + +/* + * Given a nonempty run of consecutive blob descriptors with the SOLID flag set, + * allocate a 'struct wim_resource_descriptor' for each resource within that + * run. + * + * Returns 0 on success, or a nonzero error code on failure. + * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret. + */ +static int +load_solid_info(WIMStruct *wim, + const struct blob_descriptor_disk *entries, + size_t num_remaining_entries, + struct wim_resource_descriptor ***rdescs_ret, + size_t *num_rdescs_ret) +{ + size_t num_rdescs; + struct wim_resource_descriptor **rdescs; + size_t i; + int ret; + + num_rdescs = count_solid_resources(entries, num_remaining_entries); + rdescs = CALLOC(num_rdescs, sizeof(rdescs[0])); + if (!rdescs) + return WIMLIB_ERR_NOMEM; + + for (i = 0; i < num_rdescs; i++) { + rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor)); + if (!rdescs[i]) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_rdescs; + } + } + + ret = do_load_solid_info(wim, rdescs, num_rdescs, entries); + if (ret) + goto out_free_rdescs; + + *rdescs_ret = rdescs; + *num_rdescs_ret = num_rdescs; + return 0; + +out_free_rdescs: + for (i = 0; i < num_rdescs; i++) + FREE(rdescs[i]); + FREE(rdescs); + return ret; +} + +/* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor + * with the SOLID flag set, try to assign it to resource in the current 
solid + * run. */ +static int +assign_blob_to_solid_resource(const struct wim_reshdr *reshdr, + struct blob_descriptor *blob, + struct wim_resource_descriptor **rdescs, + size_t num_rdescs) +{ + u64 offset = reshdr->offset_in_wim; + + /* XXX: This linear search will be slow in the degenerate case where the + * number of solid resources in the run is huge. */ + blob->size = reshdr->size_in_wim; + blob->flags = reshdr->flags; + for (size_t i = 0; i < num_rdescs; i++) { + if (offset + blob->size <= rdescs[i]->uncompressed_size) { + blob->offset_in_res = offset; + blob_set_is_located_in_wim_resource(blob, rdescs[i]); + return 0; + } + offset -= rdescs[i]->uncompressed_size; + } + ERROR("blob could not be assigned to a solid resource"); + return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; +} + +static void +free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs) +{ + if (rdescs) { + for (size_t i = 0; i < num_rdescs; i++) + if (list_empty(&rdescs[i]->blob_list)) + FREE(rdescs[i]); + FREE(rdescs); + } +} + +static int +cmp_blobs_by_offset_in_res(const void *p1, const void *p2) +{ + const struct blob_descriptor *blob1, *blob2; + + blob1 = *(const struct blob_descriptor**)p1; + blob2 = *(const struct blob_descriptor**)p2; + + return cmp_u64(blob1->offset_in_res, blob2->offset_in_res); +} + +/* Validate the size and location of a WIM resource. */ +static int +validate_resource(struct wim_resource_descriptor *rdesc) +{ + struct blob_descriptor *blob; + bool out_of_order; + u64 expected_next_offset; + int ret; + + /* Verify that the resource itself has a valid offset and size. */ + if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim) + goto invalid_due_to_overflow; + + /* Verify that each blob in the resource has a valid offset and size. 
/*
 * Finish a solid run: validate each of its resources in order, stopping at the
 * first failure, and then release the run's bookkeeping with
 * free_solid_rdescs() regardless of the outcome.
 *
 * Returns 0 if every resource validated, otherwise the first error code
 * returned by validate_resource().
 */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
	int status = 0;
	size_t idx = 0;

	while (status == 0 && idx < num_rdescs)
		status = validate_resource(rdescs[idx++]);

	free_solid_rdescs(rdescs, num_rdescs);
	return status;
}
Descriptors for + * non-metadata blobs will be saved in the in-memory blob table + * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a + * special location per-image (the wim->image_metadata array). + * + * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple + * blobs that are compressed together. Such a resource is called a "solid + * resource". Solid resources are still described in the on-disk "blob table", + * although the format is not the most logical. A consecutive sequence of + * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run". + * A solid run describes a set of solid resources, each of which contains a set + * of blobs. In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size + * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource, + * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid + * resource. There are some oddities in how we need to determine which solid + * resource a blob is actually in; see the code for details. + * + * Possible return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY + * WIMLIB_ERR_NOMEM + * + * Or an error code caused by failure to read the blob table from the WIM + * file. + */ +int +read_blob_table(WIMStruct *wim) +{ + int ret; + size_t num_entries; + void *buf = NULL; + struct blob_table *table = NULL; + struct blob_descriptor *cur_blob = NULL; + size_t num_duplicate_blobs = 0; + size_t num_wrong_part_blobs = 0; + u32 image_index = 0; + struct wim_resource_descriptor **cur_solid_rdescs = NULL; + size_t cur_num_solid_rdescs = 0; + + DEBUG("Reading blob table."); + + /* Calculate the number of entries in the blob table. */ + num_entries = wim->hdr.blob_table_reshdr.uncompressed_size / + sizeof(struct blob_descriptor_disk); + + /* Read the blob table into a buffer. 
*/ + ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf); + if (ret) + goto out; + + /* Allocate a hash table to map SHA-1 message digests into blob + * descriptors. This is the in-memory "blob table". */ + table = new_blob_table(num_entries * 2 + 1); + if (!table) + goto oom; + + /* Allocate and initalize blob descriptors from the raw blob table + * buffer. */ + for (size_t i = 0; i < num_entries; i++) { + const struct blob_descriptor_disk *disk_entry = + &((const struct blob_descriptor_disk*)buf)[i]; + struct wim_reshdr reshdr; + u16 part_number; + + /* Get the resource header */ + get_wim_reshdr(&disk_entry->reshdr, &reshdr); + + DEBUG("reshdr: size_in_wim=%"PRIu64", " + "uncompressed_size=%"PRIu64", " + "offset_in_wim=%"PRIu64", " + "flags=0x%02x", + reshdr.size_in_wim, reshdr.uncompressed_size, + reshdr.offset_in_wim, reshdr.flags); + + /* Ignore SOLID flag if it isn't supposed to be used in this WIM + * version. */ + if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) + reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID; + + /* Allocate a new 'struct blob_descriptor'. */ + cur_blob = new_blob_descriptor(); + if (!cur_blob) + goto oom; + + /* Get the part number, reference count, and hash. */ + part_number = le16_to_cpu(disk_entry->part_number); + cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt); + copy_hash(cur_blob->hash, disk_entry->hash); + + if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + + /* SOLID entry */ + + if (!cur_solid_rdescs) { + /* Starting new run */ + ret = load_solid_info(wim, disk_entry, + num_entries - i, + &cur_solid_rdescs, + &cur_num_solid_rdescs); + if (ret) + goto out; + } + + if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { + /* Resource entry, not blob entry */ + goto free_cur_blob_and_continue; + } + + /* Blob entry */ + + ret = assign_blob_to_solid_resource(&reshdr, + cur_blob, + cur_solid_rdescs, + cur_num_solid_rdescs); + if (ret) + goto out; + + } else { + /* Normal blob/resource entry; SOLID not set. 
*/ + + struct wim_resource_descriptor *rdesc; + + if (unlikely(cur_solid_rdescs)) { + /* This entry terminated a solid run. */ + ret = finish_solid_rdescs(cur_solid_rdescs, + cur_num_solid_rdescs); + cur_solid_rdescs = NULL; + if (ret) + goto out; + } + + /* How to handle an uncompressed resource with its + * uncompressed size different from its compressed size? + * + * Based on a simple test, WIMGAPI seems to handle this + * as follows: + * + * if (size_in_wim > uncompressed_size) { + * Ignore uncompressed_size; use size_in_wim + * instead. + * } else { + * Honor uncompressed_size, but treat the part of + * the file data above size_in_wim as all zeros. + * } + * + * So we will do the same. */ + if (unlikely(!(reshdr.flags & + WIM_RESHDR_FLAG_COMPRESSED) && + (reshdr.size_in_wim > + reshdr.uncompressed_size))) + { + reshdr.uncompressed_size = reshdr.size_in_wim; + } + + /* Set up a resource descriptor for this blob. */ + + rdesc = MALLOC(sizeof(struct wim_resource_descriptor)); + if (!rdesc) + goto oom; + + wim_res_hdr_to_desc(&reshdr, wim, rdesc); + + cur_blob->offset_in_res = 0; + cur_blob->size = reshdr.uncompressed_size; + cur_blob->flags = reshdr.flags; + + blob_set_is_located_in_wim_resource(cur_blob, rdesc); + } + + /* cur_blob is now a blob bound to a resource. */ + + /* Ignore entries with all zeroes in the hash field. */ + if (is_zero_hash(cur_blob->hash)) + goto free_cur_blob_and_continue; + + /* Verify that the part number matches that of the underlying + * WIM file. */ + if (part_number != wim->hdr.part_number) { + num_wrong_part_blobs++; + goto free_cur_blob_and_continue; + } + + if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { + + /* Blob table entry for a metadata resource. */ + + /* Metadata entries with no references must be ignored. + * See, for example, the WinPE WIMs from the WAIK v2.1. 
+ */ + if (cur_blob->refcnt == 0) + goto free_cur_blob_and_continue; + + if (cur_blob->refcnt != 1) { + /* We don't currently support this case due to + * the complications of multiple images sharing + * the same metadata resource or a metadata + * resource also being referenced by files. */ + ERROR("Found metadata resource with refcnt != 1"); + ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; + goto out; + } + + if (wim->hdr.part_number != 1) { + WARNING("Ignoring metadata resource found in a " + "non-first part of the split WIM"); + goto free_cur_blob_and_continue; + } + + /* The number of entries in the blob table with + * WIM_RESHDR_FLAG_METADATA set should be the same as + * the image_count field in the WIM header. */ + if (image_index == wim->hdr.image_count) { + WARNING("Found more metadata resources than images"); + goto free_cur_blob_and_continue; + } + + /* Notice very carefully: We are assigning the metadata + * resources to images in the same order in which their + * blob table entries occur on disk. (This is also the + * behavior of Microsoft's software.) In particular, + * this overrides the actual locations of the metadata + * resources themselves in the WIM file as well as any + * information written in the XML data. */ + DEBUG("Found metadata resource for image %"PRIu32" at " + "offset %"PRIu64".", + image_index + 1, + reshdr.offset_in_wim); + + wim->image_metadata[image_index++]->metadata_blob = cur_blob; + } else { + /* Blob table entry for a non-metadata blob. */ + + /* Ignore this blob if it's a duplicate. */ + if (lookup_blob(table, cur_blob->hash)) { + num_duplicate_blobs++; + goto free_cur_blob_and_continue; + } + + /* Insert the blob into the in-memory blob table, keyed + * by its SHA-1 message digest. 
*/ + blob_table_insert(table, cur_blob); + } + + continue; + + free_cur_blob_and_continue: + if (cur_solid_rdescs && + cur_blob->blob_location == BLOB_IN_WIM) + blob_unset_is_located_in_wim_resource(cur_blob); + free_blob_descriptor(cur_blob); + } + cur_blob = NULL; + + if (cur_solid_rdescs) { + /* End of blob table terminated a solid run. */ + ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs); + cur_solid_rdescs = NULL; + if (ret) + goto out; + } + + if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { + WARNING("Could not find metadata resources for all images"); + for (u32 i = image_index; i < wim->hdr.image_count; i++) + put_image_metadata(wim->image_metadata[i], NULL); + wim->hdr.image_count = image_index; + } + + if (num_duplicate_blobs > 0) + WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs); + + if (num_wrong_part_blobs > 0) { + WARNING("Ignoring %zu blobs with wrong part number", + num_wrong_part_blobs); + } + + DEBUG("Done reading blob table."); + wim->blob_table = table; + ret = 0; + goto out_free_buf; + +oom: + ERROR("Not enough memory to read blob table!"); + ret = WIMLIB_ERR_NOMEM; +out: + free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs); + free_blob_descriptor(cur_blob); + free_blob_table(table); +out_free_buf: + FREE(buf); + return ret; +} + +static void +write_blob_descriptor(struct blob_descriptor_disk *disk_entry, + const struct wim_reshdr *out_reshdr, + u16 part_number, u32 refcnt, const u8 *hash) +{ + put_wim_reshdr(out_reshdr, &disk_entry->reshdr); + disk_entry->part_number = cpu_to_le16(part_number); + disk_entry->refcnt = cpu_to_le32(refcnt); + copy_hash(disk_entry->hash, hash); +} + +/* Note: the list of blob descriptors must be sorted so that all entries for the + * same solid resource are consecutive. In addition, blob descriptors with + * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the + * underlying images. 
*/ +int +write_blob_table_from_blob_list(struct list_head *blob_list, + struct filedes *out_fd, + u16 part_number, + struct wim_reshdr *out_reshdr, + int write_resource_flags) +{ + size_t table_size; + struct blob_descriptor *blob; + struct blob_descriptor_disk *table_buf; + struct blob_descriptor_disk *table_buf_ptr; + int ret; + u64 prev_res_offset_in_wim = ~0ULL; + u64 prev_uncompressed_size; + u64 logical_offset; + + table_size = 0; + list_for_each_entry(blob, blob_list, blob_table_list) { + table_size += sizeof(struct blob_descriptor_disk); + + if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID && + blob->out_res_offset_in_wim != prev_res_offset_in_wim) + { + table_size += sizeof(struct blob_descriptor_disk); + prev_res_offset_in_wim = blob->out_res_offset_in_wim; + } + } + + DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")", + table_size, out_fd->offset); + + table_buf = MALLOC(table_size); + if (table_buf == NULL) { + ERROR("Failed to allocate %zu bytes for temporary blob table", + table_size); + return WIMLIB_ERR_NOMEM; + } + table_buf_ptr = table_buf; + + prev_res_offset_in_wim = ~0ULL; + prev_uncompressed_size = 0; + logical_offset = 0; + list_for_each_entry(blob, blob_list, blob_table_list) { + if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + struct wim_reshdr tmp_reshdr; + + /* Eww. When WIMGAPI sees multiple solid resources, it + * expects the offsets to be adjusted as if there were + * really only one solid resource. 
*/ + + if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) { + /* Put the resource entry for solid resource */ + tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim; + tmp_reshdr.size_in_wim = blob->out_res_size_in_wim; + tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER; + tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID; + + write_blob_descriptor(table_buf_ptr++, &tmp_reshdr, + part_number, 1, zero_hash); + + logical_offset += prev_uncompressed_size; + + prev_res_offset_in_wim = blob->out_res_offset_in_wim; + prev_uncompressed_size = blob->out_res_uncompressed_size; + } + tmp_reshdr = blob->out_reshdr; + tmp_reshdr.offset_in_wim += logical_offset; + write_blob_descriptor(table_buf_ptr++, &tmp_reshdr, + part_number, blob->out_refcnt, blob->hash); + } else { + write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr, + part_number, blob->out_refcnt, blob->hash); + } + + } + wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size); + + /* Write the blob table uncompressed. Although wimlib can handle a + * compressed blob table, MS software cannot. */ + ret = write_wim_resource_from_buffer(table_buf, + table_size, + WIM_RESHDR_FLAG_METADATA, + out_fd, + WIMLIB_COMPRESSION_TYPE_NONE, + 0, + out_reshdr, + NULL, + write_resource_flags); + FREE(table_buf); + DEBUG("ret=%d", ret); + return ret; +} + +/* Allocate a blob descriptor for the contents of the buffer, or re-use an + * existing descriptor in @blob_table for an identical blob. 
*/ +struct blob_descriptor * +new_blob_from_data_buffer(const void *buffer, size_t size, + struct blob_table *blob_table) +{ + u8 hash[SHA1_HASH_SIZE]; + struct blob_descriptor *blob, *existing_blob; + + sha1_buffer(buffer, size, hash); + existing_blob = lookup_blob(blob_table, hash); + if (existing_blob) { + wimlib_assert(existing_blob->size == size); + blob = existing_blob; + blob->refcnt++; + } else { + void *buffer_copy; + blob = new_blob_descriptor(); + if (blob == NULL) + return NULL; + buffer_copy = memdup(buffer, size); + if (buffer_copy == NULL) { + free_blob_descriptor(blob); + return NULL; + } + blob->blob_location = BLOB_IN_ATTACHED_BUFFER; + blob->attached_buffer = buffer_copy; + blob->size = size; + copy_hash(blob->hash, hash); + blob_table_insert(blob_table, blob); + } + return blob; +} + +/* + * Calculate the SHA-1 message digest of a blob and move its descriptor from the + * list of unhashed blobs to the blob table, possibly joining it with an + * identical blob. + * + * @blob: + * The blob to hash + * @blob_table: + * The blob table in which the blob needs to be indexed + * @blob_ret: + * On success, a pointer to the resulting blob descriptor is written to + * this location. This will be the same as @blob if it was inserted into + * the blob table, or different if a duplicate blob was found. + * + * Returns 0 on success; nonzero if there is an error reading the blob data. + */ +int +hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table, + struct blob_descriptor **blob_ret) +{ + int ret; + struct blob_descriptor *duplicate_blob; + struct blob_descriptor **back_ptr; + + wimlib_assert(blob->unhashed); + + /* back_ptr must be saved because @back_inode and @back_stream_id are in + * union with the SHA-1 message digest and will no longer be valid once + * the SHA-1 has been calculated. 
*/ + back_ptr = retrieve_pointer_to_unhashed_blob(blob); + + ret = sha1_blob(blob); + if (ret) + return ret; + + list_del(&blob->unhashed_list); + blob->unhashed = 0; + + /* Look for a duplicate blob */ + duplicate_blob = lookup_blob(blob_table, blob->hash); + if (duplicate_blob) { + /* We have a duplicate blob. Transfer the reference counts from + * this blob to the duplicate and update the reference to this + * blob (from an stream) to point to the duplicate. The caller + * is responsible for freeing @blob if needed. */ + wimlib_assert(duplicate_blob->size == blob->size); + duplicate_blob->refcnt += blob->refcnt; + blob->refcnt = 0; + *back_ptr = duplicate_blob; + blob = duplicate_blob; + } else { + /* No duplicate blob, so we need to insert this blob into the + * blob table and treat it as a hashed blob. */ + blob_table_insert(blob_table, blob); + } + *blob_ret = blob; + return 0; +} + +void +blob_to_wimlib_resource_entry(const struct blob_descriptor *blob, + struct wimlib_resource_entry *wentry) +{ + memset(wentry, 0, sizeof(*wentry)); + + wentry->uncompressed_size = blob->size; + if (blob->blob_location == BLOB_IN_WIM) { + wentry->part_number = blob->rdesc->wim->hdr.part_number; + if (blob->flags & WIM_RESHDR_FLAG_SOLID) { + wentry->offset = blob->offset_in_res; + } else { + wentry->compressed_size = blob->rdesc->size_in_wim; + wentry->offset = blob->rdesc->offset_in_wim; + } + wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim; + wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim; + wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size; + } + copy_hash(wentry->sha1_hash, blob->hash); + wentry->reference_count = blob->refcnt; + wentry->is_compressed = (blob->flags & WIM_RESHDR_FLAG_COMPRESSED) != 0; + wentry->is_metadata = (blob->flags & WIM_RESHDR_FLAG_METADATA) != 0; + wentry->is_free = (blob->flags & WIM_RESHDR_FLAG_FREE) != 0; + wentry->is_spanned = (blob->flags & WIM_RESHDR_FLAG_SPANNED) != 0; + wentry->packed = 
(blob->flags & WIM_RESHDR_FLAG_SOLID) != 0; +} + +struct iterate_blob_context { + wimlib_iterate_lookup_table_callback_t cb; + void *user_ctx; +}; + +static int +do_iterate_blob(struct blob_descriptor *blob, void *_ctx) +{ + struct iterate_blob_context *ctx = _ctx; + struct wimlib_resource_entry entry; + + blob_to_wimlib_resource_entry(blob, &entry); + return (*ctx->cb)(&entry, ctx->user_ctx); +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_iterate_lookup_table(WIMStruct *wim, int flags, + wimlib_iterate_lookup_table_callback_t cb, + void *user_ctx) +{ + if (flags != 0) + return WIMLIB_ERR_INVALID_PARAM; + + struct iterate_blob_context ctx = { + .cb = cb, + .user_ctx = user_ctx, + }; + if (wim_has_metadata(wim)) { + int ret; + for (int i = 0; i < wim->hdr.image_count; i++) { + ret = do_iterate_blob(wim->image_metadata[i]->metadata_blob, + &ctx); + if (ret) + return ret; + } + } + return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx); +} diff --git a/src/capture_common.c b/src/capture_common.c index cbc1aa49..0e682619 100644 --- a/src/capture_common.c +++ b/src/capture_common.c @@ -25,10 +25,10 @@ #include +#include "wimlib/blob_table.h" #include "wimlib/capture.h" #include "wimlib/dentry.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/paths.h" #include "wimlib/progress.h" #include "wimlib/textfile.h" @@ -71,12 +71,12 @@ do_capture_progress(struct capture_params *params, int status, /* Successful scan, and visiting inode for the first time */ - /* Tally size of all data streams. */ - const struct wim_lookup_table_entry *lte; - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte_resolved(inode, i); - if (lte) - params->progress.scan.num_bytes_scanned += lte->size; + /* Tally size of all streams. 
*/ + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct blob_descriptor *blob = + stream_blob_resolved(&inode->i_streams[i]); + if (blob) + params->progress.scan.num_bytes_scanned += blob->size; } /* Tally the file itself. */ diff --git a/src/delete_image.c b/src/delete_image.c index 442f8b60..8844d6b3 100644 --- a/src/delete_image.c +++ b/src/delete_image.c @@ -38,15 +38,15 @@ delete_wim_image(WIMStruct *wim, int image) int ret; /* Load the metadata for the image to be deleted. This is necessary - * because streams referenced by files in the image need to have their + * because blobs referenced by files in the image need to have their * reference counts decremented. */ ret = select_wim_image(wim, image); if (ret) return ret; /* Release the reference to the image metadata and decrement reference - * counts on the streams referenced by files in the image. */ - put_image_metadata(wim->image_metadata[image - 1], wim->lookup_table); + * counts on the blobs referenced by files in the image. */ + put_image_metadata(wim->image_metadata[image - 1], wim->blob_table); /* Remove the empty slot from the image metadata array. */ memmove(&wim->image_metadata[image - 1], &wim->image_metadata[image], diff --git a/src/dentry.c b/src/dentry.c index 324fbcf8..55da50cf 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -72,21 +72,22 @@ * resource for a WIM image. */ struct wim_dentry_on_disk { - /* Length of this directory entry in bytes, not including any alternate - * data stream entries. Should be a multiple of 8 so that the following - * dentry or alternate data stream entry is aligned on an 8-byte - * boundary. (If not, wimlib will round it up.) 
It must be at least as - * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), - * plus the lengths of the file name and/or short name if present. + /* Length of this directory entry in bytes, not including any extra + * stream entries. Should be a multiple of 8 so that the following + * dentry or extra stream entry is aligned on an 8-byte boundary. (If + * not, wimlib will round it up.) It must be at least as long as the + * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the + * lengths of the file name and/or short name if present, plus the size + * of any "extra" data. * - * It is also possible for this field to be 0. This situation, which is - * undocumented, indicates the end of a list of sibling nodes in a - * directory. It also means the real length is 8, because the dentry - * included only the length field, but that takes up 8 bytes. */ + * It is also possible for this field to be 0. This case indicates the + * end of a list of sibling entries in a directory. It also means the + * real length is 8, because the dentry included only the length field, + * but that takes up 8 bytes. */ le64 length; - /* Attributes of the file or directory. This is a bitwise OR of the - * FILE_ATTRIBUTE_* constants and should correspond to the value + /* File attributes for the file or directory. This is a bitwise OR of + * the FILE_ATTRIBUTE_* constants and should correspond to the value * retrieved by GetFileAttributes() on Windows. */ le32 attributes; @@ -114,38 +115,25 @@ struct wim_dentry_on_disk { le64 last_access_time; le64 last_write_time; - /* Vaguely, the SHA-1 message digest ("hash") of the file's contents. - * More specifically, this is for the "unnamed data stream" rather than - * any "alternate data streams". This hash value is used to look up the - * corresponding entry in the WIM's stream lookup table to actually find - * the file contents within the WIM. 
+ /* + * Usually this is the SHA-1 message digest of the file's "contents" + * (the unnamed data stream). * - * If the file has no unnamed data stream (e.g. is a directory), then - * this field will be all zeroes. If the unnamed data stream is empty - * (i.e. an "empty file"), then this field is also expected to be all - * zeroes. (It will be if wimlib created the WIM image, at least; - * otherwise it can't be ruled out that the SHA-1 message digest of 0 - * bytes of data is given explicitly.) + * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is + * instead usually the SHA-1 message digest of the uncompressed reparse + * point data. * - * If the file has reparse data, then this field will instead specify - * the SHA-1 message digest of the reparse data. If it is somehow - * possible for a file to have both an unnamed data stream and reparse - * data, then this is not handled by wimlib. - * - * As a further special case, if this field is all zeroes but there is - * an alternate data stream entry with no name and a nonzero SHA-1 - * message digest field, then that hash must be used instead of this - * one. In fact, when named data streams are present, some versions of - * Windows PE contain a bug where they only look in the alternate data - * stream entries for the unnamed data stream, not here. + * However, there are some special rules that need to be applied to + * interpret this field correctly when extra stream entries are present. + * See the code for details. */ - u8 unnamed_stream_hash[SHA1_HASH_SIZE]; + u8 default_hash[SHA1_HASH_SIZE]; /* The format of the following data is not yet completely known and they * do not correspond to Microsoft's documentation. * * If this directory entry is for a reparse point (has - * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the + * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the * version of the following fields containing the reparse tag is valid. 
* Furthermore, the field notated as not_rpfixed, as far as I can tell, * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the @@ -180,9 +168,9 @@ struct wim_dentry_on_disk { } _packed_attribute nonreparse; }; - /* Number of alternate data stream entries that directly follow this - * dentry on-disk. */ - le16 num_alternate_data_streams; + /* Number of extra stream entries that directly follow this dentry + * on-disk. */ + le16 num_extra_streams; /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE * encoded short name (8.3 DOS-compatible name), excluding the null @@ -216,26 +204,36 @@ struct wim_dentry_on_disk { /* u8 tagged_items[] _aligned_attribute(8); */ } _packed_attribute; - /* If num_alternate_data_streams != 0, then there are that many - * alternate data stream entries following the dentry, on an 8-byte - * aligned boundary. They are not counted in the 'length' field of the - * dentry. */ + /* If num_extra_streams != 0, then there are that many extra stream + * entries following the dentry, starting on the next 8-byte aligned + * boundary. They are not counted in the 'length' field of the dentry. + */ -/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry - * that has names of the specified lengths. (Zero length means the - * corresponding name actually does not exist.) The returned value excludes - * tagged metadata items as well as any alternate data stream entries that may - * need to follow the dentry. */ -static u64 -dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes) -{ - u64 length = sizeof(struct wim_dentry_on_disk); - if (file_name_nbytes) - length += (u32)file_name_nbytes + 2; - if (short_name_nbytes) - length += (u32)short_name_nbytes + 2; - return length; -} +/* On-disk format of an extra stream entry. This represents an extra NTFS-style + * "stream" associated with the file, such as a named data stream. 
*/ +struct wim_extra_stream_entry_on_disk { + + /* Length of this extra stream entry, in bytes. This includes all + * fixed-length fields, plus the name and null terminator if present, + * and any needed padding such that the length is a multiple of 8. */ + le64 length; + + /* Reserved field */ + le64 reserved; + + /* SHA-1 message digest of this stream's uncompressed data, or all + * zeroes if this stream's data is of zero length. */ + u8 hash[SHA1_HASH_SIZE]; + + /* Length of this stream's name, in bytes and excluding the null + * terminator; or 0 if this stream is unnamed. */ + le16 name_nbytes; + + /* Stream name in UTF-16LE. It is @name_nbytes bytes long, excluding + * the null terminator. There is a null terminator character if + * @name_nbytes != 0; i.e., if this stream is named. */ + utf16lechar name[]; +} _packed_attribute; static void do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name, @@ -319,68 +317,50 @@ dentry_set_name(struct wim_dentry *dentry, const tchar *name) return 0; } -/* Return the length, in bytes, required for the specified alternate data stream - * (ADS) entry on-disk. This accounts for the fixed-length portion of the ADS - * entry, the {stream name and its null terminator} if present, and the padding - * after the entry to align the next ADS entry or dentry on an 8-byte boundary - * in the uncompressed metadata resource buffer. */ -static u64 -ads_entry_out_total_length(const struct wim_ads_entry *entry) +/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry + * that has names of the specified lengths. (Zero length means the + * corresponding name actually does not exist.) The returned value excludes + * tagged metadata items as well as any extra stream entries that may need to + * follow the dentry. 
*/ +static size_t +dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes) { - u64 len = sizeof(struct wim_ads_entry_on_disk); - if (entry->stream_name_nbytes) - len += (u32)entry->stream_name_nbytes + 2; - return (len + 7) & ~7; + size_t length = sizeof(struct wim_dentry_on_disk); + if (file_name_nbytes) + length += (u32)file_name_nbytes + 2; + if (short_name_nbytes) + length += (u32)short_name_nbytes + 2; + return length; } -/* - * Determine whether to include a "dummy" stream when writing a WIM dentry. - * - * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0, - * for example) contain a bug where they assume the first alternate data stream - * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed - * stream, even if it has a name and the unnamed stream is already specified in - * the hash field of the dentry itself. - * - * wimlib has to work around this behavior by carefully emulating the behavior - * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference - * into the alternate stream entries whenever there are named data streams, even - * though there is already a field in the dentry itself for the unnamed stream - * reference, which then goes to waste. - */ -static bool -inode_needs_dummy_stream(const struct wim_inode *inode) -{ - /* Normal case */ - if (likely(inode->i_num_ads <= 0)) - return false; - - /* Overflow check */ - if (inode->i_num_ads >= 0xFFFF) - return false; - /* Assume the dentry is okay if it already had an unnamed ADS entry when - * it was read in. */ - if (!inode->i_canonical_streams) - return false; +/* Return the length, in bytes, required for the specified stream on-disk, when + * represented as an extra stream entry. 
*/ +static size_t +stream_out_total_length(const struct wim_inode_stream *strm) +{ + /* Account for the fixed length portion */ + size_t len = sizeof(struct wim_extra_stream_entry_on_disk); - /* We can't use use this workaround on encrypted files because WIMGAPI - * reports that the WIM is in an incorrect format. */ - if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) - return false; + /* For named streams, account for the variable-length name. */ + if (stream_is_named(strm)) + len += utf16le_len_bytes(strm->stream_name) + 2; - return true; + /* Account for any necessary padding to the next 8-byte boundary. */ + return (len + 7) & ~7; } -/* Calculate the total number of bytes that will be consumed when a dentry is +/* + * Calculate the total number of bytes that will be consumed when a dentry is * written. This includes the fixed-length portion of the dentry, the name - * fields, any tagged metadata items, and any alternate data stream entries. - * Also includes all alignment bytes. */ -u64 + * fields, any tagged metadata items, and any extra stream entries. This also + * includes all alignment bytes. 
+ */ +size_t dentry_out_total_length(const struct wim_dentry *dentry) { const struct wim_inode *inode = dentry->d_inode; - u64 len; + size_t len; len = dentry_min_len_with_names(dentry->file_name_nbytes, dentry->short_name_nbytes); @@ -391,12 +371,34 @@ dentry_out_total_length(const struct wim_dentry *dentry) len = (len + 7) & ~7; } - if (unlikely(inode->i_num_ads)) { - if (inode_needs_dummy_stream(inode)) - len += ads_entry_out_total_length(&(struct wim_ads_entry){}); + if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { + /* + * Extra stream entries: + * + * - Use one extra stream entry for each named data stream + * - Use one extra stream entry for the unnamed data stream when there is either: + * - a reparse point stream + * - at least one named data stream (for Windows PE bug workaround) + * - Use one extra stream entry for the reparse point stream if there is one + */ + bool have_named_data_stream = false; + bool have_reparse_point_stream = false; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (stream_is_named_data_stream(strm)) { + len += stream_out_total_length(strm); + have_named_data_stream = true; + } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { + wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); + have_reparse_point_stream = true; + } + } - for (u16 i = 0; i < inode->i_num_ads; i++) - len += ads_entry_out_total_length(&inode->i_ads_entries[i]); + if (have_named_data_stream || have_reparse_point_stream) { + if (have_reparse_point_stream) + len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7; + len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7; + } } return len; @@ -991,7 +993,6 @@ new_filler_directory(struct wim_dentry **dentry_ret) return ret; /* Leave the inode number as 0; this is allowed for non * hard-linked files. 
*/ - dentry->d_inode->i_resolved = 1; dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY; *dentry_ret = dentry; return 0; @@ -1036,9 +1037,9 @@ do_free_dentry(struct wim_dentry *dentry, void *_ignore) } static int -do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table) +do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table) { - inode_unref_streams(dentry->d_inode, lookup_table); + inode_unref_blobs(dentry->d_inode, blob_table); free_dentry(dentry); return 0; } @@ -1050,10 +1051,10 @@ do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table) * The root of the dentry tree to free. If NULL, this function has no * effect. * - * @lookup_table: - * A pointer to the lookup table for the WIM, or NULL if not specified. If + * @blob_table: + * A pointer to the blob table for the WIM, or NULL if not specified. If * specified, this function will decrement the reference counts of the - * single-instance streams referenced by the dentries. + * blobs referenced by the dentries. * * This function also releases references to the corresponding inodes. * @@ -1062,16 +1063,16 @@ do_free_dentry_and_unref_streams(struct wim_dentry *dentry, void *lookup_table) * function. */ void -free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) +free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table) { int (*f)(struct wim_dentry *, void *); - if (lookup_table) - f = do_free_dentry_and_unref_streams; + if (blob_table) + f = do_free_dentry_and_unref_blobs; else f = do_free_dentry; - for_dentry_in_tree_depth(root, f, lookup_table); + for_dentry_in_tree_depth(root, f, blob_table); } /* Insert the @child dentry into the case sensitive index of the @dir directory. 
@@ -1223,8 +1224,182 @@ read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode) return 0; } -/* Read a dentry, including all alternate data stream entries that follow it, - * from an uncompressed metadata resource buffer. */ +/* + * Set the type of each stream for an encrypted file. + * + * All data streams of the encrypted file should have been packed into a single + * stream in the format provided by ReadEncryptedFileRaw() on Windows. We + * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA. + * + * Encrypted files can't have a reparse point stream. In the on-disk NTFS + * format they can, but as far as I know the reparse point stream of an + * encrypted file can't be stored in the WIM format in a way that's compatible + * with WIMGAPI, nor is there even any way for it to be read or written on + * Windows when the process does not have access to the file encryption key. + */ +static void +assign_stream_types_encrypted(struct wim_inode *inode) +{ + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash)) + { + strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA; + return; + } + } +} + +/* + * Set the type of each stream for an unencrypted file. + * + * There will be an unnamed data stream, a reparse point stream, or both an + * unnamed data stream and a reparse point stream. In addition, there may be + * named data streams. 
+ */ +static void +assign_stream_types_unencrypted(struct wim_inode *inode) +{ + bool found_reparse_point_stream = false; + bool found_unnamed_data_stream = false; + struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL; + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + + if (stream_is_named(strm)) { + /* Named data stream */ + strm->stream_type = STREAM_TYPE_DATA; + } else if (!is_zero_hash(strm->_stream_hash)) { + if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) && + !found_reparse_point_stream) { + found_reparse_point_stream = true; + strm->stream_type = STREAM_TYPE_REPARSE_POINT; + } else if (!found_unnamed_data_stream) { + found_unnamed_data_stream = true; + strm->stream_type = STREAM_TYPE_DATA; + } + } else { + /* If no stream name is specified and the hash is zero, + * then remember this stream for later so that we can + * assign it to the unnamed data stream if we don't find + * a better candidate. */ + unnamed_stream_with_zero_hash = strm; + } + } + + if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL) + unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA; +} + +/* + * Read and interpret the collection of streams for the specified inode. 
+ */ +static int +setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode, + unsigned num_extra_streams, const u8 *default_hash, + u64 *offset_p) +{ + const u8 *orig_p = p; + + inode->i_num_streams = 1 + num_extra_streams; + + if (likely(inode->i_num_streams <= ARRAY_LEN(inode->i_embedded_streams))) { + inode->i_streams = inode->i_embedded_streams; + } else { + inode->i_streams = CALLOC(inode->i_num_streams, + sizeof(inode->i_streams[0])); + if (!inode->i_streams) + return WIMLIB_ERR_NOMEM; + } + + /* Use the default hash field for the first stream */ + inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME; + copy_hash(inode->i_streams[0]._stream_hash, default_hash); + inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN; + inode->i_streams[0].stream_id = 0; + + /* Read the extra stream entries */ + for (unsigned i = 1; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm; + const struct wim_extra_stream_entry_on_disk *disk_strm; + u64 length; + u16 name_nbytes; + + strm = &inode->i_streams[i]; + + strm->stream_id = i; + + /* Do we have at least the size of the fixed-length data we know we + * need? */ + if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + disk_strm = (const struct wim_extra_stream_entry_on_disk *)p; + + /* Read the length field */ + length = le64_to_cpu(disk_strm->length); + + /* 8-byte align the length */ + length = (length + 7) & ~7; + + /* Make sure the length field is neither so small it doesn't + * include all the fixed-length data nor so large it overflows + * the metadata resource buffer. */ + if (length < sizeof(struct wim_extra_stream_entry_on_disk) || + length > (end - p)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + /* Read the rest of the fixed-length data. */ + + copy_hash(strm->_stream_hash, disk_strm->hash); + name_nbytes = le16_to_cpu(disk_strm->name_nbytes); + + /* If name_nbytes != 0, the stream is named.
*/ + if (name_nbytes != 0) { + /* The name is encoded in UTF16-LE, which uses 2-byte + * coding units, so the length of the name had better be + * an even number of bytes. */ + if (name_nbytes & 1) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + /* Add the length of the stream name to get the length + * we actually need to read. Make sure this isn't more + * than the specified length of the entry. */ + if (sizeof(struct wim_extra_stream_entry_on_disk) + + name_nbytes > length) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + strm->stream_name = utf16le_dupz(disk_strm->name, + name_nbytes); + if (!strm->stream_name) + return WIMLIB_ERR_NOMEM; + } else { + strm->stream_name = (utf16lechar *)NO_STREAM_NAME; + } + + strm->stream_type = STREAM_TYPE_UNKNOWN; + + p += length; + } + + inode->i_next_stream_id = inode->i_num_streams; + + /* Now, assign a type to each stream. Unfortunately this requires + * various hacks because stream types aren't explicitly provided in the + * WIM on-disk format. */ + + if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) + assign_stream_types_encrypted(inode); + else + assign_stream_types_unencrypted(inode); + + *offset_p += p - orig_p; + return 0; +} + +/* Read a dentry, including all extra stream entries that follow it, from an + * uncompressed metadata resource buffer. */ static int read_dentry(const u8 * restrict buf, size_t buf_len, u64 *offset_p, struct wim_dentry **dentry_ret) @@ -1249,19 +1424,14 @@ read_dentry(const u8 * restrict buf, size_t buf_len, /* Check for buffer overrun. */ if (unlikely(offset + sizeof(u64) > buf_len || offset + sizeof(u64) < offset)) - { - ERROR("Directory entry starting at %"PRIu64" ends past the " - "end of the metadata resource (size %zu)", - offset, buf_len); return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - } /* Get pointer to the dentry data. */ p = &buf[offset]; disk_dentry = (const struct wim_dentry_on_disk*)p; /* Get dentry length. 
*/ - length = le64_to_cpu(disk_dentry->length); + length = (le64_to_cpu(disk_dentry->length) + 7) & ~7; /* Check for end-of-directory. */ if (length <= 8) { @@ -1270,21 +1440,13 @@ read_dentry(const u8 * restrict buf, size_t buf_len, } /* Validate dentry length. */ - if (unlikely(length < sizeof(struct wim_dentry_on_disk))) { - ERROR("Directory entry has invalid length of %"PRIu64" bytes", - length); + if (unlikely(length < sizeof(struct wim_dentry_on_disk))) return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - } /* Check for buffer overrun. */ if (unlikely(offset + length > buf_len || offset + length < offset)) - { - ERROR("Directory entry at offset %"PRIu64" and with size " - "%"PRIu64" ends past the end of the metadata resource " - "(size %zu)", offset, length, buf_len); return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - } /* Allocate new dentry structure, along with a preliminary inode. */ ret = new_dentry_with_timeless_inode(NULL, &dentry); @@ -1300,7 +1462,6 @@ read_dentry(const u8 * restrict buf, size_t buf_len, inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time); inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time); inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time); - copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash); /* I don't know what's going on here. It seems like M$ screwed up the * reparse points, then put the fields in the same place and didn't @@ -1311,15 +1472,12 @@ read_dentry(const u8 * restrict buf, size_t buf_len, inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag); inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2); inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed); - /* Leave inode->i_ino at 0. Note that this means the WIM file - * cannot archive hard-linked reparse points. Such a thing - * doesn't really make sense anyway, although I believe it's - * theoretically possible to have them on NTFS. */ + /* Leave inode->i_ino at 0. 
Note: this means that WIM cannot + * represent multiple hard links to a reparse point file. */ } else { inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1); inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id); } - inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams); /* Now onto reading the names. There are two of them: the (long) file * name, and the short name. */ @@ -1328,24 +1486,16 @@ read_dentry(const u8 * restrict buf, size_t buf_len, file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes); if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) { - ERROR("Dentry name is not valid UTF-16 (odd number of bytes)!"); ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; goto err_free_dentry; } /* We now know the length of the file name and short name. Make sure - * the length of the dentry is large enough to actually hold them. - * - * The calculated length here is unaligned to allow for the possibility - * that the dentry's length is unaligned, although this would be - * unexpected. */ + * the length of the dentry is large enough to actually hold them. */ calculated_size = dentry_min_len_with_names(file_name_nbytes, short_name_nbytes); if (unlikely(length < calculated_size)) { - ERROR("Unexpected end of directory entry! (Expected " - "at least %"PRIu64" bytes, got %"PRIu64" bytes.)", - calculated_size, length); ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; goto err_free_dentry; } @@ -1377,39 +1527,23 @@ read_dentry(const u8 * restrict buf, size_t buf_len, p += (u32)short_name_nbytes + 2; } - /* Read extra data at end of dentry (but before alternate data stream - * entries). This may contain tagged items. */ + /* Read extra data at end of dentry (but before extra stream entries). + * This may contain tagged metadata items. */ ret = read_extra_data(p, &buf[offset + length], inode); if (ret) goto err_free_dentry; - /* Align the dentry length. 
*/ - length = (length + 7) & ~7; - offset += length; - /* Read the alternate data streams, if present. inode->i_num_ads tells - * us how many they are, and they will directly follow the dentry in the - * metadata resource buffer. - * - * Note that each alternate data stream entry begins on an 8-byte - * aligned boundary, and the alternate data stream entries seem to NOT - * be included in the dentry->length field for some reason. */ - if (unlikely(inode->i_num_ads != 0)) { - size_t orig_bytes_remaining; - size_t bytes_remaining; - - if (offset > buf_len) { - ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; - goto err_free_dentry; - } - bytes_remaining = buf_len - offset; - orig_bytes_remaining = bytes_remaining; - ret = read_ads_entries(&buf[offset], inode, &bytes_remaining); - if (ret) - goto err_free_dentry; - offset += (orig_bytes_remaining - bytes_remaining); - } + /* Set up the inode's collection of streams. */ + ret = setup_inode_streams(&buf[offset], + &buf[buf_len], + inode, + le16_to_cpu(disk_dentry->num_extra_streams), + disk_dentry->default_hash, + &offset); + if (ret) + goto err_free_dentry; *offset_p = offset; /* Sets offset of next dentry in directory */ *dentry_ret = dentry; @@ -1585,49 +1719,37 @@ err_free_dentry_tree: return ret; } -/* - * Write a WIM alternate data stream (ADS) entry to an output buffer. - * - * @ads_entry: - * The ADS entry to write. - * - * @hash: - * The hash field to use (instead of the one stored directly in the ADS - * entry, which isn't valid if the inode has been "resolved"). - * - * @p: - * The memory location to which to write the data. - * - * Returns a pointer to the byte after the last byte written. 
- */ static u8 * -write_ads_entry(const struct wim_ads_entry *ads_entry, - const u8 *hash, u8 * restrict p) +write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name, + const u8 * restrict hash) { - struct wim_ads_entry_on_disk *disk_ads_entry = - (struct wim_ads_entry_on_disk*)p; + struct wim_extra_stream_entry_on_disk *disk_strm = + (struct wim_extra_stream_entry_on_disk *)p; u8 *orig_p = p; + size_t name_nbytes; - disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved); - copy_hash(disk_ads_entry->hash, hash); - disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes); - p += sizeof(struct wim_ads_entry_on_disk); - if (ads_entry->stream_name_nbytes) { - p = mempcpy(p, ads_entry->stream_name, - (u32)ads_entry->stream_name_nbytes + 2); - } + if (name == NO_STREAM_NAME) + name_nbytes = 0; + else + name_nbytes = utf16le_len_bytes(name); + + disk_strm->reserved = 0; + copy_hash(disk_strm->hash, hash); + disk_strm->name_nbytes = cpu_to_le16(name_nbytes); + p += sizeof(struct wim_extra_stream_entry_on_disk); + if (name_nbytes != 0) + p = mempcpy(p, name, name_nbytes + 2); /* Align to 8-byte boundary */ while ((uintptr_t)p & 7) *p++ = 0; - disk_ads_entry->length = cpu_to_le64(p - orig_p); + disk_strm->length = cpu_to_le64(p - orig_p); return p; } /* * Write a WIM dentry to an output buffer. * - * This includes any alternate data stream entries that may follow the dentry - * itself. + * This includes any extra stream entries that may follow the dentry itself. * * @dentry: * The dentry to write. 
@@ -1643,15 +1765,11 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) const struct wim_inode *inode; struct wim_dentry_on_disk *disk_dentry; const u8 *orig_p; - const u8 *hash; - bool use_dummy_stream; - u16 num_ads; wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */ orig_p = p; inode = dentry->d_inode; - use_dummy_stream = inode_needs_dummy_stream(inode); disk_dentry = (struct wim_dentry_on_disk*)p; disk_dentry->attributes = cpu_to_le32(inode->i_attributes); @@ -1664,11 +1782,6 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); - if (use_dummy_stream) - hash = zero_hash; - else - hash = inode_stream_hash(inode, 0); - copy_hash(disk_dentry->unnamed_stream_hash, hash); if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); @@ -1679,10 +1792,7 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->nonreparse.hard_link_group_id = cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino); } - num_ads = inode->i_num_ads; - if (use_dummy_stream) - num_ads++; - disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads); + disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes); disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes); p += sizeof(struct wim_dentry_on_disk); @@ -1702,21 +1812,79 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) if (inode->i_extra_size) { /* Extra tagged items --- not usually present. 
*/ p = mempcpy(p, inode->i_extra, inode->i_extra_size); + + /* Align to 8-byte boundary */ while ((uintptr_t)p & 7) *p++ = 0; } disk_dentry->length = cpu_to_le64(p - orig_p); - if (use_dummy_stream) { - hash = inode_unnamed_stream_hash(inode); - p = write_ads_entry(&(struct wim_ads_entry){}, hash, p); - } + /* Streams */ + + if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { + const struct wim_inode_stream *efs_strm; + const u8 *efs_hash; + + efs_strm = inode_get_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA, + NO_STREAM_NAME); + efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash; + copy_hash(disk_dentry->default_hash, efs_hash); + disk_dentry->num_extra_streams = cpu_to_le16(0); + } else { + /* + * Extra stream entries: + * + * - Use one extra stream entry for each named data stream + * - Use one extra stream entry for the unnamed data stream when there is either: + * - a reparse point stream + * - at least one named data stream (for Windows PE bug workaround) + * - Use one extra stream entry for the reparse point stream if there is one + */ + bool have_named_data_stream = false; + bool have_reparse_point_stream = false; + u16 num_extra_streams = 0; + const u8 *unnamed_data_stream_hash = zero_hash; + const u8 *reparse_point_hash; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (strm->stream_type == STREAM_TYPE_DATA) { + if (stream_is_named(strm)) + have_named_data_stream = true; + else + unnamed_data_stream_hash = stream_hash(strm); + } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { + have_reparse_point_stream = true; + reparse_point_hash = stream_hash(strm); + } + } - /* Write the alternate data streams entries, if any. 
*/ - for (u16 i = 0; i < inode->i_num_ads; i++) { - hash = inode_stream_hash(inode, i + 1); - p = write_ads_entry(&inode->i_ads_entries[i], hash, p); + if (have_reparse_point_stream || have_named_data_stream) { + + copy_hash(disk_dentry->default_hash, zero_hash); + + if (have_reparse_point_stream) { + p = write_extra_stream_entry(p, NO_STREAM_NAME, + reparse_point_hash); + num_extra_streams++; + } + + p = write_extra_stream_entry(p, NO_STREAM_NAME, + unnamed_data_stream_hash); + num_extra_streams++; + } else { + copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash); + } + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (stream_is_named_data_stream(strm)) { + p = write_extra_stream_entry(p, strm->stream_name, + stream_hash(strm)); + num_extra_streams++; + } + } + disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams); } return p; diff --git a/src/encoding.c b/src/encoding.c index 5819f0ef..5ff1df23 100644 --- a/src/encoding.c +++ b/src/encoding.c @@ -136,7 +136,7 @@ varname1##_to_##varname2##_nbytes(const chartype1 *in, size_t in_nbytes,\ bool buf_onheap; \ bufsize = (worst_case_len_expr) * sizeof(chartype2); \ /* Worst case length */ \ - if (bufsize <= STACK_MAX) { \ + if (bufsize <= STACK_MAX) { \ buf = alloca(bufsize); \ buf_onheap = false; \ } else { \ @@ -558,7 +558,31 @@ cmp_utf16le_strings(const utf16lechar *s1, size_t n1, return (n1 < n2) ? -1 : 1; } -/* Duplicate a UTF16-LE string. The input string might not be null terminated +/* Like cmp_utf16le_strings(), but assumes the strings are null terminated. */ +int +cmp_utf16le_strings_z(const utf16lechar *s1, const utf16lechar *s2, + bool ignore_case) +{ + if (ignore_case) { + for (;;) { + u16 c1 = upcase[le16_to_cpu(*s1)]; + u16 c2 = upcase[le16_to_cpu(*s2)]; + if (c1 != c2) + return (c1 < c2) ? 
-1 : 1; + if (c1 == 0) + return 0; + s1++, s2++; + } + } else { + while (*s1 && *s1 == *s2) + s1++, s2++; + if (*s1 == *s2) + return 0; + return (le16_to_cpu(*s1) < le16_to_cpu(*s2)) ? -1 : 1; + } +} + +/* Duplicate a UTF-16LE string. The input string might not be null terminated * and might be misaligned, but the returned string is guaranteed to be null * terminated and properly aligned. */ utf16lechar * @@ -571,3 +595,32 @@ utf16le_dupz(const void *ustr, size_t usize) } return dup; } + +/* Duplicate a null-terminated UTF-16LE string. */ +utf16lechar * +utf16le_dup(const utf16lechar *ustr) +{ + const utf16lechar *p = ustr; + while (*p++) + ; + return memdup(ustr, (const u8 *)p - (const u8 *)ustr); +} + +/* Return the length, in bytes, of a null-terminated UTF-16LE string, + * excluding the null terminator. */ +size_t +utf16le_len_bytes(const utf16lechar *s) +{ + const utf16lechar *p = s; + while (*p) + p++; + return (p - s) * sizeof(utf16lechar); +} + +/* Return the length, in UTF-16 coding units, of a null-terminated UTF-16LE + * string, excluding the null terminator.
*/ +size_t +utf16le_len_chars(const utf16lechar *s) +{ + return utf16le_len_bytes(s) / sizeof(utf16lechar); +} diff --git a/src/export_image.c b/src/export_image.c index bfc36bde..8d733284 100644 --- a/src/export_image.c +++ b/src/export_image.c @@ -24,84 +24,83 @@ #endif #include "wimlib.h" +#include "wimlib/blob_table.h" #include "wimlib/error.h" #include "wimlib/inode.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/xml.h" static int -lte_set_not_exported(struct wim_lookup_table_entry *lte, void *_ignore) +blob_set_not_exported(struct blob_descriptor *blob, void *_ignore) { - lte->out_refcnt = 0; - lte->was_exported = 0; + blob->out_refcnt = 0; + blob->was_exported = 0; return 0; } static int -lte_rollback_export(struct wim_lookup_table_entry *lte, void *_lookup_table) +blob_rollback_export(struct blob_descriptor *blob, void *_blob_table) { - struct wim_lookup_table *lookup_table = _lookup_table; + struct blob_table *blob_table = _blob_table; - lte->refcnt -= lte->out_refcnt; - if (lte->was_exported) { - lookup_table_unlink(lookup_table, lte); - free_lookup_table_entry(lte); + blob->refcnt -= blob->out_refcnt; + if (blob->was_exported) { + blob_table_unlink(blob_table, blob); + free_blob_descriptor(blob); } return 0; } static int -inode_export_streams(struct wim_inode *inode, - struct wim_lookup_table *src_lookup_table, - struct wim_lookup_table *dest_lookup_table, - bool gift) +inode_export_blobs(struct wim_inode *inode, struct blob_table *src_blob_table, + struct blob_table *dest_blob_table, bool gift) { unsigned i; const u8 *hash; - struct wim_lookup_table_entry *src_lte, *dest_lte; + struct blob_descriptor *src_blob, *dest_blob; inode_unresolve_streams(inode); - for (i = 0; i <= inode->i_num_ads; i++) { - /* Retrieve SHA1 message digest of stream to export. */ - hash = inode_stream_hash(inode, i); - if (is_zero_hash(hash)) /* Empty stream? 
*/ + for (i = 0; i < inode->i_num_streams; i++) { + + /* Retrieve SHA-1 message digest of blob to export. */ + hash = stream_hash(&inode->i_streams[i]); + if (is_zero_hash(hash)) /* Empty blob? */ continue; - /* Search for the stream (via SHA1 message digest) in the + /* Search for the blob (via SHA-1 message digest) in the * destination WIM. */ - dest_lte = lookup_stream(dest_lookup_table, hash); - if (!dest_lte) { - /* Stream not yet present in destination WIM. Search - * for it in the source WIM, then export it into the + dest_blob = lookup_blob(dest_blob_table, hash); + if (!dest_blob) { + /* Blob not yet present in destination WIM. Search for + * it in the source WIM, then export it into the * destination WIM. */ - src_lte = lookup_stream(src_lookup_table, hash); - if (!src_lte) - return stream_not_found_error(inode, hash); + src_blob = lookup_blob(src_blob_table, hash); + if (!src_blob) + return blob_not_found_error(inode, hash); if (gift) { - dest_lte = src_lte; - lookup_table_unlink(src_lookup_table, src_lte); + dest_blob = src_blob; + blob_table_unlink(src_blob_table, src_blob); } else { - dest_lte = clone_lookup_table_entry(src_lte); - if (!dest_lte) + dest_blob = clone_blob_descriptor(src_blob); + if (!dest_blob) return WIMLIB_ERR_NOMEM; } - dest_lte->refcnt = 0; - dest_lte->out_refcnt = 0; - dest_lte->was_exported = 1; - lookup_table_insert(dest_lookup_table, dest_lte); + dest_blob->refcnt = 0; + dest_blob->out_refcnt = 0; + dest_blob->was_exported = 1; + blob_table_insert(dest_blob_table, dest_blob); } - /* Stream is present in destination WIM (either pre-existing, + /* Blob is present in destination WIM (either pre-existing, * already exported, or just exported above). Increment its * reference count appropriately. Note: we use 'refcnt' for * the raw reference count, but 'out_refcnt' for references * arising just from the export operation; this is used to roll * back a failed export if needed. 
*/ - dest_lte->refcnt += inode->i_nlink; - dest_lte->out_refcnt += inode->i_nlink; + dest_blob->refcnt += inode->i_nlink; + dest_blob->out_refcnt += inode->i_nlink; } return 0; } @@ -155,16 +154,16 @@ wimlib_export_image(WIMStruct *src_wim, } orig_dest_image_count = dest_wim->hdr.image_count; - /* Stream checksums must be known before proceeding. */ - ret = wim_checksum_unhashed_streams(src_wim); + /* Blob checksums must be known before proceeding. */ + ret = wim_checksum_unhashed_blobs(src_wim); if (ret) return ret; - ret = wim_checksum_unhashed_streams(dest_wim); + ret = wim_checksum_unhashed_blobs(dest_wim); if (ret) return ret; /* Enable rollbacks */ - for_lookup_table_entry(dest_wim->lookup_table, lte_set_not_exported, NULL); + for_blob_in_table(dest_wim->blob_table, blob_set_not_exported, NULL); /* Export each requested image. */ for (src_image = start_src_image; @@ -207,12 +206,12 @@ wimlib_export_image(WIMStruct *src_wim, src_imd = wim_get_current_image_metadata(src_wim); /* Iterate through inodes in the source image and export their - * streams into the destination WIM. */ + * blobs into the destination WIM. 
*/ image_for_each_inode(inode, src_imd) { - ret = inode_export_streams(inode, - src_wim->lookup_table, - dest_wim->lookup_table, - export_flags & WIMLIB_EXPORT_FLAG_GIFT); + ret = inode_export_blobs(inode, + src_wim->blob_table, + dest_wim->blob_table, + export_flags & WIMLIB_EXPORT_FLAG_GIFT); if (ret) goto out_rollback; } @@ -259,8 +258,8 @@ wimlib_export_image(WIMStruct *src_wim, } if (export_flags & WIMLIB_EXPORT_FLAG_GIFT) { - free_lookup_table(src_wim->lookup_table); - src_wim->lookup_table = NULL; + free_blob_table(src_wim->blob_table); + src_wim->blob_table = NULL; } return 0; @@ -275,7 +274,7 @@ out_rollback: put_image_metadata(dest_wim->image_metadata[ --dest_wim->hdr.image_count], NULL); } - for_lookup_table_entry(dest_wim->lookup_table, lte_rollback_export, - dest_wim->lookup_table); + for_blob_in_table(dest_wim->blob_table, blob_rollback_export, + dest_wim->blob_table); return ret; } diff --git a/src/extract.c b/src/extract.c index a5be7d1b..5b1c1cc6 100644 --- a/src/extract.c +++ b/src/extract.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -46,11 +46,11 @@ #include "wimlib/apply.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/pathlist.h" #include "wimlib/paths.h" @@ -136,83 +136,55 @@ end_file_metadata_phase(struct apply_ctx *ctx) return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA); } -/* Check whether the extraction of a dentry should be skipped completely. 
*/ -static bool -dentry_is_supported(struct wim_dentry *dentry, - const struct wim_features *supported_features) -{ - struct wim_inode *inode = dentry->d_inode; - - if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - if (!(supported_features->reparse_points || - (inode_is_symlink(inode) && - supported_features->symlink_reparse_points))) - return false; - } - - if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { - if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) { - if (!supported_features->encrypted_directories) - return false; - } else { - if (!supported_features->encrypted_files) - return false; - } - } - - return true; -} - - #define PWM_ALLOW_WIM_HDR 0x00001 -/* Read the header from a stream in a pipable WIM. */ +/* Read the header for a blob in a pipable WIM. */ static int -read_pwm_stream_header(WIMStruct *pwm, struct wim_lookup_table_entry *lte, - struct wim_resource_spec *rspec, - int flags, struct wim_header_disk *hdr_ret) +read_pwm_blob_header(WIMStruct *pwm, struct blob_descriptor *blob, + struct wim_resource_descriptor *rdesc, + int flags, struct wim_header_disk *hdr_ret) { union { - struct pwm_stream_hdr stream_hdr; + struct pwm_blob_hdr blob_hdr; struct wim_header_disk pwm_hdr; } buf; struct wim_reshdr reshdr; int ret; - ret = full_read(&pwm->in_fd, &buf.stream_hdr, sizeof(buf.stream_hdr)); + ret = full_read(&pwm->in_fd, &buf.blob_hdr, sizeof(buf.blob_hdr)); if (ret) goto read_error; if ((flags & PWM_ALLOW_WIM_HDR) && - le64_to_cpu(buf.stream_hdr.magic) == PWM_MAGIC) + le64_to_cpu(buf.blob_hdr.magic) == PWM_MAGIC) { - BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.stream_hdr)); - ret = full_read(&pwm->in_fd, &buf.stream_hdr + 1, - sizeof(buf.pwm_hdr) - sizeof(buf.stream_hdr)); + BUILD_BUG_ON(sizeof(buf.pwm_hdr) < sizeof(buf.blob_hdr)); + ret = full_read(&pwm->in_fd, &buf.blob_hdr + 1, + sizeof(buf.pwm_hdr) - sizeof(buf.blob_hdr)); if (ret) goto read_error; - lte->resource_location = RESOURCE_NONEXISTENT; + blob->blob_location = 
BLOB_NONEXISTENT; memcpy(hdr_ret, &buf.pwm_hdr, sizeof(buf.pwm_hdr)); return 0; } - if (le64_to_cpu(buf.stream_hdr.magic) != PWM_STREAM_MAGIC) { - ERROR("Data read on pipe is invalid (expected stream header)."); + if (le64_to_cpu(buf.blob_hdr.magic) != PWM_BLOB_MAGIC) { + ERROR("Data read on pipe is invalid (expected blob header)."); return WIMLIB_ERR_INVALID_PIPABLE_WIM; } - copy_hash(lte->hash, buf.stream_hdr.hash); + copy_hash(blob->hash, buf.blob_hdr.hash); reshdr.size_in_wim = 0; - reshdr.flags = le32_to_cpu(buf.stream_hdr.flags); + reshdr.flags = le32_to_cpu(buf.blob_hdr.flags); reshdr.offset_in_wim = pwm->in_fd.offset; - reshdr.uncompressed_size = le64_to_cpu(buf.stream_hdr.uncompressed_size); - wim_res_hdr_to_spec(&reshdr, pwm, rspec); - lte_bind_wim_resource_spec(lte, rspec); - lte->flags = rspec->flags; - lte->size = rspec->uncompressed_size; - lte->offset_in_res = 0; + reshdr.uncompressed_size = le64_to_cpu(buf.blob_hdr.uncompressed_size); + wim_res_hdr_to_desc(&reshdr, pwm, rdesc); + blob_set_is_located_in_wim_resource(blob, rdesc); + blob->flags = rdesc->flags; + blob->size = rdesc->uncompressed_size; + blob->offset_in_res = 0; return 0; read_error: @@ -221,24 +193,24 @@ read_error: } static int -load_streams_from_pipe(struct apply_ctx *ctx, - const struct read_stream_list_callbacks *cbs) +read_blobs_from_pipe(struct apply_ctx *ctx, + const struct read_blob_list_callbacks *cbs) { - struct wim_lookup_table_entry *found_lte = NULL; - struct wim_resource_spec *rspec = NULL; - struct wim_lookup_table *lookup_table; + struct blob_descriptor *found_blob = NULL; + struct wim_resource_descriptor *rdesc = NULL; + struct blob_table *blob_table; int ret; ret = WIMLIB_ERR_NOMEM; - found_lte = new_lookup_table_entry(); - if (!found_lte) + found_blob = new_blob_descriptor(); + if (!found_blob) goto out; - rspec = MALLOC(sizeof(struct wim_resource_spec)); - if (!rspec) + rdesc = MALLOC(sizeof(struct wim_resource_descriptor)); + if (!rdesc) goto out; - lookup_table = 
ctx->wim->lookup_table; + blob_table = ctx->wim->blob_table; memcpy(ctx->progress.extract.guid, ctx->wim->hdr.guid, WIM_GUID_LEN); ctx->progress.extract.part_number = ctx->wim->hdr.part_number; ctx->progress.extract.total_parts = ctx->wim->hdr.total_parts; @@ -246,48 +218,48 @@ load_streams_from_pipe(struct apply_ctx *ctx, if (ret) goto out; - while (ctx->num_streams_remaining) { + while (ctx->num_blobs_remaining) { struct wim_header_disk pwm_hdr; - struct wim_lookup_table_entry *needed_lte; + struct blob_descriptor *needed_blob; - if (found_lte->resource_location != RESOURCE_NONEXISTENT) - lte_unbind_wim_resource_spec(found_lte); - ret = read_pwm_stream_header(ctx->wim, found_lte, rspec, - PWM_ALLOW_WIM_HDR, &pwm_hdr); + if (found_blob->blob_location != BLOB_NONEXISTENT) + blob_unset_is_located_in_wim_resource(found_blob); + ret = read_pwm_blob_header(ctx->wim, found_blob, rdesc, + PWM_ALLOW_WIM_HDR, &pwm_hdr); if (ret) goto out; - if ((found_lte->resource_location != RESOURCE_NONEXISTENT) - && !(found_lte->flags & WIM_RESHDR_FLAG_METADATA) - && (needed_lte = lookup_stream(lookup_table, found_lte->hash)) - && (needed_lte->out_refcnt)) + if ((found_blob->blob_location != BLOB_NONEXISTENT) + && !(found_blob->flags & WIM_RESHDR_FLAG_METADATA) + && (needed_blob = lookup_blob(blob_table, found_blob->hash)) + && (needed_blob->out_refcnt)) { - needed_lte->offset_in_res = found_lte->offset_in_res; - needed_lte->flags = found_lte->flags; - needed_lte->size = found_lte->size; + needed_blob->offset_in_res = found_blob->offset_in_res; + needed_blob->flags = found_blob->flags; + needed_blob->size = found_blob->size; - lte_unbind_wim_resource_spec(found_lte); - lte_bind_wim_resource_spec(needed_lte, rspec); + blob_unset_is_located_in_wim_resource(found_blob); + blob_set_is_located_in_wim_resource(needed_blob, rdesc); - ret = (*cbs->begin_stream)(needed_lte, - cbs->begin_stream_ctx); + ret = (*cbs->begin_blob)(needed_blob, + cbs->begin_blob_ctx); if (ret) { - 
lte_unbind_wim_resource_spec(needed_lte); + blob_unset_is_located_in_wim_resource(needed_blob); goto out; } - ret = extract_stream(needed_lte, needed_lte->size, - cbs->consume_chunk, - cbs->consume_chunk_ctx); + ret = extract_blob(needed_blob, needed_blob->size, + cbs->consume_chunk, + cbs->consume_chunk_ctx); - ret = (*cbs->end_stream)(needed_lte, ret, - cbs->end_stream_ctx); - lte_unbind_wim_resource_spec(needed_lte); + ret = (*cbs->end_blob)(needed_blob, ret, + cbs->end_blob_ctx); + blob_unset_is_located_in_wim_resource(needed_blob); if (ret) goto out; - ctx->num_streams_remaining--; - } else if (found_lte->resource_location != RESOURCE_NONEXISTENT) { - ret = skip_wim_stream(found_lte); + ctx->num_blobs_remaining--; + } else if (found_blob->blob_location != BLOB_NONEXISTENT) { + ret = skip_wim_resource(found_blob->rdesc); if (ret) goto out; } else { @@ -312,9 +284,9 @@ load_streams_from_pipe(struct apply_ctx *ctx, } ret = 0; out: - if (found_lte && found_lte->resource_location != RESOURCE_IN_WIM) - FREE(rspec); - free_lookup_table_entry(found_lte); + if (found_blob && found_blob->blob_location != BLOB_IN_WIM) + FREE(rdesc); + free_blob_descriptor(found_blob); return ret; } @@ -358,17 +330,17 @@ retry: } static int -begin_extract_stream_wrapper(struct wim_lookup_table_entry *lte, void *_ctx) +begin_extract_blob_wrapper(struct blob_descriptor *blob, void *_ctx) { struct apply_ctx *ctx = _ctx; - ctx->cur_stream = lte; - ctx->cur_stream_offset = 0; + ctx->cur_blob = blob; + ctx->cur_blob_offset = 0; - if (unlikely(lte->out_refcnt > MAX_OPEN_STREAMS)) + if (unlikely(blob->out_refcnt > MAX_OPEN_FILES)) return create_temporary_file(&ctx->tmpfile_fd, &ctx->tmpfile_name); else - return (*ctx->saved_cbs->begin_stream)(lte, ctx->saved_cbs->begin_stream_ctx); + return (*ctx->saved_cbs->begin_blob)(blob, ctx->saved_cbs->begin_blob_ctx); } static int @@ -378,17 +350,18 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx) union wimlib_progress_info *progress = 
&ctx->progress; int ret; - ctx->cur_stream_offset += size; + ctx->cur_blob_offset += size; if (likely(ctx->supported_features.hard_links)) { progress->extract.completed_bytes += - (u64)size * ctx->cur_stream->out_refcnt; - if (ctx->cur_stream_offset == ctx->cur_stream->size) - progress->extract.completed_streams += ctx->cur_stream->out_refcnt; + (u64)size * ctx->cur_blob->out_refcnt; + if (ctx->cur_blob_offset == ctx->cur_blob->size) + progress->extract.completed_streams += ctx->cur_blob->out_refcnt; } else { - const struct stream_owner *owners = stream_owners(ctx->cur_stream); - for (u32 i = 0; i < ctx->cur_stream->out_refcnt; i++) { - const struct wim_inode *inode = owners[i].inode; + const struct blob_extraction_target *targets = + blob_extraction_targets(ctx->cur_blob); + for (u32 i = 0; i < ctx->cur_blob->out_refcnt; i++) { + const struct wim_inode *inode = targets[i].inode; const struct wim_dentry *dentry; list_for_each_entry(dentry, @@ -396,7 +369,7 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx) d_extraction_alias_node) { progress->extract.completed_bytes += size; - if (ctx->cur_stream_offset == ctx->cur_stream->size) + if (ctx->cur_blob_offset == ctx->cur_blob->size) progress->extract.completed_streams++; } } @@ -451,63 +424,62 @@ extract_chunk_wrapper(const void *chunk, size_t size, void *_ctx) static int extract_from_tmpfile(const tchar *tmpfile_name, struct apply_ctx *ctx) { - struct wim_lookup_table_entry tmpfile_lte; - struct wim_lookup_table_entry *orig_lte = ctx->cur_stream; - const struct read_stream_list_callbacks *cbs = ctx->saved_cbs; + struct blob_descriptor tmpfile_blob; + struct blob_descriptor *orig_blob = ctx->cur_blob; + const struct read_blob_list_callbacks *cbs = ctx->saved_cbs; int ret; - const u32 orig_refcnt = orig_lte->out_refcnt; + const u32 orig_refcnt = orig_blob->out_refcnt; - BUILD_BUG_ON(MAX_OPEN_STREAMS < ARRAY_LEN(orig_lte->inline_stream_owners)); + BUILD_BUG_ON(MAX_OPEN_FILES < + 
ARRAY_LEN(orig_blob->inline_blob_extraction_targets)); - struct stream_owner *owners = orig_lte->stream_owners; + struct blob_extraction_target *targets = orig_blob->blob_extraction_targets; - /* Copy the stream's data from the temporary file to each of its - * destinations. + /* Copy the blob's data from the temporary file to each of its targets. * - * This is executed only in the very uncommon case that a - * single-instance stream is being extracted to more than - * MAX_OPEN_STREAMS locations! */ + * This is executed only in the very uncommon case that a blob is being + * extracted to more than MAX_OPEN_FILES targets! */ - memcpy(&tmpfile_lte, orig_lte, sizeof(struct wim_lookup_table_entry)); - tmpfile_lte.resource_location = RESOURCE_IN_FILE_ON_DISK; - tmpfile_lte.file_on_disk = ctx->tmpfile_name; + memcpy(&tmpfile_blob, orig_blob, sizeof(struct blob_descriptor)); + tmpfile_blob.blob_location = BLOB_IN_FILE_ON_DISK; + tmpfile_blob.file_on_disk = ctx->tmpfile_name; ret = 0; for (u32 i = 0; i < orig_refcnt; i++) { /* Note: it usually doesn't matter whether we pass the original - * stream entry to callbacks provided by the extraction backend - * as opposed to the tmpfile stream entry, since they shouldn't - * actually read data from the stream other than through the - * read_stream_prefix() call below. But for + * blob descriptor to callbacks provided by the extraction + * backend as opposed to the tmpfile blob descriptor, since they + * shouldn't actually read data from the blob other than through + * the read_blob_prefix() call below. But for * WIMLIB_EXTRACT_FLAG_WIMBOOT mode on Windows it does matter - * because it needs the original stream location in order to - * create the external backing reference. */ + * because it needs access to the original WIM resource + * descriptor in order to create the external backing reference. 
+ */ - orig_lte->out_refcnt = 1; - orig_lte->inline_stream_owners[0] = owners[i]; + orig_blob->out_refcnt = 1; + orig_blob->inline_blob_extraction_targets[0] = targets[i]; - ret = (*cbs->begin_stream)(orig_lte, cbs->begin_stream_ctx); + ret = (*cbs->begin_blob)(orig_blob, cbs->begin_blob_ctx); if (ret) break; /* Extra SHA-1 isn't necessary here, but it shouldn't hurt as * this case is very rare anyway. */ - ret = extract_stream(&tmpfile_lte, tmpfile_lte.size, - cbs->consume_chunk, - cbs->consume_chunk_ctx); + ret = extract_blob(&tmpfile_blob, tmpfile_blob.size, + cbs->consume_chunk, + cbs->consume_chunk_ctx); - ret = (*cbs->end_stream)(orig_lte, ret, cbs->end_stream_ctx); + ret = (*cbs->end_blob)(orig_blob, ret, cbs->end_blob_ctx); if (ret) break; } - FREE(owners); - orig_lte->out_refcnt = 0; + FREE(targets); + orig_blob->out_refcnt = 0; return ret; } static int -end_extract_stream_wrapper(struct wim_lookup_table_entry *stream, - int status, void *_ctx) +end_extract_blob_wrapper(struct blob_descriptor *blob, int status, void *_ctx) { struct apply_ctx *ctx = _ctx; @@ -520,49 +492,49 @@ end_extract_stream_wrapper(struct wim_lookup_table_entry *stream, FREE(ctx->tmpfile_name); return status; } else { - return (*ctx->saved_cbs->end_stream)(stream, status, - ctx->saved_cbs->end_stream_ctx); + return (*ctx->saved_cbs->end_blob)(blob, status, + ctx->saved_cbs->end_blob_ctx); } } /* - * Read the list of single-instance streams to extract and feed their data into - * the specified callback functions. + * Read the list of blobs to extract and feed their data into the specified + * callback functions. * - * This handles checksumming each stream. + * This handles checksumming each blob. * * This also handles sending WIMLIB_PROGRESS_MSG_EXTRACT_STREAMS. * * This also works if the WIM is being read from a pipe, whereas attempting to - * read streams directly (e.g. with read_full_stream_into_buf()) will not. + * read blobs directly (e.g. with read_full_blob_into_buf()) will not. 
* - * This also will split up streams that will need to be extracted to more than - * MAX_OPEN_STREAMS locations, as measured by the 'out_refcnt' of each stream. + * This also will split up blobs that will need to be extracted to more than + * MAX_OPEN_FILES locations, as measured by the 'out_refcnt' of each blob. * Therefore, the apply_operations implementation need not worry about running * out of file descriptors, unless it might open more than one file descriptor * per nominal destination (e.g. Win32 currently might because the destination * file system might not support hard links). */ int -extract_stream_list(struct apply_ctx *ctx, - const struct read_stream_list_callbacks *cbs) +extract_blob_list(struct apply_ctx *ctx, + const struct read_blob_list_callbacks *cbs) { - struct read_stream_list_callbacks wrapper_cbs = { - .begin_stream = begin_extract_stream_wrapper, - .begin_stream_ctx = ctx, + struct read_blob_list_callbacks wrapper_cbs = { + .begin_blob = begin_extract_blob_wrapper, + .begin_blob_ctx = ctx, .consume_chunk = extract_chunk_wrapper, .consume_chunk_ctx = ctx, - .end_stream = end_extract_stream_wrapper, - .end_stream_ctx = ctx, + .end_blob = end_extract_blob_wrapper, + .end_blob_ctx = ctx, }; ctx->saved_cbs = cbs; if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { - return load_streams_from_pipe(ctx, &wrapper_cbs); + return read_blobs_from_pipe(ctx, &wrapper_cbs); } else { - return read_stream_list(&ctx->stream_list, - offsetof(struct wim_lookup_table_entry, - extraction_list), - &wrapper_cbs, VERIFY_STREAM_HASHES); + return read_blob_list(&ctx->blob_list, + offsetof(struct blob_descriptor, + extraction_list), + &wrapper_cbs, VERIFY_BLOB_HASHES); } } @@ -573,38 +545,39 @@ extract_stream_list(struct apply_ctx *ctx, * unnamed data stream only. 
*/ static int extract_dentry_to_stdout(struct wim_dentry *dentry, - const struct wim_lookup_table *lookup_table) + const struct blob_table *blob_table) { struct wim_inode *inode = dentry->d_inode; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; struct filedes _stdout; if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT | - FILE_ATTRIBUTE_DIRECTORY)) + FILE_ATTRIBUTE_DIRECTORY | + FILE_ATTRIBUTE_ENCRYPTED)) { ERROR("\"%"TS"\" is not a regular file and therefore cannot be " "extracted to standard output", dentry_full_path(dentry)); return WIMLIB_ERR_NOT_A_REGULAR_FILE; } - lte = inode_unnamed_lte(inode, lookup_table); - if (!lte) { - const u8 *hash = inode_unnamed_stream_hash(inode); + blob = inode_get_blob_for_unnamed_data_stream(inode, blob_table); + if (!blob) { + const u8 *hash = inode_get_hash_of_unnamed_data_stream(inode); if (!is_zero_hash(hash)) - return stream_not_found_error(inode, hash); + return blob_not_found_error(inode, hash); return 0; } filedes_init(&_stdout, STDOUT_FILENO); - return extract_full_stream_to_fd(lte, &_stdout); + return extract_full_blob_to_fd(blob, &_stdout); } static int extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries, - const struct wim_lookup_table *lookup_table) + const struct blob_table *blob_table) { for (size_t i = 0; i < num_dentries; i++) { - int ret = extract_dentry_to_stdout(dentries[i], lookup_table); + int ret = extract_dentry_to_stdout(dentries[i], blob_table); if (ret) return ret; } @@ -752,13 +725,13 @@ destroy_dentry_list(struct list_head *dentry_list) } static void -destroy_stream_list(struct list_head *stream_list) +destroy_blob_list(struct list_head *blob_list) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - list_for_each_entry(lte, stream_list, extraction_list) - if (lte->out_refcnt > ARRAY_LEN(lte->inline_stream_owners)) - FREE(lte->stream_owners); + list_for_each_entry(blob, blob_list, extraction_list) + if (blob->out_refcnt > 
ARRAY_LEN(blob->inline_blob_extraction_targets)) + FREE(blob->blob_extraction_targets); } #ifdef __WIN32__ @@ -814,9 +787,6 @@ dentry_calculate_extraction_name(struct wim_dentry *dentry, { int ret; - if (unlikely(!dentry_is_supported(dentry, &ctx->supported_features))) - goto skip_dentry; - if (dentry_is_root(dentry)) return 0; @@ -965,34 +935,35 @@ dentry_list_calculate_extraction_names(struct list_head *dentry_list, static int dentry_resolve_streams(struct wim_dentry *dentry, int extract_flags, - struct wim_lookup_table *lookup_table) + struct blob_table *blob_table) { struct wim_inode *inode = dentry->d_inode; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; int ret; bool force = false; - /* Special case: when extracting from a pipe, the WIM lookup table is + /* Special case: when extracting from a pipe, the WIM blob table is * initially empty, so "resolving" an inode's streams is initially not - * possible. However, we still need to keep track of which streams, - * identified by SHA1 message digests, need to be extracted, so we - * "resolve" the inode's streams anyway by allocating new entries. */ + * possible. However, we still need to keep track of which blobs, + * identified by SHA-1 message digests, need to be extracted, so we + * "resolve" the inode's streams anyway by allocating a 'struct + * blob_descriptor' for each one. */ if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) force = true; - ret = inode_resolve_streams(inode, lookup_table, force); + ret = inode_resolve_streams(inode, blob_table, force); if (ret) return ret; - for (u32 i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte_resolved(inode, i); - if (lte) - lte->out_refcnt = 0; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + blob = stream_blob_resolved(&inode->i_streams[i]); + if (blob) + blob->out_refcnt = 0; } return 0; } /* * For each dentry to be extracted, resolve all streams in the corresponding - * inode and set 'out_refcnt' in each to 0. 
+ * inode and set 'out_refcnt' in all referenced blob_descriptors to 0. * * Possible error codes: WIMLIB_ERR_RESOURCE_NOT_FOUND, WIMLIB_ERR_NOMEM. */ @@ -1006,7 +977,7 @@ dentry_list_resolve_streams(struct list_head *dentry_list, list_for_each_entry(dentry, dentry_list, d_extraction_list_node) { ret = dentry_resolve_streams(dentry, ctx->extract_flags, - ctx->wim->lookup_table); + ctx->wim->blob_table); if (ret) return ret; } @@ -1014,142 +985,155 @@ dentry_list_resolve_streams(struct list_head *dentry_list, } static int -ref_stream(struct wim_lookup_table_entry *lte, unsigned stream_idx, - struct wim_dentry *dentry, struct apply_ctx *ctx) +ref_stream(struct wim_inode_stream *strm, struct wim_dentry *dentry, + struct apply_ctx *ctx) { struct wim_inode *inode = dentry->d_inode; - struct stream_owner *stream_owners; + struct blob_descriptor *blob = stream_blob_resolved(strm); + struct blob_extraction_target *targets; - if (!lte) + if (!blob) return 0; - /* Tally the size only for each extraction of the stream (not hard - * links). */ + /* Tally the size only for each actual extraction of the stream (not + * additional hard links to the inode). */ if (inode->i_visited && ctx->supported_features.hard_links) return 0; - ctx->progress.extract.total_bytes += lte->size; + ctx->progress.extract.total_bytes += blob->size; ctx->progress.extract.total_streams++; if (inode->i_visited) return 0; - /* Add stream to the dentry_list only one time, even if it's going - * to be extracted to multiple inodes. */ - if (lte->out_refcnt == 0) { - list_add_tail(&lte->extraction_list, &ctx->stream_list); - ctx->num_streams_remaining++; + /* Add each blob to 'ctx->blob_list' only one time, regardless of how + * many extraction targets it will have. */ + if (blob->out_refcnt == 0) { + list_add_tail(&blob->extraction_list, &ctx->blob_list); + ctx->num_blobs_remaining++; } - /* If inode not yet been visited, append it to the stream_owners array. 
*/ - if (lte->out_refcnt < ARRAY_LEN(lte->inline_stream_owners)) { - stream_owners = lte->inline_stream_owners; + /* Set this stream as an extraction target of 'blob'. */ + + if (blob->out_refcnt < ARRAY_LEN(blob->inline_blob_extraction_targets)) { + targets = blob->inline_blob_extraction_targets; } else { - struct stream_owner *prev_stream_owners; - size_t alloc_stream_owners; + struct blob_extraction_target *prev_targets; + size_t alloc_blob_extraction_targets; - if (lte->out_refcnt == ARRAY_LEN(lte->inline_stream_owners)) { - prev_stream_owners = NULL; - alloc_stream_owners = ARRAY_LEN(lte->inline_stream_owners); + if (blob->out_refcnt == ARRAY_LEN(blob->inline_blob_extraction_targets)) { + prev_targets = NULL; + alloc_blob_extraction_targets = ARRAY_LEN(blob->inline_blob_extraction_targets); } else { - prev_stream_owners = lte->stream_owners; - alloc_stream_owners = lte->alloc_stream_owners; + prev_targets = blob->blob_extraction_targets; + alloc_blob_extraction_targets = blob->alloc_blob_extraction_targets; } - if (lte->out_refcnt == alloc_stream_owners) { - alloc_stream_owners *= 2; - stream_owners = REALLOC(prev_stream_owners, - alloc_stream_owners * - sizeof(stream_owners[0])); - if (!stream_owners) + if (blob->out_refcnt == alloc_blob_extraction_targets) { + alloc_blob_extraction_targets *= 2; + targets = REALLOC(prev_targets, + alloc_blob_extraction_targets * + sizeof(targets[0])); + if (!targets) return WIMLIB_ERR_NOMEM; - if (!prev_stream_owners) { - memcpy(stream_owners, - lte->inline_stream_owners, - sizeof(lte->inline_stream_owners)); + if (!prev_targets) { + memcpy(targets, + blob->inline_blob_extraction_targets, + sizeof(blob->inline_blob_extraction_targets)); } - lte->stream_owners = stream_owners; - lte->alloc_stream_owners = alloc_stream_owners; + blob->blob_extraction_targets = targets; + blob->alloc_blob_extraction_targets = alloc_blob_extraction_targets; } - stream_owners = lte->stream_owners; - } - stream_owners[lte->out_refcnt].inode = 
inode; - if (stream_idx == 0) { - stream_owners[lte->out_refcnt].stream_name = NULL; - } else { - stream_owners[lte->out_refcnt].stream_name = - inode->i_ads_entries[stream_idx - 1].stream_name; + targets = blob->blob_extraction_targets; } - lte->out_refcnt++; + targets[blob->out_refcnt].inode = inode; + targets[blob->out_refcnt].stream = strm; + blob->out_refcnt++; return 0; } static int -ref_unnamed_stream(struct wim_dentry *dentry, struct apply_ctx *ctx) +ref_stream_if_needed(struct wim_dentry *dentry, struct wim_inode *inode, + struct wim_inode_stream *strm, struct apply_ctx *ctx) { - struct wim_inode *inode = dentry->d_inode; - int ret; - unsigned stream_idx; - struct wim_lookup_table_entry *stream; - - if (unlikely(ctx->apply_ops->will_externally_back)) { - ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx); - if (ret >= 0) { - if (ret) /* Error */ - return ret; - /* Will externally back */ - return 0; + bool need_stream = false; + switch (strm->stream_type) { + case STREAM_TYPE_DATA: + if (stream_is_named(strm)) { + /* Named data stream */ + if (ctx->supported_features.named_data_streams) + need_stream = true; + } else if (!(inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY | + FILE_ATTRIBUTE_ENCRYPTED)) + && !(inode_is_symlink(inode) + && !ctx->supported_features.reparse_points + && ctx->supported_features.symlink_reparse_points)) + { + /* + * Unnamed data stream. Skip if any of the following is true: + * + * - file is a directory + * - file is encrypted + * - backend needs to create the file as UNIX symlink + * - backend will extract the stream as externally backed + */ + if (ctx->apply_ops->will_externally_back) { + int ret = (*ctx->apply_ops->will_externally_back)(dentry, ctx); + if (ret > 0) /* Error? */ + return ret; + if (ret < 0) /* Won't externally back? 
*/ + need_stream = true; + } else { + need_stream = true; + } + } + break; + case STREAM_TYPE_REPARSE_POINT: + wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); + if (ctx->supported_features.reparse_points || + (inode_is_symlink(inode) && + ctx->supported_features.symlink_reparse_points)) + need_stream = true; + break; + case STREAM_TYPE_EFSRPC_RAW_DATA: + wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED); + if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) { + if (ctx->supported_features.encrypted_directories) + need_stream = true; + } else { + if (ctx->supported_features.encrypted_files) + need_stream = true; } - /* Won't externally back */ + break; } - - stream = inode_unnamed_stream_resolved(inode, &stream_idx); - return ref_stream(stream, stream_idx, dentry, ctx); + if (need_stream) + return ref_stream(strm, dentry, ctx); + return 0; } static int dentry_ref_streams(struct wim_dentry *dentry, struct apply_ctx *ctx) { struct wim_inode *inode = dentry->d_inode; - int ret; - - /* The unnamed data stream will almost always be extracted, but there - * exist cases in which it won't be. */ - ret = ref_unnamed_stream(dentry, ctx); - if (ret) - return ret; - - /* Named data streams will be extracted only if supported in the current - * extraction mode and volume, and to avoid complications, if not doing - * a linked extraction. */ - if (ctx->supported_features.named_data_streams) { - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (!inode->i_ads_entries[i].stream_name_nbytes) - continue; - ret = ref_stream(inode->i_ads_entries[i].lte, i + 1, - dentry, ctx); - if (ret) - return ret; - } + for (unsigned i = 0; i < inode->i_num_streams; i++) { + int ret = ref_stream_if_needed(dentry, inode, + &inode->i_streams[i], ctx); + if (ret) + return ret; } inode->i_visited = 1; return 0; } /* - * For each dentry to be extracted, iterate through the data streams of the - * corresponding inode. 
For each such stream that is not to be ignored due to - * the supported features or extraction flags, add it to the list of streams to - * be extracted (ctx->stream_list) if not already done so. + * Given a list of dentries to be extracted, build the list of blobs that need + * to be extracted, and for each blob determine the streams to which that blob + * will be extracted. * - * Also builds a mapping from each stream to the inodes referencing it. - * - * This also initializes the extract progress info with byte and stream + * This also initializes the extract progress info with byte and blob * information. * * ctx->supported_features must be filled in. - * - * Possible error codes: WIMLIB_ERR_NOMEM. */ static int dentry_list_ref_streams(struct list_head *dentry_list, struct apply_ctx *ctx) @@ -1207,7 +1191,7 @@ inode_tally_features(const struct wim_inode *inode, features->not_context_indexed_files++; if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) features->sparse_files++; - if (inode_has_named_stream(inode)) + if (inode_has_named_data_stream(inode)) features->named_data_streams++; if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { features->reparse_points++; @@ -1258,6 +1242,18 @@ do_feature_check(const struct wim_features *required_features, const struct wim_features *supported_features, int extract_flags) { + /* Encrypted files. */ + if (required_features->encrypted_files && + !supported_features->encrypted_files) + WARNING("Ignoring EFS-encrypted data of %lu files", + required_features->encrypted_files); + + /* Named data streams. */ + if (required_features->named_data_streams && + !supported_features->named_data_streams) + WARNING("Ignoring named data streams of %lu files", + required_features->named_data_streams); + /* File attributes. */ if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) { /* Note: Don't bother the user about FILE_ATTRIBUTE_ARCHIVE. 
@@ -1295,18 +1291,6 @@ do_feature_check(const struct wim_features *required_features, required_features->encrypted_directories); } - /* Encrypted files. */ - if (required_features->encrypted_files && - !supported_features->encrypted_files) - WARNING("Ignoring %lu encrypted files", - required_features->encrypted_files); - - /* Named data streams. */ - if (required_features->named_data_streams && - (!supported_features->named_data_streams)) - WARNING("Ignoring named data streams of %lu files", - required_features->named_data_streams); - /* Hard links. */ if (required_features->hard_links && !supported_features->hard_links) WARNING("Extracting %lu hard links as independent files", @@ -1326,12 +1310,11 @@ do_feature_check(const struct wim_features *required_features, { if (supported_features->symlink_reparse_points) { if (required_features->other_reparse_points) { - WARNING("Ignoring %lu non-symlink/junction " - "reparse point files", + WARNING("Ignoring reparse data of %lu non-symlink/junction files", required_features->other_reparse_points); } } else { - WARNING("Ignoring %lu reparse point files", + WARNING("Ignoring reparse data of %lu files", required_features->reparse_points); } } @@ -1415,7 +1398,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) { ret = extract_dentries_to_stdout(trees, num_trees, - wim->lookup_table); + wim->blob_table); goto out; } @@ -1453,7 +1436,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, wim->current_image); ctx->progress.extract.target = target; } - INIT_LIST_HEAD(&ctx->stream_list); + INIT_LIST_HEAD(&ctx->blob_list); filedes_invalidate(&ctx->tmpfile_fd); ctx->apply_ops = ops; @@ -1494,8 +1477,8 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, if (extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { /* When extracting from a pipe, the number of bytes of data to * extract can't be determined in the 
normal way (examining the - * lookup table), since at this point all we have is a set of - * SHA1 message digests of streams that need to be extracted. + * blob table), since at this point all we have is a set of + * SHA-1 message digests of blobs that need to be extracted. * However, we can get a reasonably accurate estimate by taking * <TOTALBYTES> from the corresponding <IMAGE> in the WIM XML * data. This does assume that a full image is being extracted, @@ -1538,7 +1521,7 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, WIMLIB_PROGRESS_MSG_EXTRACT_IMAGE_END : WIMLIB_PROGRESS_MSG_EXTRACT_TREE_END)); out_cleanup: - destroy_stream_list(&ctx->stream_list); + destroy_blob_list(&ctx->blob_list); destroy_dentry_list(&dentry_list); FREE(ctx); out: @@ -1683,7 +1666,7 @@ do_wimlib_extract_paths(WIMStruct *wim, int image, const tchar *target, if (ret) return ret; - ret = wim_checksum_unhashed_streams(wim); + ret = wim_checksum_unhashed_blobs(wim); if (ret) return ret; @@ -1905,8 +1888,8 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, /* Read the WIM header from the pipe and get a WIMStruct to represent * the pipable WIM. Caveats: Unlike getting a WIMStruct with - * wimlib_open_wim(), getting a WIMStruct in this way will result in - * an empty lookup table, no XML data read, and no filename set. */ + * wimlib_open_wim(), getting a WIMStruct in this way will result in an + * empty blob table, no XML data read, and no filename set. */ ret = open_wim_as_WIMStruct(&pipe_fd, WIMLIB_OPEN_FLAG_FROM_PIPE, &pwm, progfunc, progctx); if (ret) @@ -1938,21 +1921,20 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, * write_pipable_wim() for more details about the format of pipable * WIMs.) 
*/ { - struct wim_lookup_table_entry xml_lte; - struct wim_resource_spec xml_rspec; - ret = read_pwm_stream_header(pwm, &xml_lte, &xml_rspec, 0, NULL); + struct blob_descriptor xml_blob; + struct wim_resource_descriptor xml_rdesc; + ret = read_pwm_blob_header(pwm, &xml_blob, &xml_rdesc, 0, NULL); if (ret) goto out_wimlib_free; - if (!(xml_lte.flags & WIM_RESHDR_FLAG_METADATA)) + if (!(xml_blob.flags & WIM_RESHDR_FLAG_METADATA)) { - ERROR("Expected XML data, but found non-metadata " - "stream."); + ERROR("Expected XML data, but found non-metadata resource."); ret = WIMLIB_ERR_INVALID_PIPABLE_WIM; goto out_wimlib_free; } - wim_res_spec_to_hdr(&xml_rspec, &pwm->hdr.xml_data_reshdr); + wim_res_desc_to_hdr(&xml_rdesc, &pwm->hdr.xml_data_reshdr); ret = read_wim_xml_data(pwm); if (ret) @@ -1991,33 +1973,33 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, /* Load the needed metadata resource. */ for (i = 1; i <= pwm->hdr.image_count; i++) { - struct wim_lookup_table_entry *metadata_lte; + struct blob_descriptor *metadata_blob; struct wim_image_metadata *imd; - struct wim_resource_spec *metadata_rspec; + struct wim_resource_descriptor *metadata_rdesc; - metadata_lte = new_lookup_table_entry(); - if (metadata_lte == NULL) { + metadata_blob = new_blob_descriptor(); + if (metadata_blob == NULL) { ret = WIMLIB_ERR_NOMEM; goto out_wimlib_free; } - metadata_rspec = MALLOC(sizeof(struct wim_resource_spec)); - if (metadata_rspec == NULL) { + metadata_rdesc = MALLOC(sizeof(struct wim_resource_descriptor)); + if (metadata_rdesc == NULL) { ret = WIMLIB_ERR_NOMEM; - free_lookup_table_entry(metadata_lte); + free_blob_descriptor(metadata_blob); goto out_wimlib_free; } - ret = read_pwm_stream_header(pwm, metadata_lte, metadata_rspec, 0, NULL); + ret = read_pwm_blob_header(pwm, metadata_blob, metadata_rdesc, 0, NULL); imd = pwm->image_metadata[i - 1]; - imd->metadata_lte = metadata_lte; + imd->metadata_blob = metadata_blob; if (ret) { - FREE(metadata_rspec); + 
FREE(metadata_rdesc); goto out_wimlib_free; } - if (!(metadata_lte->flags & WIM_RESHDR_FLAG_METADATA)) { + if (!(metadata_blob->flags & WIM_RESHDR_FLAG_METADATA)) { ERROR("Expected metadata resource, but found " - "non-metadata stream."); + "non-metadata resource."); ret = WIMLIB_ERR_INVALID_PIPABLE_WIM; goto out_wimlib_free; } @@ -2032,7 +2014,7 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, } else { /* Metadata resource is not for the image being * extracted. Skip over it. */ - ret = skip_wim_stream(metadata_lte); + ret = skip_wim_resource(metadata_rdesc); if (ret) goto out_wimlib_free; } diff --git a/src/header.c b/src/header.c index 634f4dab..82088998 100644 --- a/src/header.c +++ b/src/header.c @@ -143,7 +143,7 @@ read_wim_header(WIMStruct *wim, struct wim_header *hdr) return WIMLIB_ERR_IMAGE_COUNT; } - get_wim_reshdr(&disk_hdr.lookup_table_reshdr, &hdr->lookup_table_reshdr); + get_wim_reshdr(&disk_hdr.blob_table_reshdr, &hdr->blob_table_reshdr); get_wim_reshdr(&disk_hdr.xml_data_reshdr, &hdr->xml_data_reshdr); get_wim_reshdr(&disk_hdr.boot_metadata_reshdr, &hdr->boot_metadata_reshdr); hdr->boot_idx = le32_to_cpu(disk_hdr.boot_idx); @@ -182,7 +182,7 @@ write_wim_header_at_offset(const struct wim_header *hdr, struct filedes *out_fd, disk_hdr.part_number = cpu_to_le16(hdr->part_number); disk_hdr.total_parts = cpu_to_le16(hdr->total_parts); disk_hdr.image_count = cpu_to_le32(hdr->image_count); - put_wim_reshdr(&hdr->lookup_table_reshdr, &disk_hdr.lookup_table_reshdr); + put_wim_reshdr(&hdr->blob_table_reshdr, &disk_hdr.blob_table_reshdr); put_wim_reshdr(&hdr->xml_data_reshdr, &disk_hdr.xml_data_reshdr); put_wim_reshdr(&hdr->boot_metadata_reshdr, &disk_hdr.boot_metadata_reshdr); disk_hdr.boot_idx = cpu_to_le32(hdr->boot_idx); @@ -313,14 +313,14 @@ wimlib_print_header(const WIMStruct *wim) tprintf(T("Part Number = %hu\n"), hdr->part_number); tprintf(T("Total Parts = %hu\n"), hdr->total_parts); tprintf(T("Image Count = %u\n"), hdr->image_count); - 
tprintf(T("Lookup Table Size = %"PRIu64"\n"), - (u64)hdr->lookup_table_reshdr.size_in_wim); - tprintf(T("Lookup Table Flags = 0x%hhx\n"), - (u8)hdr->lookup_table_reshdr.flags); - tprintf(T("Lookup Table Offset = %"PRIu64"\n"), - hdr->lookup_table_reshdr.offset_in_wim); - tprintf(T("Lookup Table Original_size = %"PRIu64"\n"), - hdr->lookup_table_reshdr.uncompressed_size); + tprintf(T("Blob Table Size = %"PRIu64"\n"), + (u64)hdr->blob_table_reshdr.size_in_wim); + tprintf(T("Blob Table Flags = 0x%hhx\n"), + (u8)hdr->blob_table_reshdr.flags); + tprintf(T("Blob Table Offset = %"PRIu64"\n"), + hdr->blob_table_reshdr.offset_in_wim); + tprintf(T("Blob Table Original_size = %"PRIu64"\n"), + hdr->blob_table_reshdr.uncompressed_size); tprintf(T("XML Data Size = %"PRIu64"\n"), (u64)hdr->xml_data_reshdr.size_in_wim); tprintf(T("XML Data Flags = 0x%hhx\n"), diff --git a/src/inode.c b/src/inode.c index 6384b11c..1114a153 100644 --- a/src/inode.c +++ b/src/inode.c @@ -8,7 +8,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -31,15 +31,19 @@ #include #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" -#include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/inode.h" -#include "wimlib/lookup_table.h" -#include "wimlib/security.h" #include "wimlib/timestamp.h" +/* + * The 'stream_name' field of unnamed streams always points to this array, which + * is an empty UTF-16 string. + */ +const utf16lechar NO_STREAM_NAME[1]; + /* Allocate a new inode. Set the timestamps to the current time. 
*/ struct wim_inode * new_inode(void) @@ -62,31 +66,28 @@ new_timeless_inode(void) if (inode) { inode->i_security_id = -1; /*inode->i_nlink = 0;*/ - inode->i_next_stream_id = 1; inode->i_not_rpfixed = 1; - inode->i_canonical_streams = 1; INIT_LIST_HEAD(&inode->i_list); INIT_LIST_HEAD(&inode->i_dentry); } return inode; } -/* Free memory allocated within an alternate data stream entry. */ -static void -destroy_ads_entry(struct wim_ads_entry *ads_entry) +static inline void +destroy_stream(struct wim_inode_stream *strm) { - FREE(ads_entry->stream_name); + if (strm->stream_name != NO_STREAM_NAME) + FREE(strm->stream_name); } static void free_inode(struct wim_inode *inode) { - if (unlikely(inode->i_ads_entries)) { - for (unsigned i = 0; i < inode->i_num_ads; i++) - destroy_ads_entry(&inode->i_ads_entries[i]); - FREE(inode->i_ads_entries); - } - if (unlikely(inode->i_extra)) + for (unsigned i = 0; i < inode->i_num_streams; i++) + destroy_stream(&inode->i_streams[i]); + if (inode->i_streams != inode->i_embedded_streams) + FREE(inode->i_streams); + if (inode->i_extra) FREE(inode->i_extra); /* HACK: This may instead delete the inode from i_list, but hlist_del() * behaves the same as list_del(). */ @@ -155,346 +156,285 @@ inode_dec_num_opened_fds(struct wim_inode *inode) #endif /* - * Returns the alternate data stream entry belonging to @inode that has the - * stream name @stream_name, or NULL if the inode has no alternate data stream - * with that name. + * Retrieve a stream of an inode. * - * If @p stream_name is the empty string, NULL is returned --- that is, this - * function will not return "unnamed" alternate data stream entries. + * @inode + * The inode from which the stream is desired + * @stream_type + * The type of the stream desired + * @stream_name + * The name of the stream desired as a null-terminated UTF-16LE string, or + * NO_STREAM_NAME if an unnamed stream is desired * - * If NULL is returned, errno is set. 
+ * Returns a pointer to the stream if found, otherwise NULL. */ -struct wim_ads_entry * -inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name) +struct wim_inode_stream * +inode_get_stream(const struct wim_inode *inode, int stream_type, + const utf16lechar *stream_name) { - int ret; - const utf16lechar *stream_name_utf16le; - size_t stream_name_utf16le_nbytes; - unsigned i; - struct wim_ads_entry *result; - - if (inode->i_num_ads == 0) { - errno = ENOENT; - return NULL; - } - - if (stream_name[0] == T('\0')) { - errno = ENOENT; - return NULL; + if (stream_name == NO_STREAM_NAME) /* Optimization */ + return inode_get_unnamed_stream(inode, stream_type); + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + if (strm->stream_type == stream_type && + !cmp_utf16le_strings_z(strm->stream_name, stream_name, + default_ignore_case)) + { + return strm; + } } + return NULL; +} - ret = tstr_get_utf16le_and_len(stream_name, &stream_name_utf16le, - &stream_name_utf16le_nbytes); - if (ret) - return NULL; - - i = 0; - result = NULL; - do { - if (!cmp_utf16le_strings(inode->i_ads_entries[i].stream_name, - inode->i_ads_entries[i].stream_name_nbytes / - sizeof(utf16lechar), - stream_name_utf16le, - stream_name_utf16le_nbytes / - sizeof(utf16lechar), - default_ignore_case)) +/* + * This is equivalent to inode_get_stream(inode, stream_type, NO_STREAM_NAME), + * but this optimizes for the unnamed case by not doing full string comparisons. 
+ */ +struct wim_inode_stream * +inode_get_unnamed_stream(const struct wim_inode *inode, int stream_type) +{ + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + if (strm->stream_type == stream_type && + strm->stream_name == NO_STREAM_NAME) { - result = &inode->i_ads_entries[i]; - break; + return strm; } - } while (++i != inode->i_num_ads); - - tstr_put_utf16le(stream_name_utf16le); - - if (!result) - errno = ENOENT; - return result; + } + return NULL; } -static struct wim_ads_entry * -do_inode_add_ads(struct wim_inode *inode, - utf16lechar *stream_name, size_t stream_name_nbytes) +/* + * Add a new stream to the specified inode. + * + * @inode + * The inode to which to add the stream + * @stream_type + * The type of the stream being added + * @stream_name + * The name of the stream being added as a null-terminated UTF-16LE string, + * or NO_STREAM_NAME if the stream is unnamed + * @blob + * The blob that the new stream will initially reference, or NULL + * + * Returns a pointer to the new stream, or NULL with errno set if it could not + * be added. + */ +struct wim_inode_stream * +inode_add_stream(struct wim_inode *inode, int stream_type, + const utf16lechar *stream_name, struct blob_descriptor *blob) { - unsigned num_ads; - struct wim_ads_entry *ads_entries; - struct wim_ads_entry *new_entry; - - if (unlikely(inode->i_num_ads >= 0xfffe)) { - ERROR("File \"%"TS"\" has too many alternate data streams!", + if (inode->i_num_streams >= 0xFFFF) { + ERROR("Inode has too many streams! 
Path=\"%"TS"\"", inode_first_full_path(inode)); errno = EFBIG; return NULL; } - num_ads = inode->i_num_ads + 1; - ads_entries = REALLOC(inode->i_ads_entries, - num_ads * sizeof(inode->i_ads_entries[0])); - if (!ads_entries) - return NULL; - - inode->i_ads_entries = ads_entries; - - new_entry = &inode->i_ads_entries[num_ads - 1]; - - memset(new_entry, 0, sizeof(struct wim_ads_entry)); - new_entry->stream_name = stream_name; - new_entry->stream_name_nbytes = stream_name_nbytes; - new_entry->stream_id = inode->i_next_stream_id++; - inode->i_num_ads = num_ads; - return new_entry; -} -/* - * Add an alternate data stream entry to a WIM inode (UTF-16LE version). On - * success, returns a pointer to the new entry. Note that this pointer might - * become invalid if another ADS entry is added to the inode. On failure, - * returns NULL and sets errno. - */ -struct wim_ads_entry * -inode_add_ads_utf16le(struct wim_inode *inode, - const utf16lechar *stream_name, size_t stream_name_nbytes) -{ - utf16lechar *dup = NULL; - struct wim_ads_entry *result; + struct wim_inode_stream *streams; + struct wim_inode_stream *new_strm; - if (stream_name_nbytes) { - dup = utf16le_dupz(stream_name, stream_name_nbytes); - if (!dup) + if (inode->i_streams == inode->i_embedded_streams) { + if (inode->i_num_streams < ARRAY_LEN(inode->i_embedded_streams)) { + streams = inode->i_embedded_streams; + } else { + streams = MALLOC((inode->i_num_streams + 1) * + sizeof(inode->i_streams[0])); + if (!streams) + return NULL; + memcpy(streams, inode->i_streams, + (inode->i_num_streams * + sizeof(inode->i_streams[0]))); + inode->i_streams = streams; + } + } else { + streams = REALLOC(inode->i_streams, + (inode->i_num_streams + 1) * + sizeof(inode->i_streams[0])); + if (!streams) return NULL; + inode->i_streams = streams; } + new_strm = &streams[inode->i_num_streams]; + + memset(new_strm, 0, sizeof(*new_strm)); + + new_strm->stream_type = stream_type; + if (!*stream_name) { + /* Unnamed stream */ + 
new_strm->stream_name = (utf16lechar *)NO_STREAM_NAME; + } else { + /* Named stream */ + new_strm->stream_name = utf16le_dup(stream_name); + if (!new_strm->stream_name) + return NULL; + } + new_strm->stream_id = inode->i_next_stream_id++; - result = do_inode_add_ads(inode, dup, stream_name_nbytes); - if (!result) - FREE(dup); - return result; -} + stream_set_blob(new_strm, blob); -/* - * Add an alternate data stream entry to a WIM inode (tchar version). On - * success, returns a pointer to the new entry. Note that this pointer might - * become invalid if another ADS entry is added to the inode. On failure, - * returns NULL and sets errno. - */ -struct wim_ads_entry * -inode_add_ads(struct wim_inode *inode, const tchar *stream_name) -{ - utf16lechar *stream_name_utf16le = NULL; - size_t stream_name_utf16le_nbytes = 0; - struct wim_ads_entry *result; - - if (stream_name && *stream_name) - if (tstr_to_utf16le(stream_name, - tstrlen(stream_name) * sizeof(tchar), - &stream_name_utf16le, - &stream_name_utf16le_nbytes)) - return NULL; + inode->i_num_streams++; - result = do_inode_add_ads(inode, stream_name_utf16le, - stream_name_utf16le_nbytes); - if (!result) - FREE(stream_name_utf16le); - return result; + return new_strm; } /* - * Add an data alternate stream entry to a WIM inode, where the contents of the - * new stream are specified in a data buffer. The inode must be resolved. + * Create a new blob descriptor for the specified data buffer or use an existing + * blob descriptor in @blob_table for an identical blob, then add a stream of + * the specified type and name to the specified inode and set it to initially + * reference the blob. 
+ * + * @inode + * The inode to which to add the stream + * @stream_type + * The type of the stream being added + * @stream_name + * The name of the stream being added as a null-terminated UTF-16LE string, + * or NO_STREAM_NAME if the stream is unnamed + * @data + * The uncompressed data of the blob + * @size + * The size, in bytes, of the blob data + * @blob_table + * Pointer to the blob table in which the blob needs to be indexed. * - * On success, returns a pointer to the new alternate data stream entry. Note - * that this pointer might become invalid if another ADS entry is added to the - * inode. On failure, returns NULL and sets errno. + * Returns a pointer to the new stream if successfully added, otherwise NULL + * with errno set. */ -struct wim_ads_entry * -inode_add_ads_with_data(struct wim_inode *inode, const tchar *name, - const void *value, size_t size, - struct wim_lookup_table *lookup_table) +struct wim_inode_stream * +inode_add_stream_with_data(struct wim_inode *inode, + int stream_type, const utf16lechar *stream_name, + const void *data, size_t size, + struct blob_table *blob_table) { - struct wim_ads_entry *new_entry; + struct blob_descriptor *blob; + struct wim_inode_stream *strm; - wimlib_assert(inode->i_resolved); - - new_entry = inode_add_ads(inode, name); - if (unlikely(!new_entry)) - return NULL; - - new_entry->lte = new_stream_from_data_buffer(value, size, lookup_table); - if (unlikely(!new_entry->lte)) { - inode_remove_ads(inode, new_entry, NULL); + blob = new_blob_from_data_buffer(data, size, blob_table); + if (!blob) return NULL; - } - return new_entry; + strm = inode_add_stream(inode, stream_type, stream_name, blob); + if (!strm) + blob_decrement_refcnt(blob, blob_table); + return strm; } -/* Remove an alternate data stream from a WIM inode. */ +/* + * Remove a stream from the specified inode and release the reference to the + * blob descriptor, if any. 
+ */ void -inode_remove_ads(struct wim_inode *inode, struct wim_ads_entry *entry, - struct wim_lookup_table *lookup_table) +inode_remove_stream(struct wim_inode *inode, struct wim_inode_stream *strm, + struct blob_table *blob_table) { - struct wim_lookup_table_entry *lte; - unsigned idx = entry - inode->i_ads_entries; + struct blob_descriptor *blob; + unsigned idx = strm - inode->i_streams; - wimlib_assert(idx < inode->i_num_ads); - wimlib_assert(inode->i_resolved); + wimlib_assert(idx < inode->i_num_streams); - lte = entry->lte; - if (lte) - lte_decrement_refcnt(lte, lookup_table); + blob = stream_blob(strm, blob_table); + if (blob) + blob_decrement_refcnt(blob, blob_table); - destroy_ads_entry(entry); + destroy_stream(strm); - memmove(&inode->i_ads_entries[idx], - &inode->i_ads_entries[idx + 1], - (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0])); - inode->i_num_ads--; + memmove(&inode->i_streams[idx], + &inode->i_streams[idx + 1], + (inode->i_num_streams - idx - 1) * sizeof(inode->i_streams[0])); + inode->i_num_streams--; } -/* Return true iff the specified inode has at least one named data stream. */ +/* Returns true iff the specified inode has at least one named data stream. */ bool -inode_has_named_stream(const struct wim_inode *inode) +inode_has_named_data_stream(const struct wim_inode *inode) { - for (unsigned i = 0; i < inode->i_num_ads; i++) - if (inode->i_ads_entries[i].stream_name_nbytes) + for (unsigned i = 0; i < inode->i_num_streams; i++) + if (stream_is_named_data_stream(&inode->i_streams[i])) return true; return false; } -/* Set the unnamed stream of a WIM inode, given a data buffer containing the - * stream contents. The inode must be resolved and cannot already have an - * unnamed stream. 
*/ -int -inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len, - struct wim_lookup_table *lookup_table) -{ - wimlib_assert(inode->i_resolved); - wimlib_assert(!inode->i_lte); - - inode->i_lte = new_stream_from_data_buffer(data, len, lookup_table); - if (!inode->i_lte) - return WIMLIB_ERR_NOMEM; - return 0; -} - /* - * Resolve an inode's single-instance streams. + * Resolve an inode's streams. * - * This takes each SHA-1 message digest stored in the inode or one of its ADS - * entries and replaces it with a pointer directly to the appropriate 'struct - * wim_lookup_table_entry' currently inserted into @table to represent the - * single-instance stream having that SHA-1 message digest. + * For each stream, this replaces the SHA-1 message digest of the blob data with + * a pointer to the 'struct blob_descriptor' for the blob. Blob descriptors are + * looked up in @table. * * If @force is %false: - * If any of the needed single-instance streams do not exist in @table, - * return WIMLIB_ERR_RESOURCE_NOT_FOUND and leave the inode unmodified. + * If any of the needed blobs do not exist in @table, return + * WIMLIB_ERR_RESOURCE_NOT_FOUND and leave the inode unmodified. * If @force is %true: - * If any of the needed single-instance streams do not exist in @table, - * allocate new entries for them and insert them into @table. This does - * not, of course, cause these streams to magically exist, but this is + * If any of the needed blobs do not exist in @table, allocate new blob + * descriptors for them and insert them into @table. This does not, of + * course, cause the data of these blobs to magically exist, but this is * needed by the code for extraction from a pipe. * - * If the inode is already resolved, this function does nothing. - * * Returns 0 on success; WIMLIB_ERR_NOMEM if out of memory; or - * WIMLIB_ERR_RESOURCE_NOT_FOUND if @force is %false and at least one - * single-instance stream referenced by the inode was missing. 
+ * WIMLIB_ERR_RESOURCE_NOT_FOUND if @force is %false and at least one blob + * referenced by the inode was missing. */ int -inode_resolve_streams(struct wim_inode *inode, struct wim_lookup_table *table, +inode_resolve_streams(struct wim_inode *inode, struct blob_table *table, bool force) { - const u8 *hash; - struct wim_lookup_table_entry *lte, *ads_lte; - - if (inode->i_resolved) - return 0; - - struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads]; - - /* Resolve the default data stream */ - lte = NULL; - hash = inode->i_hash; - if (!is_zero_hash(hash)) { - lte = lookup_stream(table, hash); - if (!lte) { - if (force) { - lte = new_lookup_table_entry(); - if (!lte) - return WIMLIB_ERR_NOMEM; - copy_hash(lte->hash, hash); - lookup_table_insert(table, lte); - } else { - goto stream_not_found; - } - } - } + struct blob_descriptor *blobs[inode->i_num_streams]; + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + + if (inode->i_streams[i].stream_resolved) + continue; - /* Resolve the alternate data streams */ - for (unsigned i = 0; i < inode->i_num_ads; i++) { - struct wim_ads_entry *cur_entry; + const u8 *hash = stream_hash(&inode->i_streams[i]); + struct blob_descriptor *blob = NULL; - ads_lte = NULL; - cur_entry = &inode->i_ads_entries[i]; - hash = cur_entry->hash; if (!is_zero_hash(hash)) { - ads_lte = lookup_stream(table, hash); - if (!ads_lte) { - if (force) { - ads_lte = new_lookup_table_entry(); - if (!ads_lte) - return WIMLIB_ERR_NOMEM; - copy_hash(ads_lte->hash, hash); - lookup_table_insert(table, ads_lte); - } else { - goto stream_not_found; - } + blob = lookup_blob(table, hash); + if (!blob) { + if (!force) + return blob_not_found_error(inode, hash); + blob = new_blob_descriptor(); + if (!blob) + return WIMLIB_ERR_NOMEM; + copy_hash(blob->hash, hash); + blob_table_insert(table, blob); } } - ads_ltes[i] = ads_lte; + blobs[i] = blob; } - inode->i_lte = lte; - for (unsigned i = 0; i < inode->i_num_ads; i++) - inode->i_ads_entries[i].lte = 
ads_ltes[i]; - inode->i_resolved = 1; - return 0; -stream_not_found: - return stream_not_found_error(inode, hash); + for (unsigned i = 0; i < inode->i_num_streams; i++) + if (!inode->i_streams[i].stream_resolved) + stream_set_blob(&inode->i_streams[i], blobs[i]); + return 0; } -/* - * Undo the effects of inode_resolve_streams(). - * - * If the inode is not resolved, this function does nothing. - */ +/* Undo the effects of inode_resolve_streams(). */ void inode_unresolve_streams(struct wim_inode *inode) { - if (!inode->i_resolved) - return; + for (unsigned i = 0; i < inode->i_num_streams; i++) { - if (inode->i_lte) - copy_hash(inode->i_hash, inode->i_lte->hash); - else - zero_out_hash(inode->i_hash); - - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].lte) - copy_hash(inode->i_ads_entries[i].hash, - inode->i_ads_entries[i].lte->hash); - else - zero_out_hash(inode->i_ads_entries[i].hash); + if (!inode->i_streams[i].stream_resolved) + continue; + + copy_hash(inode->i_streams[i]._stream_hash, + stream_hash(&inode->i_streams[i])); + inode->i_streams[i].stream_resolved = 0; } - inode->i_resolved = 0; } int -stream_not_found_error(const struct wim_inode *inode, const u8 *hash) +blob_not_found_error(const struct wim_inode *inode, const u8 *hash) { if (wimlib_print_errors) { tchar hashstr[SHA1_HASH_SIZE * 2 + 1]; sprint_hash(hash, hashstr); - ERROR("\"%"TS"\": stream not found\n" - " SHA-1 message digest of missing stream:\n" + ERROR("\"%"TS"\": blob not found\n" + " SHA-1 message digest of missing blob:\n" " %"TS"", inode_first_full_path(inode), hashstr); } @@ -502,334 +442,131 @@ stream_not_found_error(const struct wim_inode *inode, const u8 *hash) } /* - * Return the lookup table entry for the specified stream of the inode, or NULL - * if the specified stream is empty or not available. 
- * - * stream_idx = 0: default data stream - * stream_idx > 0: alternate data stream + * Return the blob descriptor for the specified stream, or NULL if the blob for + * the stream is empty or not available. */ -struct wim_lookup_table_entry * -inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx, - const struct wim_lookup_table *table) +struct blob_descriptor * +stream_blob(const struct wim_inode_stream *strm, const struct blob_table *table) { - if (inode->i_resolved) - return inode_stream_lte_resolved(inode, stream_idx); - if (stream_idx == 0) - return lookup_stream(table, inode->i_hash); - return lookup_stream(table, inode->i_ads_entries[stream_idx - 1].hash); + if (strm->stream_resolved) + return strm->_stream_blob; + else + return lookup_blob(table, strm->_stream_hash); } -/* - * Return the lookup table entry for the unnamed data stream of a *resolved* - * inode, or NULL if the inode's unnamed data stream is empty. Also return the - * 0-based index of the unnamed data stream in *stream_idx_ret. - */ -struct wim_lookup_table_entry * -inode_unnamed_stream_resolved(const struct wim_inode *inode, - unsigned *stream_idx_ret) +/* Return the SHA-1 message digest of the data of the specified stream, or a + * void SHA-1 of all zeroes if the specified stream is empty. */ +const u8 * +stream_hash(const struct wim_inode_stream *strm) { - wimlib_assert(inode->i_resolved); - - *stream_idx_ret = 0; - if (likely(inode->i_lte)) - return inode->i_lte; - - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].stream_name_nbytes == 0 && - inode->i_ads_entries[i].lte) - { - *stream_idx_ret = i + 1; - return inode->i_ads_entries[i].lte; - } - } - return NULL; + if (strm->stream_resolved) + return strm->_stream_blob ? strm->_stream_blob->hash : zero_hash; + else + return strm->_stream_hash; } /* - * Return the lookup table entry for the unnamed data stream of an inode, or - * NULL if the inode's unnamed data stream is empty or not available. 
- * - * Note: this is complicated by the fact that WIMGAPI may put the unnamed data - * stream in an alternate data stream entry rather than in the dentry itself. + * Return the blob descriptor for the unnamed data stream of the inode, or NULL + * if the inode does not have an unnamed data stream, the blob for the inode's + * unnamed data stream is empty, or the blob for the inode's unnamed data stream + * is not available in @blob_table. */ -struct wim_lookup_table_entry * -inode_unnamed_lte(const struct wim_inode *inode, - const struct wim_lookup_table *table) +struct blob_descriptor * +inode_get_blob_for_unnamed_data_stream(const struct wim_inode *inode, + const struct blob_table *blob_table) { - struct wim_lookup_table_entry *lte; - - if (inode->i_resolved) - return inode_unnamed_lte_resolved(inode); + const struct wim_inode_stream *strm; - lte = lookup_stream(table, inode->i_hash); - if (likely(lte)) - return lte; + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA); + if (!strm) + return NULL; - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].stream_name_nbytes) - continue; - lte = lookup_stream(table, inode->i_ads_entries[i].hash); - if (lte) - return lte; - } - return NULL; + return stream_blob(strm, blob_table); } -/* Return the SHA-1 message digest of the specified stream of the inode, or a - * void SHA-1 of all zeroes if the specified stream is empty. */ -const u8 * -inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx) +/* Like inode_get_blob_for_unnamed_data_stream(), but assumes the unnamed data + * stream is resolved. 
*/ +struct blob_descriptor * +inode_get_blob_for_unnamed_data_stream_resolved(const struct wim_inode *inode) { - if (inode->i_resolved) { - struct wim_lookup_table_entry *lte; + const struct wim_inode_stream *strm; - lte = inode_stream_lte_resolved(inode, stream_idx); - if (lte) - return lte->hash; - return zero_hash; - } - if (stream_idx == 0) - return inode->i_hash; - return inode->i_ads_entries[stream_idx - 1].hash; + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA); + if (!strm) + return NULL; + + return stream_blob_resolved(strm); } -/* Return the SHA-1 message digest of the unnamed data stream of the inode, or a - * void SHA-1 of all zeroes if the inode's unnamed data stream is empty. */ +/* + * Return the SHA-1 message digest of the unnamed data stream of the inode, or a + * void SHA-1 of all zeroes if the inode does not have an unnamed data stream or + * if the inode's unnamed data stream is empty. + */ const u8 * -inode_unnamed_stream_hash(const struct wim_inode *inode) +inode_get_hash_of_unnamed_data_stream(const struct wim_inode *inode) { - const u8 *hash; + const struct wim_inode_stream *strm; - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - if (inode_stream_name_nbytes(inode, i) == 0) { - hash = inode_stream_hash(inode, i); - if (!is_zero_hash(hash)) - return hash; - } - } - return zero_hash; + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA); + if (!strm) + return zero_hash; + + return stream_hash(strm); } -/* Acquire another reference to each single-instance stream referenced by this - * inode. This is necessary when creating a hard link to this inode. +/* Acquire another reference to each blob referenced by this inode. This is + * necessary when creating a hard link to this inode. * - * The inode must be resolved. */ + * All streams of the inode must be resolved. 
*/ void -inode_ref_streams(struct wim_inode *inode) +inode_ref_blobs(struct wim_inode *inode) { - wimlib_assert(inode->i_resolved); + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct blob_descriptor *blob; - if (inode->i_lte) - inode->i_lte->refcnt++; - for (unsigned i = 0; i < inode->i_num_ads; i++) - if (inode->i_ads_entries[i].lte) - inode->i_ads_entries[i].lte->refcnt++; + blob = stream_blob_resolved(&inode->i_streams[i]); + if (blob) + blob->refcnt++; + } } -/* Drop a reference to each single-instance stream referenced by this inode. - * This is necessary when deleting a hard link to this inode. */ +/* Release a reference to each blob referenced by this inode. This is necessary + * when deleting a hard link to this inode. */ void -inode_unref_streams(struct wim_inode *inode, - struct wim_lookup_table *lookup_table) +inode_unref_blobs(struct wim_inode *inode, struct blob_table *blob_table) { - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - struct wim_lookup_table_entry *lte; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct blob_descriptor *blob; - lte = inode_stream_lte(inode, i, lookup_table); - if (lte) - lte_decrement_refcnt(lte, lookup_table); + blob = stream_blob(&inode->i_streams[i], blob_table); + if (blob) + blob_decrement_refcnt(blob, blob_table); } } /* - * Read the alternate data stream entries of a WIM dentry. - * - * @p: - * Pointer to buffer that starts with the first alternate stream entry. - * - * @inode: - * Inode to load the alternate data streams into. @inode->i_num_ads must - * have been set to the number of alternate data streams that are expected. - * - * @nbytes_remaining_p: - * Number of bytes of data remaining in the buffer pointed to by @p. - * On success this will be updated to point just past the ADS entries. + * Given a blob descriptor, return a pointer to the pointer contained in the + * stream that references it. 
* - * On success, inode->i_ads_entries is set to an array of `struct - * wim_ads_entry's of length inode->i_num_ads. On failure, @inode is not - * modified. - * - * Return values: - * WIMLIB_ERR_SUCCESS (0) - * WIMLIB_ERR_INVALID_METADATA_RESOURCE - * WIMLIB_ERR_NOMEM + * This is only possible for "unhashed" blobs, which are guaranteed to have only + * one referencing stream, and that reference is guaranteed to be in a resolved + * stream. (It can't be in an unresolved stream, since that would imply the + * hash is known!) */ -int -read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, - size_t *nbytes_remaining_p) -{ - size_t nbytes_remaining = *nbytes_remaining_p; - unsigned num_ads; - struct wim_ads_entry *ads_entries; - int ret; - - BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE); - - /* Allocate an array for our in-memory representation of the alternate - * data stream entries. */ - num_ads = inode->i_num_ads; - ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0])); - if (!ads_entries) - goto out_of_memory; - - /* Read the entries into our newly allocated buffer. */ - for (unsigned i = 0; i < num_ads; i++) { - u64 length; - struct wim_ads_entry *cur_entry; - const struct wim_ads_entry_on_disk *disk_entry = - (const struct wim_ads_entry_on_disk*)p; - - cur_entry = &ads_entries[i]; - ads_entries[i].stream_id = i + 1; - - /* Do we have at least the size of the fixed-length data we know - * need? */ - if (nbytes_remaining < sizeof(struct wim_ads_entry_on_disk)) - goto out_invalid; - - /* Read the length field */ - length = le64_to_cpu(disk_entry->length); - - /* Make sure the length field is neither so small it doesn't - * include all the fixed-length data nor so large it overflows - * the metadata resource buffer. */ - if (length < sizeof(struct wim_ads_entry_on_disk) || - length > nbytes_remaining) - goto out_invalid; - - /* Read the rest of the fixed-length data. 
*/ - - cur_entry->reserved = le64_to_cpu(disk_entry->reserved); - copy_hash(cur_entry->hash, disk_entry->hash); - cur_entry->stream_name_nbytes = le16_to_cpu(disk_entry->stream_name_nbytes); - - /* If stream_name_nbytes != 0, this is a named stream. - * Otherwise this is an unnamed stream, or in some cases (bugs - * in Microsoft's software I guess) a meaningless entry - * distinguished from the real unnamed stream entry, if any, by - * the fact that the real unnamed stream entry has a nonzero - * hash field. */ - if (cur_entry->stream_name_nbytes) { - /* The name is encoded in UTF16-LE, which uses 2-byte - * coding units, so the length of the name had better be - * an even number of bytes... */ - if (cur_entry->stream_name_nbytes & 1) - goto out_invalid; - - /* Add the length of the stream name to get the length - * we actually need to read. Make sure this isn't more - * than the specified length of the entry. */ - if (sizeof(struct wim_ads_entry_on_disk) + - cur_entry->stream_name_nbytes > length) - goto out_invalid; - - cur_entry->stream_name = utf16le_dupz(disk_entry->stream_name, - cur_entry->stream_name_nbytes); - if (!cur_entry->stream_name) - goto out_of_memory; - } else { - /* Mark inode as having weird stream entries. */ - inode->i_canonical_streams = 0; - } - - /* It's expected that the size of every ADS entry is a multiple - * of 8. However, to be safe, I'm allowing the possibility of - * an ADS entry at the very end of the metadata resource ending - * unaligned. So although we still need to increment the input - * pointer by @length to reach the next ADS entry, it's possible - * that less than @length is actually remaining in the metadata - * resource. We should set the remaining bytes to 0 if this - * happens. 
*/ - length = (length + 7) & ~7; - p += length; - if (nbytes_remaining < length) - nbytes_remaining = 0; - else - nbytes_remaining -= length; - } - inode->i_ads_entries = ads_entries; - inode->i_next_stream_id = inode->i_num_ads + 1; - *nbytes_remaining_p = nbytes_remaining; - ret = 0; - goto out; -out_of_memory: - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; -out_invalid: - ERROR("An alternate data stream entry is invalid"); - ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; -out_free_ads_entries: - if (ads_entries) { - for (unsigned i = 0; i < num_ads; i++) - destroy_ads_entry(&ads_entries[i]); - FREE(ads_entries); - } -out: - return ret; -} - -/* Check a WIM inode for unusual field values. */ -void -check_inode(struct wim_inode *inode, const struct wim_security_data *sd) +struct blob_descriptor ** +retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob) { - /* Check the security ID. -1 is valid and means "no security - * descriptor". Anything else has to be a valid index into the WIM - * image's security descriptors table. */ - if (inode->i_security_id < -1 || - (inode->i_security_id >= 0 && - inode->i_security_id >= sd->num_entries)) - { - WARNING("\"%"TS"\" has an invalid security ID (%d)", - inode_first_full_path(inode), inode->i_security_id); - inode->i_security_id = -1; - } + wimlib_assert(blob->unhashed); - /* Make sure there is only one unnamed data stream. */ - unsigned num_unnamed_streams = 0; - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - const u8 *hash; - hash = inode_stream_hash(inode, i); - if (inode_stream_name_nbytes(inode, i) == 0 && !is_zero_hash(hash)) - num_unnamed_streams++; - } - if (num_unnamed_streams > 1) { - WARNING("\"%"TS"\" has multiple (%u) unnamed streams", - inode_first_full_path(inode), num_unnamed_streams); - /* We currently don't treat this as an error and will just end - * up using the first unnamed data stream in the inode. 
*/ + struct wim_inode *inode = blob->back_inode; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + if (inode->i_streams[i].stream_id == blob->back_stream_id) { + wimlib_assert(inode->i_streams[i]._stream_blob == blob); + return &inode->i_streams[i]._stream_blob; + } } -} -/* - * Translate a single-instance stream entry into the pointer contained in the - * inode (or ads entry of an inode) that references it. - * - * This is only possible for "unhashed" streams, which are guaranteed to have - * only one reference, and that reference is guaranteed to be in a resolved - * inode. (It can't be in an unresolved inode, since that would imply the hash - * is known!) - */ -struct wim_lookup_table_entry ** -retrieve_lte_pointer(struct wim_lookup_table_entry *lte) -{ - wimlib_assert(lte->unhashed); - struct wim_inode *inode = lte->back_inode; - u32 stream_id = lte->back_stream_id; - if (stream_id == 0) - return &inode->i_lte; - for (unsigned i = 0; i < inode->i_num_ads; i++) - if (inode->i_ads_entries[i].stream_id == stream_id) - return &inode->i_ads_entries[i].lte; wimlib_assert(0); return NULL; } diff --git a/src/inode_fixup.c b/src/inode_fixup.c index 76245623..6929cf7a 100644 --- a/src/inode_fixup.c +++ b/src/inode_fixup.c @@ -53,8 +53,8 @@ inodes_consistent(const struct wim_inode *inode_1, * doesn't link the dentries.) * * For non-buggy WIMs this function will always return true. 
*/ - return hashes_equal(inode_unnamed_stream_hash(inode_1), - inode_unnamed_stream_hash(inode_2)); + return hashes_equal(inode_get_hash_of_unnamed_data_stream(inode_1), + inode_get_hash_of_unnamed_data_stream(inode_2)); } static int diff --git a/src/inode_table.c b/src/inode_table.c index 49dd8217..f6de4e60 100644 --- a/src/inode_table.c +++ b/src/inode_table.c @@ -117,7 +117,7 @@ inode_table_new_dentry(struct wim_inode_table *table, const tchar *name, hlist_for_each_entry(inode, cur, &table->array[pos], i_hlist) { if (inode->i_ino == ino && inode->i_devno == devno) { /* Found; use the existing inode. */ - inode_ref_streams(inode); + inode_ref_blobs(inode); goto have_inode; } } diff --git a/src/integrity.c b/src/integrity.c index c36b755b..c4a6f2e4 100644 --- a/src/integrity.c +++ b/src/integrity.c @@ -279,7 +279,7 @@ out_free_new_table: * chunks of the file). * * This function can optionally re-use entries from an older integrity table. - * To do this, specify old_lookup_table_end and old_table. + * To do this, specify old_blob_table_end and old_table. * * @wim: * WIMStruct for the WIM file. @wim->out_fd must be a seekable descriptor @@ -287,17 +287,17 @@ out_free_new_table: * which the integrity table is to be written. Furthermore, * @wim->hdr.integrity is expected to be a resource entry which will be set * to the integrity table information on success. In addition, if - * @old_lookup_table_end != 0, @wim->hdr.integrity must initially contain + * @old_blob_table_end != 0, @wim->hdr.integrity must initially contain * information about the old integrity table, and @wim->in_fd must be a * seekable descriptor to the original WIM file opened for reading. * - * @new_lookup_table_end: - * The offset of the byte directly following the lookup table in the WIM + * @new_blob_table_end: + * The offset of the byte directly following the blob table in the WIM * being written. 
* - * @old_lookup_table_end: - * If nonzero, the offset of the byte directly following the old lookup - * table in the WIM. + * @old_blob_table_end: + * If nonzero, the offset of the byte directly following the old blob table + * in the WIM. * * @old_table * Pointer to the old integrity table read into memory, or NULL if not @@ -305,8 +305,8 @@ out_free_new_table: */ int write_integrity_table(WIMStruct *wim, - off_t new_lookup_table_end, - off_t old_lookup_table_end, + off_t new_blob_table_end, + off_t old_blob_table_end, struct integrity_table *old_table) { struct integrity_table *new_table; @@ -314,13 +314,13 @@ write_integrity_table(WIMStruct *wim, u32 new_table_size; DEBUG("Writing integrity table " - "(new_lookup_table_end=%"PRIu64", old_lookup_table_end=%"PRIu64")", - new_lookup_table_end, old_lookup_table_end); + "(new_blob_table_end=%"PRIu64", old_blob_table_end=%"PRIu64")", + new_blob_table_end, old_blob_table_end); - wimlib_assert(old_lookup_table_end <= new_lookup_table_end); + wimlib_assert(old_blob_table_end <= new_blob_table_end); - ret = calculate_integrity_table(&wim->out_fd, new_lookup_table_end, - old_table, old_lookup_table_end, + ret = calculate_integrity_table(&wim->out_fd, new_blob_table_end, + old_table, old_blob_table_end, &new_table, wim->progfunc, wim->progctx); if (ret) return ret; @@ -358,7 +358,7 @@ write_integrity_table(WIMStruct *wim, * * @bytes_to_check: * Number of bytes in the WIM that need to be checked (offset of end of the - * lookup table minus offset of end of the header). + * blob table minus offset of end of the header). 
* * Returns: * > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error @@ -442,22 +442,22 @@ check_wim_integrity(WIMStruct *wim) int ret; u64 bytes_to_check; struct integrity_table *table; - u64 end_lookup_table_offset; + u64 end_blob_table_offset; if (!wim_has_integrity_table(wim)) { DEBUG("No integrity information."); return WIM_INTEGRITY_NONEXISTENT; } - end_lookup_table_offset = wim->hdr.lookup_table_reshdr.offset_in_wim + - wim->hdr.lookup_table_reshdr.size_in_wim; + end_blob_table_offset = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; - if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) { - ERROR("WIM lookup table ends before WIM header ends!"); + if (end_blob_table_offset < WIM_HEADER_DISK_SIZE) { + ERROR("WIM blob table ends before WIM header ends!"); return WIMLIB_ERR_INVALID_INTEGRITY_TABLE; } - bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE; + bytes_to_check = end_blob_table_offset - WIM_HEADER_DISK_SIZE; ret = read_integrity_table(wim, bytes_to_check, &table); if (ret) diff --git a/src/iterate_dir.c b/src/iterate_dir.c index 76bb2b3c..7dc63486 100644 --- a/src/iterate_dir.c +++ b/src/iterate_dir.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2013 Eric Biggers + * Copyright (C) 2013, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -27,9 +27,9 @@ #endif #include "wimlib.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/paths.h" #include "wimlib/security.h" @@ -38,6 +38,49 @@ #include "wimlib/util.h" #include "wimlib/wim.h" +static int +stream_to_wimlib_stream_entry(const struct wim_inode *inode, + const struct wim_inode_stream *strm, + struct wimlib_stream_entry *wstream, + const struct blob_table *blob_table, + int flags) +{ + const struct 
blob_descriptor *blob; + const u8 *hash; + + if (stream_is_named(strm)) { + size_t dummy; + int ret; + + ret = utf16le_get_tstr(strm->stream_name, + utf16le_len_bytes(strm->stream_name), + &wstream->stream_name, &dummy); + if (ret) + return ret; + } + + blob = stream_blob(strm, blob_table); + if (blob) { + blob_to_wimlib_resource_entry(blob, &wstream->resource); + } else if (!is_zero_hash((hash = stream_hash(strm)))) { + if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED) + return blob_not_found_error(inode, hash); + copy_hash(wstream->resource.sha1_hash, hash); + wstream->resource.is_missing = 1; + } + return 0; +} + +static int +get_default_stream_type(const struct wim_inode *inode) +{ + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) + return STREAM_TYPE_EFSRPC_RAW_DATA; + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) + return STREAM_TYPE_REPARSE_POINT; + return STREAM_TYPE_DATA; +} + static int init_wimlib_dentry(struct wimlib_dir_entry *wdentry, struct wim_dentry *dentry, WIMStruct *wim, int flags) @@ -45,8 +88,7 @@ init_wimlib_dentry(struct wimlib_dir_entry *wdentry, struct wim_dentry *dentry, int ret; size_t dummy; const struct wim_inode *inode = dentry->d_inode; - struct wim_lookup_table_entry *lte; - const u8 *hash; + const struct wim_inode_stream *strm; struct wimlib_unix_data unix_data; ret = utf16le_get_tstr(dentry->file_name, dentry->file_name_nbytes, @@ -88,40 +130,28 @@ init_wimlib_dentry(struct wimlib_dir_entry *wdentry, struct wim_dentry *dentry, wdentry->unix_rdev = unix_data.rdev; } - lte = inode_unnamed_lte(inode, wim->lookup_table); - if (lte) { - lte_to_wimlib_resource_entry(lte, &wdentry->streams[0].resource); - } else if (!is_zero_hash(hash = inode_unnamed_stream_hash(inode))) { - if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED) - return stream_not_found_error(inode, hash); - copy_hash(wdentry->streams[0].resource.sha1_hash, hash); - wdentry->streams[0].resource.is_missing = 1; + strm = 
inode_get_unnamed_stream(inode, get_default_stream_type(inode)); + if (strm) { + ret = stream_to_wimlib_stream_entry(inode, strm, + &wdentry->streams[0], + wim->blob_table, flags); + if (ret) + return ret; } - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (!inode->i_ads_entries[i].stream_name_nbytes) + for (unsigned i = 0; i < inode->i_num_streams; i++) { + + strm = &inode->i_streams[i]; + + if (!stream_is_named_data_stream(strm)) continue; - lte = inode_stream_lte(inode, i + 1, wim->lookup_table); - wdentry->num_named_streams++; - if (lte) { - lte_to_wimlib_resource_entry(lte, &wdentry->streams[ - wdentry->num_named_streams].resource); - } else if (!is_zero_hash(hash = inode_stream_hash(inode, i + 1))) { - if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED) - return stream_not_found_error(inode, hash); - copy_hash(wdentry->streams[ - wdentry->num_named_streams].resource.sha1_hash, hash); - wdentry->streams[ - wdentry->num_named_streams].resource.is_missing = 1; - } - size_t dummy; + wdentry->num_named_streams++; - ret = utf16le_get_tstr(inode->i_ads_entries[i].stream_name, - inode->i_ads_entries[i].stream_name_nbytes, - &wdentry->streams[ - wdentry->num_named_streams].stream_name, - &dummy); + ret = stream_to_wimlib_stream_entry(inode, strm, + &wdentry->streams[ + wdentry->num_named_streams], + wim->blob_table, flags); if (ret) return ret; } @@ -149,7 +179,7 @@ do_iterate_dir_tree(WIMStruct *wim, wdentry = CALLOC(1, sizeof(struct wimlib_dir_entry) + - (1 + dentry->d_inode->i_num_ads) * + (1 + dentry->d_inode->i_num_streams) * sizeof(struct wimlib_stream_entry)); if (wdentry == NULL) goto out; diff --git a/src/lookup_table.c b/src/lookup_table.c deleted file mode 100644 index 26f231e4..00000000 --- a/src/lookup_table.c +++ /dev/null @@ -1,1434 +0,0 @@ -/* - * lookup_table.c - * - * Lookup table, implemented as a hash table, that maps SHA1 message digests to - * data streams; plus code to read and write the corresponding on-disk data. 
- */ - -/* - * Copyright (C) 2012, 2013, 2014 Eric Biggers - * - * This file is free software; you can redistribute it and/or modify it under - * the terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) any - * later version. - * - * This file is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more - * details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. - */ - -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#include -#include -#include /* for unlink() */ - -#include "wimlib/assert.h" -#include "wimlib/endianness.h" -#include "wimlib/error.h" -#include "wimlib/lookup_table.h" -#include "wimlib/metadata.h" -#include "wimlib/ntfs_3g.h" -#include "wimlib/resource.h" -#include "wimlib/unaligned.h" -#include "wimlib/util.h" -#include "wimlib/write.h" - -/* WIM lookup table: - * - * This is a logical mapping from SHA1 message digests to the data streams - * contained in a WIM. - * - * Here it is implemented as a hash table. - * - * Note: Everything will break horribly if there is a SHA1 collision. 
- */ -struct wim_lookup_table { - struct hlist_head *array; - size_t num_entries; - size_t capacity; -}; - -struct wim_lookup_table * -new_lookup_table(size_t capacity) -{ - struct wim_lookup_table *table; - struct hlist_head *array; - - table = MALLOC(sizeof(struct wim_lookup_table)); - if (table == NULL) - goto oom; - - array = CALLOC(capacity, sizeof(array[0])); - if (array == NULL) { - FREE(table); - goto oom; - } - - table->num_entries = 0; - table->capacity = capacity; - table->array = array; - return table; - -oom: - ERROR("Failed to allocate memory for lookup table " - "with capacity %zu", capacity); - return NULL; -} - -static int -do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore) -{ - free_lookup_table_entry(entry); - return 0; -} - -void -free_lookup_table(struct wim_lookup_table *table) -{ - if (table) { - for_lookup_table_entry(table, do_free_lookup_table_entry, NULL); - FREE(table->array); - FREE(table); - } -} - -struct wim_lookup_table_entry * -new_lookup_table_entry(void) -{ - struct wim_lookup_table_entry *lte; - - lte = CALLOC(1, sizeof(struct wim_lookup_table_entry)); - if (lte == NULL) - return NULL; - - lte->refcnt = 1; - - /* lte->resource_location = RESOURCE_NONEXISTENT */ - BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0); - - return lte; -} - -struct wim_lookup_table_entry * -clone_lookup_table_entry(const struct wim_lookup_table_entry *old) -{ - struct wim_lookup_table_entry *new; - - new = memdup(old, sizeof(struct wim_lookup_table_entry)); - if (new == NULL) - return NULL; - - switch (new->resource_location) { - case RESOURCE_IN_WIM: - list_add(&new->rspec_node, &new->rspec->stream_list); - break; - - case RESOURCE_IN_FILE_ON_DISK: -#ifdef __WIN32__ - case RESOURCE_IN_WINNT_FILE_ON_DISK: - case RESOURCE_WIN32_ENCRYPTED: -#endif -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - BUILD_BUG_ON((void*)&old->file_on_disk != - (void*)&old->staging_file_name); -#endif - new->file_on_disk = TSTRDUP(old->file_on_disk); - if 
(new->file_on_disk == NULL) - goto out_free; - break; - case RESOURCE_IN_ATTACHED_BUFFER: - new->attached_buffer = memdup(old->attached_buffer, old->size); - if (new->attached_buffer == NULL) - goto out_free; - break; -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - if (old->ntfs_loc) { - struct ntfs_location *loc; - loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location)); - if (loc == NULL) - goto out_free; - loc->path = NULL; - loc->stream_name = NULL; - new->ntfs_loc = loc; - loc->path = STRDUP(old->ntfs_loc->path); - if (loc->path == NULL) - goto out_free; - if (loc->stream_name_nchars != 0) { - loc->stream_name = memdup(old->ntfs_loc->stream_name, - loc->stream_name_nchars * 2); - if (loc->stream_name == NULL) - goto out_free; - } - } - break; -#endif - default: - break; - } - return new; - -out_free: - free_lookup_table_entry(new); - return NULL; -} - -void -lte_put_resource(struct wim_lookup_table_entry *lte) -{ - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - list_del(&lte->rspec_node); - if (list_empty(&lte->rspec->stream_list)) - FREE(lte->rspec); - break; - case RESOURCE_IN_FILE_ON_DISK: -#ifdef __WIN32__ - case RESOURCE_IN_WINNT_FILE_ON_DISK: - case RESOURCE_WIN32_ENCRYPTED: -#endif -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->staging_file_name); -#endif - case RESOURCE_IN_ATTACHED_BUFFER: - BUILD_BUG_ON((void*)&lte->file_on_disk != - (void*)&lte->attached_buffer); - FREE(lte->file_on_disk); - break; -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - if (lte->ntfs_loc) { - FREE(lte->ntfs_loc->path); - FREE(lte->ntfs_loc->stream_name); - FREE(lte->ntfs_loc); - } - break; -#endif - default: - break; - } -} - -void -free_lookup_table_entry(struct wim_lookup_table_entry *lte) -{ - if (lte) { - lte_put_resource(lte); - FREE(lte); - } -} - -/* Should this stream be retained even if it has no references?
*/ -static bool -should_retain_lte(const struct wim_lookup_table_entry *lte) -{ - return lte->resource_location == RESOURCE_IN_WIM; -} - -static void -finalize_lte(struct wim_lookup_table_entry *lte) -{ - if (!should_retain_lte(lte)) - free_lookup_table_entry(lte); -} - -/* - * Decrements the reference count of the single-instance stream @lte, which must - * be inserted in the stream lookup table @table. - * - * If the stream's reference count reaches 0, we may unlink it from @table and - * free it. However, we retain streams with 0 reference count that originated - * from WIM files (RESOURCE_IN_WIM). We do this for two reasons: - * - * 1. This prevents information about valid streams in a WIM file --- streams - * which will continue to be present after appending to the WIM file --- from - * being lost merely because we dropped all references to them. - * - * 2. Stream reference counts we read from WIM files can't be trusted. It's - * possible that a WIM has reference counts that are too low; WIMGAPI - * sometimes creates WIMs where this is the case. It's also possible that - * streams have been referenced from an external WIM; those streams can - * potentially have any reference count at all, either lower or higher than - * would be expected for this WIM ("this WIM" meaning the owner of @table) if - * it were a standalone WIM. - * - * So we can't take the reference counts too seriously. But at least, we do - * recalculate by default when writing a new WIM file. - */ -void -lte_decrement_refcnt(struct wim_lookup_table_entry *lte, - struct wim_lookup_table *table) -{ - if (unlikely(lte->refcnt == 0)) /* See comment above */ - return; - - if (--lte->refcnt == 0) { - if (lte->unhashed) { - list_del(&lte->unhashed_list); - #ifdef WITH_FUSE - /* If the stream has been extracted to a staging file - * for a FUSE mount, unlink the staging file. (Note - * that there still may be open file descriptors to it.)
- * */ - if (lte->resource_location == RESOURCE_IN_STAGING_FILE) - unlinkat(lte->staging_dir_fd, - lte->staging_file_name, 0); - #endif - } else { - if (!should_retain_lte(lte)) - lookup_table_unlink(table, lte); - } - - /* If FUSE mounts are enabled, we don't actually free the entry - * until the last file descriptor has been closed by - * lte_decrement_num_opened_fds(). */ -#ifdef WITH_FUSE - if (lte->num_opened_fds == 0) -#endif - finalize_lte(lte); - } -} - -#ifdef WITH_FUSE -void -lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte) -{ - wimlib_assert(lte->num_opened_fds != 0); - - if (--lte->num_opened_fds == 0 && lte->refcnt == 0) - finalize_lte(lte); -} -#endif - -static void -lookup_table_insert_raw(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte) -{ - size_t i = lte->hash_short % table->capacity; - - hlist_add_head(&lte->hash_list, &table->array[i]); -} - -static void -enlarge_lookup_table(struct wim_lookup_table *table) -{ - size_t old_capacity, new_capacity; - struct hlist_head *old_array, *new_array; - struct wim_lookup_table_entry *lte; - struct hlist_node *cur, *tmp; - size_t i; - - old_capacity = table->capacity; - new_capacity = old_capacity * 2; - new_array = CALLOC(new_capacity, sizeof(struct hlist_head)); - if (new_array == NULL) - return; - old_array = table->array; - table->array = new_array; - table->capacity = new_capacity; - - for (i = 0; i < old_capacity; i++) { - hlist_for_each_entry_safe(lte, cur, tmp, &old_array[i], hash_list) { - hlist_del(&lte->hash_list); - lookup_table_insert_raw(table, lte); - } - } - FREE(old_array); -} - -/* Inserts an entry into the lookup table. */ -void -lookup_table_insert(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte) -{ - lookup_table_insert_raw(table, lte); - if (++table->num_entries > table->capacity) - enlarge_lookup_table(table); -} - -/* Unlinks a lookup table entry from the table; does not free it.
*/ -void -lookup_table_unlink(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte) -{ - wimlib_assert(!lte->unhashed); - wimlib_assert(table->num_entries != 0); - - hlist_del(&lte->hash_list); - table->num_entries--; -} - -/* Given a SHA1 message digest, return the corresponding entry in the WIM's - * lookup table, or NULL if there is none. */ -struct wim_lookup_table_entry * -lookup_stream(const struct wim_lookup_table *table, const u8 hash[]) -{ - size_t i; - struct wim_lookup_table_entry *lte; - struct hlist_node *pos; - - i = load_size_t_unaligned(hash) % table->capacity; - hlist_for_each_entry(lte, pos, &table->array[i], hash_list) - if (hashes_equal(hash, lte->hash)) - return lte; - return NULL; -} - -/* Calls a function on all the entries in the WIM lookup table. Stop early and - * return nonzero if any call to the function returns nonzero. */ -int -for_lookup_table_entry(struct wim_lookup_table *table, - int (*visitor)(struct wim_lookup_table_entry *, void *), - void *arg) -{ - struct wim_lookup_table_entry *lte; - struct hlist_node *pos, *tmp; - int ret; - - for (size_t i = 0; i < table->capacity; i++) { - hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i], - hash_list) - { - ret = visitor(lte, arg); - if (ret) - return ret; - } - } - return 0; -} - -/* qsort() callback that sorts streams (represented by `struct - * wim_lookup_table_entry's) into an order optimized for reading. - * - * Sorting is done primarily by resource location, then secondarily by a - * per-resource location order. For example, resources in WIM files are sorted - * primarily by part number, then secondarily by offset, as to implement optimal - * reading of either a standalone or split WIM.
*/ -int -cmp_streams_by_sequential_order(const void *p1, const void *p2) -{ - const struct wim_lookup_table_entry *lte1, *lte2; - int v; - WIMStruct *wim1, *wim2; - - lte1 = *(const struct wim_lookup_table_entry**)p1; - lte2 = *(const struct wim_lookup_table_entry**)p2; - - v = (int)lte1->resource_location - (int)lte2->resource_location; - - /* Different resource locations? */ - if (v) - return v; - - switch (lte1->resource_location) { - case RESOURCE_IN_WIM: - wim1 = lte1->rspec->wim; - wim2 = lte2->rspec->wim; - - /* Different (possibly split) WIMs? */ - if (wim1 != wim2) { - v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN); - if (v) - return v; - } - - /* Different part numbers in the same WIM? */ - v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number; - if (v) - return v; - - if (lte1->rspec->offset_in_wim != lte2->rspec->offset_in_wim) - return cmp_u64(lte1->rspec->offset_in_wim, - lte2->rspec->offset_in_wim); - - return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); - - case RESOURCE_IN_FILE_ON_DISK: -#ifdef WITH_FUSE - case RESOURCE_IN_STAGING_FILE: -#endif -#ifdef __WIN32__ - case RESOURCE_IN_WINNT_FILE_ON_DISK: - case RESOURCE_WIN32_ENCRYPTED: -#endif - /* Compare files by path: just a heuristic that will place files - * in the same directory next to each other. */ - return tstrcmp(lte1->file_on_disk, lte2->file_on_disk); -#ifdef WITH_NTFS_3G - case RESOURCE_IN_NTFS_VOLUME: - return tstrcmp(lte1->ntfs_loc->path, lte2->ntfs_loc->path); -#endif - default: - /* No additional sorting order defined for this resource - * location (e.g. RESOURCE_IN_ATTACHED_BUFFER); simply compare - * everything equal to each other. 
*/ - return 0; - } -} - -int -sort_stream_list(struct list_head *stream_list, - size_t list_head_offset, - int (*compar)(const void *, const void*)) -{ - struct list_head *cur; - struct wim_lookup_table_entry **array; - size_t i; - size_t array_size; - size_t num_streams = 0; - - list_for_each(cur, stream_list) - num_streams++; - - if (num_streams <= 1) - return 0; - - array_size = num_streams * sizeof(array[0]); - array = MALLOC(array_size); - if (array == NULL) - return WIMLIB_ERR_NOMEM; - - cur = stream_list->next; - for (i = 0; i < num_streams; i++) { - array[i] = (struct wim_lookup_table_entry*)((u8*)cur - - list_head_offset); - cur = cur->next; - } - - qsort(array, num_streams, sizeof(array[0]), compar); - - INIT_LIST_HEAD(stream_list); - for (i = 0; i < num_streams; i++) { - list_add_tail((struct list_head*) - ((u8*)array[i] + list_head_offset), - stream_list); - } - FREE(array); - return 0; -} - -/* Sort the specified list of streams in an order optimized for reading. */ -int -sort_stream_list_by_sequential_order(struct list_head *stream_list, - size_t list_head_offset) -{ - return sort_stream_list(stream_list, list_head_offset, - cmp_streams_by_sequential_order); -} - - -static int -add_lte_to_array(struct wim_lookup_table_entry *lte, - void *_pp) -{ - struct wim_lookup_table_entry ***pp = _pp; - *(*pp)++ = lte; - return 0; -} - -/* Iterate through the lookup table entries, but first sort them by stream - * offset in the WIM. Caution: this is intended to be used when the stream - * offset field has actually been set. 
*/ -int -for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, - int (*visitor)(struct wim_lookup_table_entry *, - void *), - void *arg) -{ - struct wim_lookup_table_entry **lte_array, **p; - size_t num_streams = table->num_entries; - int ret; - - lte_array = MALLOC(num_streams * sizeof(lte_array[0])); - if (!lte_array) - return WIMLIB_ERR_NOMEM; - p = lte_array; - for_lookup_table_entry(table, add_lte_to_array, &p); - - wimlib_assert(p == lte_array + num_streams); - - qsort(lte_array, num_streams, sizeof(lte_array[0]), - cmp_streams_by_sequential_order); - ret = 0; - for (size_t i = 0; i < num_streams; i++) { - ret = visitor(lte_array[i], arg); - if (ret) - break; - } - FREE(lte_array); - return ret; -} - -/* On-disk format of a WIM lookup table entry (stream entry). */ -struct wim_lookup_table_entry_disk { - /* Size, offset, and flags of the stream. */ - struct wim_reshdr_disk reshdr; - - /* Which part of the split WIM this stream is in; indexed from 1. */ - le16 part_number; - - /* Reference count of this stream over all WIM images. (But see comment - * above lte_decrement_refcnt().) */ - le32 refcnt; - - /* SHA1 message digest of the uncompressed data of this stream, or - * optionally all zeroes if this stream is of zero length. */ - u8 hash[SHA1_HASH_SIZE]; -} _packed_attribute; - -#define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50 - -/* Given a nonempty run of consecutive lookup table entries with the SOLID flag - * set, count how many specify resources (as opposed to streams within those - * resources). - * - * Returns the resulting count. */ -static size_t -count_solid_resources(const struct wim_lookup_table_entry_disk *entries, size_t max) -{ - size_t count = 0; - do { - struct wim_reshdr reshdr; - - get_wim_reshdr(&(entries++)->reshdr, &reshdr); - - if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) { - /* Run was terminated by a stand-alone stream entry. 
*/ - break; - } - - if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { - /* This is a resource entry. */ - count++; - } - } while (--max); - return count; -} - -/* - * Given a run of consecutive lookup table entries with the SOLID flag set and - * having @num_rspecs resource entries, load resource information from them into - * the resource specifications in the @rspecs array. - * - * Returns 0 on success, or a nonzero error code on failure. - */ -static int -do_load_solid_info(WIMStruct *wim, struct wim_resource_spec **rspecs, - size_t num_rspecs, - const struct wim_lookup_table_entry_disk *entries) -{ - for (size_t i = 0; i < num_rspecs; i++) { - struct wim_reshdr reshdr; - struct alt_chunk_table_header_disk hdr; - struct wim_resource_spec *rspec; - int ret; - - /* Advance to next resource entry. */ - - do { - get_wim_reshdr(&(entries++)->reshdr, &reshdr); - } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER); - - rspec = rspecs[i]; - - wim_res_hdr_to_spec(&reshdr, wim, rspec); - - /* For solid resources, the uncompressed size, compression type, - * and chunk size are stored in the resource itself, not in the - * lookup table. */ - - ret = full_pread(&wim->in_fd, &hdr, - sizeof(hdr), reshdr.offset_in_wim); - if (ret) { - ERROR("Failed to read header of solid resource " - "(offset_in_wim=%"PRIu64")", - reshdr.offset_in_wim); - return ret; - } - - rspec->uncompressed_size = le64_to_cpu(hdr.res_usize); - - /* Compression format numbers must be the same as in - * WIMGAPI to be compatible here. 
*/ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2); - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3); - rspec->compression_type = le32_to_cpu(hdr.compression_format); - - rspec->chunk_size = le32_to_cpu(hdr.chunk_size); - - DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" " - "(%"TS"/%"PRIu32") @ +%"PRIu64"", - i + 1, num_rspecs, - rspec->uncompressed_size, - rspec->size_in_wim, - wimlib_get_compression_type_string(rspec->compression_type), - rspec->chunk_size, - rspec->offset_in_wim); - - } - return 0; -} - -/* - * Given a nonempty run of consecutive lookup table entries with the SOLID flag - * set, allocate a 'struct wim_resource_spec' for each resource within that run. - * - * Returns 0 on success, or a nonzero error code on failure. - * Returns the pointers and count in *rspecs_ret and *num_rspecs_ret. - */ -static int -load_solid_info(WIMStruct *wim, - const struct wim_lookup_table_entry_disk *entries, - size_t num_remaining_entries, - struct wim_resource_spec ***rspecs_ret, - size_t *num_rspecs_ret) -{ - size_t num_rspecs; - struct wim_resource_spec **rspecs; - size_t i; - int ret; - - num_rspecs = count_solid_resources(entries, num_remaining_entries); - rspecs = CALLOC(num_rspecs, sizeof(rspecs[0])); - if (!rspecs) - return WIMLIB_ERR_NOMEM; - - for (i = 0; i < num_rspecs; i++) { - rspecs[i] = MALLOC(sizeof(struct wim_resource_spec)); - if (!rspecs[i]) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_rspecs; - } - } - - ret = do_load_solid_info(wim, rspecs, num_rspecs, entries); - if (ret) - goto out_free_rspecs; - - *rspecs_ret = rspecs; - *num_rspecs_ret = num_rspecs; - return 0; - -out_free_rspecs: - for (i = 0; i < num_rspecs; i++) - FREE(rspecs[i]); - FREE(rspecs); - return ret; -} - -/* Given a 'struct wim_lookup_table_entry' allocated for a stream entry with the - * SOLID flag set, try to bind it to resource in the current solid run. 
*/ -static int -bind_stream_to_solid_resource(const struct wim_reshdr *reshdr, - struct wim_lookup_table_entry *stream, - struct wim_resource_spec **rspecs, - size_t num_rspecs) -{ - u64 offset = reshdr->offset_in_wim; - - /* XXX: This linear search will be slow in the degenerate case where the - * number of solid resources in the run is huge. */ - stream->size = reshdr->size_in_wim; - stream->flags = reshdr->flags; - for (size_t i = 0; i < num_rspecs; i++) { - if (offset + stream->size <= rspecs[i]->uncompressed_size) { - stream->offset_in_res = offset; - lte_bind_wim_resource_spec(stream, rspecs[i]); - return 0; - } - offset -= rspecs[i]->uncompressed_size; - } - ERROR("Stream could not be assigned to a solid resource"); - return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; -} - -static void -free_solid_rspecs(struct wim_resource_spec **rspecs, size_t num_rspecs) -{ - if (rspecs) { - for (size_t i = 0; i < num_rspecs; i++) - if (list_empty(&rspecs[i]->stream_list)) - FREE(rspecs[i]); - FREE(rspecs); - } -} - -static int -cmp_streams_by_offset_in_res(const void *p1, const void *p2) -{ - const struct wim_lookup_table_entry *lte1, *lte2; - - lte1 = *(const struct wim_lookup_table_entry**)p1; - lte2 = *(const struct wim_lookup_table_entry**)p2; - - return cmp_u64(lte1->offset_in_res, lte2->offset_in_res); -} - -/* Validate the size and location of a WIM resource. */ -static int -validate_resource(struct wim_resource_spec *rspec) -{ - struct wim_lookup_table_entry *lte; - bool out_of_order; - u64 expected_next_offset; - int ret; - - /* Verify that the resource itself has a valid offset and size. */ - if (rspec->offset_in_wim + rspec->size_in_wim < rspec->size_in_wim) - goto invalid_due_to_overflow; - - /* Verify that each stream in the resource has a valid offset and size. 
- */ - expected_next_offset = 0; - out_of_order = false; - list_for_each_entry(lte, &rspec->stream_list, rspec_node) { - if (lte->offset_in_res + lte->size < lte->size || - lte->offset_in_res + lte->size > rspec->uncompressed_size) - goto invalid_due_to_overflow; - - if (lte->offset_in_res >= expected_next_offset) - expected_next_offset = lte->offset_in_res + lte->size; - else - out_of_order = true; - } - - /* If the streams were not located at strictly increasing positions (not - * allowing for overlap), sort them. Then make sure that none overlap. - */ - if (out_of_order) { - ret = sort_stream_list(&rspec->stream_list, - offsetof(struct wim_lookup_table_entry, - rspec_node), - cmp_streams_by_offset_in_res); - if (ret) - return ret; - - expected_next_offset = 0; - list_for_each_entry(lte, &rspec->stream_list, rspec_node) { - if (lte->offset_in_res >= expected_next_offset) - expected_next_offset = lte->offset_in_res + lte->size; - else - goto invalid_due_to_overlap; - } - } - - return 0; - -invalid_due_to_overflow: - ERROR("Invalid resource entry (offset overflow)"); - return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - -invalid_due_to_overlap: - ERROR("Invalid resource entry (streams in solid resource overlap)"); - return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; -} - -static int -finish_solid_rspecs(struct wim_resource_spec **rspecs, size_t num_rspecs) -{ - int ret = 0; - for (size_t i = 0; i < num_rspecs; i++) { - ret = validate_resource(rspecs[i]); - if (ret) - break; - } - free_solid_rspecs(rspecs, num_rspecs); - return ret; -} - -/* - * Reads the lookup table from a WIM file. Usually, each entry specifies a - * stream that the WIM file contains, along with its location and SHA1 message - * digest. - * - * Saves lookup table entries for non-metadata streams in a hash table (set to - * wim->lookup_table), and saves the metadata entry for each image in a special - * per-image location (the wim->image_metadata array). 
- * - * This works for both version WIM_VERSION_DEFAULT (68864) and version - * WIM_VERSION_SOLID (3584) WIMs. In the latter, a consecutive run of lookup - * table entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid - * run". A solid run logically contains zero or more resources, each of which - * logically contains zero or more streams. Physically, in such a run, a - * "lookup table entry" with uncompressed size SOLID_RESOURCE_MAGIC_NUMBER - * (0x100000000) specifies a resource, whereas any other entry specifies a - * stream. Within such a run, stream entries and resource entries need not be - * in any particular order, except that the order of the resource entries is - * important, as it affects how streams are assigned to resources. See the code - * for details. - * - * Possible return values: - * WIMLIB_ERR_SUCCESS (0) - * WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY - * WIMLIB_ERR_NOMEM - * - * Or an error code caused by failure to read the lookup table from the WIM - * file. - */ -int -read_wim_lookup_table(WIMStruct *wim) -{ - int ret; - size_t num_entries; - void *buf = NULL; - struct wim_lookup_table *table = NULL; - struct wim_lookup_table_entry *cur_entry = NULL; - size_t num_duplicate_entries = 0; - size_t num_wrong_part_entries = 0; - u32 image_index = 0; - struct wim_resource_spec **cur_solid_rspecs = NULL; - size_t cur_num_solid_rspecs = 0; - - DEBUG("Reading lookup table."); - - /* Sanity check: lookup table entries are 50 bytes each. */ - BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) != - WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE); - - /* Calculate the number of entries in the lookup table. */ - num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size / - sizeof(struct wim_lookup_table_entry_disk); - - /* Read the lookup table into a buffer. */ - ret = wim_reshdr_to_data(&wim->hdr.lookup_table_reshdr, wim, &buf); - if (ret) - goto out; - - /* Allocate a hash table to map SHA1 message digests into stream - * specifications. 
This is the in-memory "lookup table". */ - table = new_lookup_table(num_entries * 2 + 1); - if (!table) - goto oom; - - /* Allocate and initalize stream entries ('struct - * wim_lookup_table_entry's) from the raw lookup table buffer. Each of - * these entries will point to a 'struct wim_resource_spec' that - * describes the underlying resource. In WIMs with version number - * WIM_VERSION_SOLID, a resource may contain multiple streams. - */ - for (size_t i = 0; i < num_entries; i++) { - const struct wim_lookup_table_entry_disk *disk_entry = - &((const struct wim_lookup_table_entry_disk*)buf)[i]; - struct wim_reshdr reshdr; - u16 part_number; - - /* Get the resource header */ - get_wim_reshdr(&disk_entry->reshdr, &reshdr); - - DEBUG("reshdr: size_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64", " - "offset_in_wim=%"PRIu64", " - "flags=0x%02x", - reshdr.size_in_wim, reshdr.uncompressed_size, - reshdr.offset_in_wim, reshdr.flags); - - /* Ignore SOLID flag if it isn't supposed to be used in this WIM - * version. */ - if (wim->hdr.wim_version == WIM_VERSION_DEFAULT) - reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID; - - /* Allocate a new 'struct wim_lookup_table_entry'. */ - cur_entry = new_lookup_table_entry(); - if (!cur_entry) - goto oom; - - /* Get the part number, reference count, and hash. 
*/ - part_number = le16_to_cpu(disk_entry->part_number); - cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt); - copy_hash(cur_entry->hash, disk_entry->hash); - - if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) { - - /* SOLID entry */ - - if (!cur_solid_rspecs) { - /* Starting new run */ - ret = load_solid_info(wim, disk_entry, - num_entries - i, - &cur_solid_rspecs, - &cur_num_solid_rspecs); - if (ret) - goto out; - } - - if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) { - /* Resource entry, not stream entry */ - goto free_cur_entry_and_continue; - } - - /* Stream entry */ - - ret = bind_stream_to_solid_resource(&reshdr, - cur_entry, - cur_solid_rspecs, - cur_num_solid_rspecs); - if (ret) - goto out; - - } else { - /* Normal stream/resource entry; SOLID not set. */ - - struct wim_resource_spec *rspec; - - if (unlikely(cur_solid_rspecs)) { - /* This entry terminated a solid run. */ - ret = finish_solid_rspecs(cur_solid_rspecs, - cur_num_solid_rspecs); - cur_solid_rspecs = NULL; - if (ret) - goto out; - } - - /* How to handle an uncompressed resource with its - * uncompressed size different from its compressed size? - * - * Based on a simple test, WIMGAPI seems to handle this - * as follows: - * - * if (size_in_wim > uncompressed_size) { - * Ignore uncompressed_size; use size_in_wim - * instead. - * } else { - * Honor uncompressed_size, but treat the part of - * the file data above size_in_wim as all zeros. - * } - * - * So we will do the same. */ - if (unlikely(!(reshdr.flags & - WIM_RESHDR_FLAG_COMPRESSED) && - (reshdr.size_in_wim > - reshdr.uncompressed_size))) - { - reshdr.uncompressed_size = reshdr.size_in_wim; - } - - /* Set up a resource specification for this stream. 
*/ - - rspec = MALLOC(sizeof(struct wim_resource_spec)); - if (!rspec) - goto oom; - - wim_res_hdr_to_spec(&reshdr, wim, rspec); - - cur_entry->offset_in_res = 0; - cur_entry->size = reshdr.uncompressed_size; - cur_entry->flags = reshdr.flags; - - lte_bind_wim_resource_spec(cur_entry, rspec); - } - - /* cur_entry is now a stream bound to a resource. */ - - /* Ignore entries with all zeroes in the hash field. */ - if (is_zero_hash(cur_entry->hash)) - goto free_cur_entry_and_continue; - - /* Verify that the part number matches that of the underlying - * WIM file. */ - if (part_number != wim->hdr.part_number) { - num_wrong_part_entries++; - goto free_cur_entry_and_continue; - } - - if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) { - - /* Lookup table entry for a metadata resource. */ - - /* Metadata entries with no references must be ignored. - * See, for example, the WinPE WIMs from the WAIK v2.1. - */ - if (cur_entry->refcnt == 0) - goto free_cur_entry_and_continue; - - if (cur_entry->refcnt != 1) { - /* We don't currently support this case due to - * the complications of multiple images sharing - * the same metadata resource or a metadata - * resource also being referenced by files. */ - ERROR("Found metadata resource with refcnt != 1"); - ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY; - goto out; - } - - if (wim->hdr.part_number != 1) { - WARNING("Ignoring metadata resource found in a " - "non-first part of the split WIM"); - goto free_cur_entry_and_continue; - } - - /* The number of entries in the lookup table with - * WIM_RESHDR_FLAG_METADATA set should be the same as - * the image_count field in the WIM header. */ - if (image_index == wim->hdr.image_count) { - WARNING("Found more metadata resources than images"); - goto free_cur_entry_and_continue; - } - - /* Notice very carefully: We are assigning the metadata - * resources to images in the same order in which their - * lookup table entries occur on disk. (This is also - * the behavior of Microsoft's software.) 
In - * particular, this overrides the actual locations of - * the metadata resources themselves in the WIM file as - * well as any information written in the XML data. */ - DEBUG("Found metadata resource for image %"PRIu32" at " - "offset %"PRIu64".", - image_index + 1, - reshdr.offset_in_wim); - - wim->image_metadata[image_index++]->metadata_lte = cur_entry; - } else { - /* Lookup table entry for a non-metadata stream. */ - - /* Ignore this stream if it's a duplicate. */ - if (lookup_stream(table, cur_entry->hash)) { - num_duplicate_entries++; - goto free_cur_entry_and_continue; - } - - /* Insert the stream into the in-memory lookup table, - * keyed by its SHA1 message digest. */ - lookup_table_insert(table, cur_entry); - } - - continue; - - free_cur_entry_and_continue: - if (cur_solid_rspecs && - cur_entry->resource_location == RESOURCE_IN_WIM) - lte_unbind_wim_resource_spec(cur_entry); - free_lookup_table_entry(cur_entry); - } - cur_entry = NULL; - - if (cur_solid_rspecs) { - /* End of lookup table terminated a solid run. 
*/ - ret = finish_solid_rspecs(cur_solid_rspecs, cur_num_solid_rspecs); - cur_solid_rspecs = NULL; - if (ret) - goto out; - } - - if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) { - WARNING("Could not find metadata resources for all images"); - for (u32 i = image_index; i < wim->hdr.image_count; i++) - put_image_metadata(wim->image_metadata[i], NULL); - wim->hdr.image_count = image_index; - } - - if (num_duplicate_entries > 0) { - WARNING("Ignoring %zu duplicate streams in the WIM lookup table", - num_duplicate_entries); - } - - if (num_wrong_part_entries > 0) { - WARNING("Ignoring %zu streams with wrong part number", - num_wrong_part_entries); - } - - DEBUG("Done reading lookup table."); - wim->lookup_table = table; - ret = 0; - goto out_free_buf; - -oom: - ERROR("Not enough memory to read lookup table!"); - ret = WIMLIB_ERR_NOMEM; -out: - free_solid_rspecs(cur_solid_rspecs, cur_num_solid_rspecs); - free_lookup_table_entry(cur_entry); - free_lookup_table(table); -out_free_buf: - FREE(buf); - return ret; -} - -static void -put_wim_lookup_table_entry(struct wim_lookup_table_entry_disk *disk_entry, - const struct wim_reshdr *out_reshdr, - u16 part_number, u32 refcnt, const u8 *hash) -{ - put_wim_reshdr(out_reshdr, &disk_entry->reshdr); - disk_entry->part_number = cpu_to_le16(part_number); - disk_entry->refcnt = cpu_to_le32(refcnt); - copy_hash(disk_entry->hash, hash); -} - -/* Note: the list of stream entries must be sorted so that all entries for the - * same solid resource are consecutive. In addition, entries with - * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the - * underlying images. 
*/ -int -write_wim_lookup_table_from_stream_list(struct list_head *stream_list, - struct filedes *out_fd, - u16 part_number, - struct wim_reshdr *out_reshdr, - int write_resource_flags) -{ - size_t table_size; - struct wim_lookup_table_entry *lte; - struct wim_lookup_table_entry_disk *table_buf; - struct wim_lookup_table_entry_disk *table_buf_ptr; - int ret; - u64 prev_res_offset_in_wim = ~0ULL; - u64 prev_uncompressed_size; - u64 logical_offset; - - table_size = 0; - list_for_each_entry(lte, stream_list, lookup_table_list) { - table_size += sizeof(struct wim_lookup_table_entry_disk); - - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID && - lte->out_res_offset_in_wim != prev_res_offset_in_wim) - { - table_size += sizeof(struct wim_lookup_table_entry_disk); - prev_res_offset_in_wim = lte->out_res_offset_in_wim; - } - } - - DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")", - table_size, out_fd->offset); - - table_buf = MALLOC(table_size); - if (table_buf == NULL) { - ERROR("Failed to allocate %zu bytes for temporary lookup table", - table_size); - return WIMLIB_ERR_NOMEM; - } - table_buf_ptr = table_buf; - - prev_res_offset_in_wim = ~0ULL; - prev_uncompressed_size = 0; - logical_offset = 0; - list_for_each_entry(lte, stream_list, lookup_table_list) { - if (lte->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { - struct wim_reshdr tmp_reshdr; - - /* Eww. When WIMGAPI sees multiple solid resources, it - * expects the offsets to be adjusted as if there were - * really only one solid resource. 
*/ - - if (lte->out_res_offset_in_wim != prev_res_offset_in_wim) { - /* Put the resource entry for solid resource */ - tmp_reshdr.offset_in_wim = lte->out_res_offset_in_wim; - tmp_reshdr.size_in_wim = lte->out_res_size_in_wim; - tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER; - tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID; - - put_wim_lookup_table_entry(table_buf_ptr++, - &tmp_reshdr, - part_number, - 1, zero_hash); - - logical_offset += prev_uncompressed_size; - - prev_res_offset_in_wim = lte->out_res_offset_in_wim; - prev_uncompressed_size = lte->out_res_uncompressed_size; - } - tmp_reshdr = lte->out_reshdr; - tmp_reshdr.offset_in_wim += logical_offset; - put_wim_lookup_table_entry(table_buf_ptr++, - &tmp_reshdr, - part_number, - lte->out_refcnt, - lte->hash); - } else { - put_wim_lookup_table_entry(table_buf_ptr++, - &lte->out_reshdr, - part_number, - lte->out_refcnt, - lte->hash); - } - - } - wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size); - - /* Write the lookup table uncompressed. Although wimlib can handle a - * compressed lookup table, MS software cannot. */ - ret = write_wim_resource_from_buffer(table_buf, - table_size, - WIM_RESHDR_FLAG_METADATA, - out_fd, - WIMLIB_COMPRESSION_TYPE_NONE, - 0, - out_reshdr, - NULL, - write_resource_flags); - FREE(table_buf); - DEBUG("ret=%d", ret); - return ret; -} - -/* Allocate a stream entry for the contents of the buffer, or re-use an existing - * entry in @lookup_table for the same stream. 
*/ -struct wim_lookup_table_entry * -new_stream_from_data_buffer(const void *buffer, size_t size, - struct wim_lookup_table *lookup_table) -{ - u8 hash[SHA1_HASH_SIZE]; - struct wim_lookup_table_entry *lte, *existing_lte; - - sha1_buffer(buffer, size, hash); - existing_lte = lookup_stream(lookup_table, hash); - if (existing_lte) { - wimlib_assert(existing_lte->size == size); - lte = existing_lte; - lte->refcnt++; - } else { - void *buffer_copy; - lte = new_lookup_table_entry(); - if (lte == NULL) - return NULL; - buffer_copy = memdup(buffer, size); - if (buffer_copy == NULL) { - free_lookup_table_entry(lte); - return NULL; - } - lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte->attached_buffer = buffer_copy; - lte->size = size; - copy_hash(lte->hash, hash); - lookup_table_insert(lookup_table, lte); - } - return lte; -} - -/* Calculate the SHA1 message digest of a stream and move it from the list of - * unhashed streams to the stream lookup table, possibly joining it with an - * existing lookup table entry for an identical stream. - * - * @lte: An unhashed lookup table entry. - * @lookup_table: Lookup table for the WIM. - * @lte_ret: On success, write a pointer to the resulting lookup table - * entry to this location. This will be the same as @lte - * if it was inserted into the lookup table, or different if - * a duplicate stream was found. - * - * Returns 0 on success; nonzero if there is an error reading the stream. - */ -int -hash_unhashed_stream(struct wim_lookup_table_entry *lte, - struct wim_lookup_table *lookup_table, - struct wim_lookup_table_entry **lte_ret) -{ - int ret; - struct wim_lookup_table_entry *duplicate_lte; - struct wim_lookup_table_entry **back_ptr; - - wimlib_assert(lte->unhashed); - - /* back_ptr must be saved because @back_inode and @back_stream_id are in - * union with the SHA1 message digest and will no longer be valid once - * the SHA1 has been calculated. 
*/ - back_ptr = retrieve_lte_pointer(lte); - - ret = sha1_stream(lte); - if (ret) - return ret; - - /* Look for a duplicate stream */ - duplicate_lte = lookup_stream(lookup_table, lte->hash); - list_del(&lte->unhashed_list); - if (duplicate_lte) { - /* We have a duplicate stream. Transfer the reference counts - * from this stream to the duplicate and update the reference to - * this stream (in an inode or ads_entry) to point to the - * duplicate. The caller is responsible for freeing @lte if - * needed. */ - wimlib_assert(!(duplicate_lte->unhashed)); - wimlib_assert(duplicate_lte->size == lte->size); - duplicate_lte->refcnt += lte->refcnt; - lte->refcnt = 0; - *back_ptr = duplicate_lte; - lte = duplicate_lte; - } else { - /* No duplicate stream, so we need to insert this stream into - * the lookup table and treat it as a hashed stream. */ - lookup_table_insert(lookup_table, lte); - lte->unhashed = 0; - } - *lte_ret = lte; - return 0; -} - -void -lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, - struct wimlib_resource_entry *wentry) -{ - memset(wentry, 0, sizeof(*wentry)); - - wentry->uncompressed_size = lte->size; - if (lte->resource_location == RESOURCE_IN_WIM) { - wentry->part_number = lte->rspec->wim->hdr.part_number; - if (lte->flags & WIM_RESHDR_FLAG_SOLID) { - wentry->compressed_size = 0; - wentry->offset = lte->offset_in_res; - } else { - wentry->compressed_size = lte->rspec->size_in_wim; - wentry->offset = lte->rspec->offset_in_wim; - } - wentry->raw_resource_offset_in_wim = lte->rspec->offset_in_wim; - /*wentry->raw_resource_uncompressed_size = lte->rspec->uncompressed_size;*/ - wentry->raw_resource_compressed_size = lte->rspec->size_in_wim; - } - copy_hash(wentry->sha1_hash, lte->hash); - wentry->reference_count = lte->refcnt; - wentry->is_compressed = (lte->flags & WIM_RESHDR_FLAG_COMPRESSED) != 0; - wentry->is_metadata = (lte->flags & WIM_RESHDR_FLAG_METADATA) != 0; - wentry->is_free = (lte->flags & WIM_RESHDR_FLAG_FREE) != 0; - 
wentry->is_spanned = (lte->flags & WIM_RESHDR_FLAG_SPANNED) != 0; - wentry->packed = (lte->flags & WIM_RESHDR_FLAG_SOLID) != 0; -} - -struct iterate_lte_context { - wimlib_iterate_lookup_table_callback_t cb; - void *user_ctx; -}; - -static int -do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx) -{ - struct iterate_lte_context *ctx = _ctx; - struct wimlib_resource_entry entry; - - lte_to_wimlib_resource_entry(lte, &entry); - return (*ctx->cb)(&entry, ctx->user_ctx); -} - -/* API function documented in wimlib.h */ -WIMLIBAPI int -wimlib_iterate_lookup_table(WIMStruct *wim, int flags, - wimlib_iterate_lookup_table_callback_t cb, - void *user_ctx) -{ - if (flags != 0) - return WIMLIB_ERR_INVALID_PARAM; - - struct iterate_lte_context ctx = { - .cb = cb, - .user_ctx = user_ctx, - }; - if (wim_has_metadata(wim)) { - int ret; - for (int i = 0; i < wim->hdr.image_count; i++) { - ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte, - &ctx); - if (ret) - return ret; - } - } - return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx); -} diff --git a/src/metadata_resource.c b/src/metadata_resource.c index 54ea9153..c187d698 100644 --- a/src/metadata_resource.c +++ b/src/metadata_resource.c @@ -24,20 +24,38 @@ #endif #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/resource.h" #include "wimlib/security.h" #include "wimlib/write.h" +/* Fix the security ID for every inode to be either -1 or in bounds. 
*/ +static void +fix_security_ids(struct wim_image_metadata *imd, const u32 num_entries) +{ + struct wim_inode *inode; + unsigned long invalid_count = 0; + + image_for_each_inode(inode, imd) { + if ((u32)inode->i_security_id >= num_entries) { + if (inode->i_security_id >= 0) + invalid_count++; + inode->i_security_id = -1; + } + } + if (invalid_count) + WARNING("%lu inodes had invalid security IDs", invalid_count); +} + /* * Reads and parses a metadata resource for an image in the WIM file. * * @imd: * Pointer to the image metadata structure for the image whose metadata - * resource we are reading. Its `metadata_lte' member specifies the lookup + * resource we are reading. Its `metadata_blob' member specifies the blob * table entry for the metadata resource. The rest of the image metadata * entry will be filled in by this function. * @@ -52,28 +70,27 @@ int read_metadata_resource(struct wim_image_metadata *imd) { - const struct wim_lookup_table_entry *metadata_lte; + const struct blob_descriptor *metadata_blob; void *buf; int ret; struct wim_security_data *sd; struct wim_dentry *root; - struct wim_inode *inode; - metadata_lte = imd->metadata_lte; + metadata_blob = imd->metadata_blob; - DEBUG("Reading metadata resource (size=%"PRIu64").", metadata_lte->size); + DEBUG("Reading metadata resource (size=%"PRIu64").", metadata_blob->size); /* Read the metadata resource into memory. (It may be compressed.) */ - ret = read_full_stream_into_alloc_buf(metadata_lte, &buf); + ret = read_full_blob_into_alloc_buf(metadata_blob, &buf); if (ret) return ret; /* Checksum the metadata resource. 
*/ - if (!metadata_lte->dont_check_metadata_hash) { + if (!metadata_blob->dont_check_metadata_hash) { u8 hash[SHA1_HASH_SIZE]; - sha1_buffer(buf, metadata_lte->size, hash); - if (!hashes_equal(metadata_lte->hash, hash)) { + sha1_buffer(buf, metadata_blob->size, hash); + if (!hashes_equal(metadata_blob->hash, hash)) { ERROR("Metadata resource is corrupted " "(invalid SHA-1 message digest)!"); ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; @@ -91,11 +108,11 @@ read_metadata_resource(struct wim_image_metadata *imd) * by a directory entry of length '0', really of length 8, because * that's how long the 'length' field is. */ - ret = read_wim_security_data(buf, metadata_lte->size, &sd); + ret = read_wim_security_data(buf, metadata_blob->size, &sd); if (ret) goto out_free_buf; - ret = read_dentry_tree(buf, metadata_lte->size, sd->total_length, &root); + ret = read_dentry_tree(buf, metadata_blob->size, sd->total_length, &root); if (ret) goto out_free_security_data; @@ -109,13 +126,12 @@ read_metadata_resource(struct wim_image_metadata *imd) if (ret) goto out_free_dentry_tree; - image_for_each_inode(inode, imd) - check_inode(inode, sd); + fix_security_ids(imd, sd->num_entries); /* Success; fill in the image_metadata structure. */ imd->root_dentry = root; imd->security_data = sd; - INIT_LIST_HEAD(&imd->unhashed_streams); + INIT_LIST_HEAD(&imd->unhashed_blobs); DEBUG("Done parsing metadata resource."); return 0; @@ -226,18 +242,18 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags) imd = wim->image_metadata[image - 1]; /* Write the metadata resource to the output WIM using the proper - * compression type, in the process updating the lookup table entry for - * the metadata resource. */ + * compression type, in the process updating the blob descriptor for the + * metadata resource. 
*/ ret = write_wim_resource_from_buffer(buf, len, WIM_RESHDR_FLAG_METADATA, &wim->out_fd, wim->out_compression_type, wim->out_chunk_size, - &imd->metadata_lte->out_reshdr, - imd->metadata_lte->hash, + &imd->metadata_blob->out_reshdr, + imd->metadata_blob->hash, write_resource_flags); /* Original checksum was overridden; set a flag so it isn't used. */ - imd->metadata_lte->dont_check_metadata_hash = 1; + imd->metadata_blob->dont_check_metadata_hash = 1; FREE(buf); return ret; diff --git a/src/mount_image.c b/src/mount_image.c index 2e50d6c3..dd6d94c6 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -8,7 +8,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -54,9 +54,9 @@ #include #include +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/paths.h" #include "wimlib/progress.h" @@ -97,12 +97,12 @@ struct wimfs_fd { * that reference it. */ struct wim_inode *f_inode; - /* Pointer to the lookup table entry for the data stream that has been - * opened. 'num_opened_fds' of the lookup table entry tracks the number - * of file descriptors that reference it. Or, this value may be NULL, + /* Pointer to the blob descriptor for the data stream that has been + * opened. 'num_opened_fds' of the blob descriptor tracks the number of + * file descriptors that reference it. Or, this value may be NULL, * which indicates that the opened stream is empty and consequently does - * not have a lookup table entry. */ - struct wim_lookup_table_entry *f_lte; + * not have a blob descriptor. 
*/ + struct blob_descriptor *f_blob; /* If valid (filedes_valid(&f_staging_fd)), this contains the * corresponding native file descriptor for the staging file that has @@ -121,8 +121,8 @@ struct wimfs_fd { u16 f_idx; /* Unique ID of the opened stream in the inode. This will stay the same - * even if the indices of the inode's alternate data streams are changed - * by a deletion. */ + * even if the indices of the inode's streams are changed by a deletion. + */ u32 f_stream_id; }; @@ -158,9 +158,9 @@ struct wimfs_context { /* Number of file descriptors open to the mounted WIM image. */ unsigned long num_open_fds; - /* Original list of single-instance streams in the mounted image, linked - * by 'struct wim_lookup_table_entry'.orig_stream_list. */ - struct list_head orig_stream_list; + /* Original list of blobs in the mounted image, linked by + * 'struct blob_descriptor'.orig_blob_list. */ + struct list_head orig_blob_list; /* Parameters for unmounting the image (can be set via extended * attribute "wimfs.unmount_info"). */ @@ -220,12 +220,9 @@ fuse_mask_mode(mode_t mode, const struct fuse_context *fuse_ctx) * Allocate a file descriptor to a data stream in the mounted WIM image. * * @inode - * A pointer to the inode containing the stream being opened. - * @stream_id - * The ID of the data stream being opened within the inode. - * @lte - * A pointer to the lookup table entry for the stream data. Or, for a - * 0-byte stream, this may be NULL. + * The inode containing the stream being opened + * @strm + * The stream of the inode being opened * @fd_ret * On success, a pointer to the new file descriptor will be stored here. 
* @@ -233,8 +230,7 @@ fuse_mask_mode(mode_t mode, const struct fuse_context *fuse_ctx) */ static int alloc_wimfs_fd(struct wim_inode *inode, - u32 stream_id, - struct wim_lookup_table_entry *lte, + struct wim_inode_stream *strm, struct wimfs_fd **fd_ret) { static const u16 min_fds_per_alloc = 8; @@ -282,15 +278,15 @@ alloc_wimfs_fd(struct wim_inode *inode, return -ENOMEM; fd->f_inode = inode; - fd->f_lte = lte; + fd->f_blob = stream_blob_resolved(strm); filedes_invalidate(&fd->f_staging_fd); fd->f_idx = i; - fd->f_stream_id = stream_id; + fd->f_stream_id = strm->stream_id; *fd_ret = fd; inode->i_fds[i] = fd; inode->i_num_opened_fds++; - if (lte) - lte->num_opened_fds++; + if (fd->f_blob) + fd->f_blob->num_opened_fds++; wimfs_inc_num_open_fds(); inode->i_next_fd = i + 1; return 0; @@ -312,9 +308,9 @@ close_wimfs_fd(struct wimfs_fd *fd) if (filedes_close(&fd->f_staging_fd)) ret = -errno; - /* Release this file descriptor from its lookup table entry. */ - if (fd->f_lte) - lte_decrement_num_opened_fds(fd->f_lte); + /* Release this file descriptor from its blob descriptor. */ + if (fd->f_blob) + blob_decrement_num_opened_fds(fd->f_blob); wimfs_dec_num_open_fds(); @@ -352,26 +348,47 @@ wim_pathname_to_inode(WIMStruct *wim, const char *path) /* Can look up directory (otherwise get -ENOTDIR) */ #define LOOKUP_FLAG_DIRECTORY_OK 0x02 +/* Get the data stream of the specified name from the specified inode. Returns + * NULL with errno set if not found. 
*/ +static struct wim_inode_stream * +inode_get_data_stream_tstr(const struct wim_inode *inode, + const char *stream_name) +{ + struct wim_inode_stream *strm; + + if (!stream_name || !*stream_name) { + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA); + } else { + const utf16lechar *uname; + + if (tstr_get_utf16le(stream_name, &uname)) + return NULL; + strm = inode_get_stream(inode, STREAM_TYPE_DATA, uname); + tstr_put_utf16le(uname); + } + if (!strm) + errno = ENOENT; + return strm; +} + /* - * Translate a path into the corresponding dentry, lookup table entry, and - * stream index in the mounted WIM image. + * Translate a path into the corresponding dentry and stream in the mounted WIM + * image. * - * Returns 0 or a -errno code. All of @dentry_ret, @lte_ret, and - * @stream_idx_ret are optional. + * Returns 0 or a -errno code. @dentry_ret and @strm_ret are both optional. */ static int -wim_pathname_to_stream(const struct wimfs_context *ctx, const char *path, +wim_pathname_to_stream(const struct wimfs_context *ctx, + const char *path, int lookup_flags, struct wim_dentry **dentry_ret, - struct wim_lookup_table_entry **lte_ret, - unsigned *stream_idx_ret) + struct wim_inode_stream **strm_ret) { WIMStruct *wim = ctx->wim; struct wim_dentry *dentry; - struct wim_lookup_table_entry *lte; - unsigned stream_idx; - const char *stream_name = NULL; struct wim_inode *inode; + struct wim_inode_stream *strm; + const char *stream_name = NULL; char *p = NULL; lookup_flags |= ctx->default_lookup_flags; @@ -392,31 +409,27 @@ wim_pathname_to_stream(const struct wimfs_context *ctx, const char *path, inode = dentry->d_inode; - if (inode_resolve_streams(inode, wim->lookup_table, false)) + if (inode_resolve_streams(inode, wim->blob_table, false)) return -EIO; if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK) && inode_is_directory(inode)) return -EISDIR; - if (stream_name) { - struct wim_ads_entry *ads_entry; - - ads_entry = inode_get_ads_entry(inode, stream_name); - if 
(!ads_entry) + strm = inode_get_data_stream_tstr(inode, stream_name); + if (!strm) { + /* Force creation of an unnamed data stream */ + if (!stream_name) + strm = inode_add_stream(inode, STREAM_TYPE_DATA, + NO_STREAM_NAME, NULL); + if (!strm) return -errno; - - stream_idx = ads_entry - inode->i_ads_entries + 1; - lte = ads_entry->lte; - } else { - lte = inode_unnamed_stream_resolved(inode, &stream_idx); } + if (dentry_ret) *dentry_ret = dentry; - if (lte_ret) - *lte_ret = lte; - if (stream_idx_ret) - *stream_idx_ret = stream_idx; + if (strm_ret) + *strm_ret = strm; return 0; } @@ -472,7 +485,6 @@ create_dentry(struct fuse_context *fuse_ctx, const char *path, new_inode = new_dentry->d_inode; - new_inode->i_resolved = 1; new_inode->i_ino = wimfs_ctx->next_ino++; new_inode->i_attributes = attributes; @@ -504,11 +516,10 @@ create_dentry(struct fuse_context *fuse_ctx, const char *path, * inode. */ static void -remove_dentry(struct wim_dentry *dentry, - struct wim_lookup_table *lookup_table) +remove_dentry(struct wim_dentry *dentry, struct blob_table *blob_table) { - /* Drop the reference to each stream the inode contains. */ - inode_unref_streams(dentry->d_inode, lookup_table); + /* Drop blob references. */ + inode_unref_blobs(dentry->d_inode, blob_table); /* Unlink the dentry from the image's dentry tree. */ unlink_dentry(dentry); @@ -541,16 +552,15 @@ inode_default_unix_mode(const struct wim_inode *inode) /* * Retrieve standard UNIX metadata ('struct stat') for a WIM inode. * - * @lte specifies the stream of the inode that is being queried. We mostly - * return the same information for all streams, but st_size and st_blocks may be - * different for different streams. + * @blob is the blob descriptor for the stream of the inode that is being + * queried, or NULL. We mostly return the same information for all streams, but + * st_size and st_blocks may be different for different streams. * * This always returns 0. 
*/ static int inode_to_stbuf(const struct wim_inode *inode, - const struct wim_lookup_table_entry *lte, - struct stat *stbuf) + const struct blob_descriptor *blob, struct stat *stbuf) { const struct wimfs_context *ctx = wimfs_get_context(); struct wimlib_unix_data unix_data; @@ -576,8 +586,8 @@ inode_to_stbuf(const struct wim_inode *inode, } stbuf->st_ino = inode->i_ino; stbuf->st_nlink = inode->i_nlink; - if (lte) - stbuf->st_size = lte->size; + if (blob) + stbuf->st_size = blob->size; #ifdef HAVE_STAT_NANOSECOND_PRECISION stbuf->st_atim = wim_timestamp_to_timespec(inode->i_last_access_time); stbuf->st_mtim = wim_timestamp_to_timespec(inode->i_last_write_time); @@ -644,49 +654,37 @@ retry: } /* - * Extract a WIM resource to the staging directory. - * This is necessary if a stream using the resource is being opened for writing. + * Extract a blob to the staging directory. This is necessary when a stream + * using the blob is being opened for writing and the blob has not already been + * extracted to the staging directory. * * @inode * The inode containing the stream being opened for writing. - * - * @stream_idx - * The index of the stream in @inode being opened for writing. - * - * @lte_ptr - * *lte_ptr is the lookup table entry for the stream being extracted, or - * NULL if the stream does not have a lookup table entry (which is possible - * if the stream is empty). On success, *lte_ptr will be set to point to a - * lookup table entry that represents the resource in its new location in a - * staging file. This may be the same as the old entry in the case that it - * was reused, or it may be a new entry. - * + * @strm + * The stream being opened for writing. The blob descriptor to which the + * stream refers will be changed by this function. * @size - * Number of bytes of the stream to extract and include in the staging file - * resource. It may be less than the actual stream length, in which case - * only a prefix of the resource will be extracted. 
It may also be more - * than the actual stream length, in which case the extra space will be - * zero-filled. + * Number of bytes of the blob to extract and include in the staging file. + * It may be less than the actual blob length, in which case only a prefix + * of the blob will be extracted. It may also be more than the actual blob + * length, in which case the extra space will be zero-filled. * * Returns 0 or a -errno code. */ static int -extract_resource_to_staging_dir(struct wim_inode *inode, - unsigned stream_idx, - struct wim_lookup_table_entry **lte_ptr, - off_t size, - const struct wimfs_context *ctx) -{ - struct wim_lookup_table_entry *old_lte; - struct wim_lookup_table_entry *new_lte; +extract_blob_to_staging_dir(struct wim_inode *inode, + struct wim_inode_stream *strm, + off_t size, const struct wimfs_context *ctx) +{ + struct blob_descriptor *old_blob; + struct blob_descriptor *new_blob; char *staging_file_name; int staging_fd; off_t extract_size; int result; - u32 stream_id; int ret; - old_lte = *lte_ptr; + old_blob = stream_blob_resolved(strm); /* Create the staging file. */ staging_fd = create_staging_file(ctx, &staging_file_name); @@ -694,13 +692,13 @@ extract_resource_to_staging_dir(struct wim_inode *inode, return -errno; /* Extract the stream to the staging file (possibly truncated). */ - if (old_lte) { + if (old_blob) { struct filedes fd; filedes_init(&fd, staging_fd); errno = 0; - extract_size = min(old_lte->size, size); - result = extract_stream_to_fd(old_lte, &fd, extract_size); + extract_size = min(old_blob->size, size); + result = extract_blob_to_fd(old_blob, &fd, extract_size); } else { extract_size = 0; result = 0; @@ -717,106 +715,80 @@ extract_resource_to_staging_dir(struct wim_inode *inode, /* If an error occurred, unlink the staging file. */ if (unlikely(result)) { - /* extract_stream_to_fd() should set errno, but if it didn't, + /* extract_blob_to_fd() should set errno, but if it didn't, * set a default value. */ ret = errno ? 
-errno : -EIO; goto out_delete_staging_file; } - /* Now deal with the lookup table entries. We may be able to re-use the - * existing entry, but we may have to create a new one instead. */ + /* Create a blob descriptor for the staging file. */ + new_blob = new_blob_descriptor(); + if (unlikely(!new_blob)) { + ret = -ENOMEM; + goto out_delete_staging_file; + } - stream_id = inode_stream_idx_to_id(inode, stream_idx); + /* There may already be open file descriptors to this stream if it's + * previously been opened read-only, but just now we're opening it + * read-write. Identify those file descriptors, update them to use the + * new blob descriptor, and open staging file descriptors for them. */ + for (u16 i = 0, j = 0; j < inode->i_num_opened_fds; i++) { + struct wimfs_fd *fd; + int raw_fd; - if (old_lte && inode->i_nlink == old_lte->refcnt) { - /* The reference count of the existing lookup table entry is the - * same as the link count of the inode that contains the stream - * we're opening. Therefore, all the references to the lookup - * table entry correspond to the stream we're trying to extract, - * so the lookup table entry can be re-used. */ - lookup_table_unlink(ctx->wim->lookup_table, old_lte); - lte_put_resource(old_lte); - new_lte = old_lte; - } else { - /* We need to split the old lookup table entry because it also - * has other references. Or, there was no old lookup table - * entry, so we need to create a new one anyway. */ + fd = inode->i_fds[i]; + if (!fd) + continue; - new_lte = new_lookup_table_entry(); - if (unlikely(!new_lte)) { - ret = -ENOMEM; - goto out_delete_staging_file; - } + j++; - /* There may already be open file descriptors to this stream if - * it's previously been opened read-only, but just now we're - * opening it read-write. Identify those file descriptors and - * change their lookup table entry pointers to point to the new - * lookup table entry, and open staging file descriptors for - * them. 
- * - * At the same time, we need to count the number of these opened - * file descriptors to the new lookup table entry. If there's - * an old lookup table entry, this number needs to be subtracted - * from the fd's opened to the old entry. */ - for (u16 i = 0, j = 0; j < inode->i_num_opened_fds; i++) { - struct wimfs_fd *fd; - int raw_fd; - - fd = inode->i_fds[i]; - if (!fd) - continue; - - j++; - - if (fd->f_stream_id != stream_id) - continue; - - /* This is a readonly fd for the same stream. */ - fd->f_lte = new_lte; - new_lte->num_opened_fds++; - raw_fd = openat(ctx->staging_dir_fd, staging_file_name, - O_RDONLY | O_NOFOLLOW); - if (unlikely(raw_fd < 0)) { - ret = -errno; - goto out_revert_fd_changes; - } - filedes_init(&fd->f_staging_fd, raw_fd); - } - if (old_lte) { - old_lte->num_opened_fds -= new_lte->num_opened_fds; - old_lte->refcnt -= inode->i_nlink; + if (fd->f_stream_id != strm->stream_id) + continue; + + /* This is a readonly fd for the same stream. */ + fd->f_blob = new_blob; + new_blob->num_opened_fds++; + raw_fd = openat(ctx->staging_dir_fd, staging_file_name, + O_RDONLY | O_NOFOLLOW); + if (unlikely(raw_fd < 0)) { + ret = -errno; + goto out_revert_fd_changes; } + filedes_init(&fd->f_staging_fd, raw_fd); + } + + /* Remove the appropriate count of file descriptors and stream + * references from the old blob. 
*/ + if (old_blob) { + old_blob->num_opened_fds -= new_blob->num_opened_fds; + for (u32 i = 0; i < inode->i_nlink; i++) + blob_decrement_refcnt(old_blob, ctx->wim->blob_table); } - new_lte->refcnt = inode->i_nlink; - new_lte->resource_location = RESOURCE_IN_STAGING_FILE; - new_lte->staging_file_name = staging_file_name; - new_lte->staging_dir_fd = ctx->staging_dir_fd; - new_lte->size = size; + new_blob->refcnt = inode->i_nlink; + new_blob->blob_location = BLOB_IN_STAGING_FILE; + new_blob->staging_file_name = staging_file_name; + new_blob->staging_dir_fd = ctx->staging_dir_fd; + new_blob->size = size; - add_unhashed_stream(new_lte, inode, stream_id, - &wim_get_current_image_metadata(ctx->wim)->unhashed_streams); - if (stream_idx == 0) - inode->i_lte = new_lte; - else - inode->i_ads_entries[stream_idx - 1].lte = new_lte; - *lte_ptr = new_lte; + prepare_unhashed_blob(new_blob, inode, strm->stream_id, + &wim_get_current_image_metadata(ctx->wim)->unhashed_blobs); + stream_set_blob(strm, new_blob); return 0; out_revert_fd_changes: - for (u16 i = 0; new_lte->num_opened_fds; i++) { + for (u16 i = 0; new_blob->num_opened_fds; i++) { struct wimfs_fd *fd = inode->i_fds[i]; - if (fd && fd->f_stream_id == stream_id) { - fd->f_lte = old_lte; + if (fd && fd->f_stream_id == strm->stream_id) { + fd->f_blob = old_blob; if (filedes_valid(&fd->f_staging_fd)) { filedes_close(&fd->f_staging_fd); filedes_invalidate(&fd->f_staging_fd); } - new_lte->num_opened_fds--; + new_blob->num_opened_fds--; } } - free_lookup_table_entry(new_lte); + free_blob_descriptor(new_blob); out_delete_staging_file: unlinkat(ctx->staging_dir_fd, staging_file_name, 0); FREE(staging_file_name); @@ -971,34 +943,34 @@ reassign_inode_numbers(struct wimfs_context *ctx) static void release_extra_refcnts(struct wimfs_context *ctx) { - struct list_head *list = &ctx->orig_stream_list; - struct wim_lookup_table *lookup_table = ctx->wim->lookup_table; - struct wim_lookup_table_entry *lte, *tmp; + struct list_head *list = 
&ctx->orig_blob_list; + struct blob_table *blob_table = ctx->wim->blob_table; + struct blob_descriptor *blob, *tmp; - list_for_each_entry_safe(lte, tmp, list, orig_stream_list) { - u32 n = lte->out_refcnt; + list_for_each_entry_safe(blob, tmp, list, orig_blob_list) { + u32 n = blob->out_refcnt; while (n--) - lte_decrement_refcnt(lte, lookup_table); + blob_decrement_refcnt(blob, blob_table); } } -/* Delete the 'struct wim_lookup_table_entry' for any stream that was modified +/* Delete the 'struct blob_descriptor' for any stream that was modified * or created in the read-write mounted image and had a final size of 0. */ static void -delete_empty_streams(struct wimfs_context *ctx) +delete_empty_blobs(struct wimfs_context *ctx) { - struct wim_lookup_table_entry *lte, *tmp; + struct blob_descriptor *blob, *tmp; struct wim_image_metadata *imd; imd = wim_get_current_image_metadata(ctx->wim); - image_for_each_unhashed_stream_safe(lte, tmp, imd) { - if (!lte->size) { - *retrieve_lte_pointer(lte) = NULL; - list_del(<e->unhashed_list); - free_lookup_table_entry(lte); - } - } + image_for_each_unhashed_blob_safe(blob, tmp, imd) { + if (!blob->size) { + *retrieve_pointer_to_unhashed_blob(blob) = NULL; + list_del(&blob->unhashed_list); + free_blob_descriptor(blob); + } + } } /* Close all file descriptors open to the specified inode. @@ -1039,7 +1011,7 @@ renew_current_image(struct wimfs_context *ctx) int idx = wim->current_image - 1; struct wim_image_metadata *imd = wim->image_metadata[idx]; struct wim_image_metadata *replace_imd; - struct wim_lookup_table_entry *new_lte; + struct blob_descriptor *new_blob; int ret; /* Create 'replace_imd' structure to use for the reset original, @@ -1049,36 +1021,36 @@ renew_current_image(struct wimfs_context *ctx) if (!replace_imd) goto err; - /* Create new stream reference for the modified image's metadata + /* Create new blob descriptor for the modified image's metadata * resource, which doesn't exist yet. 
*/ ret = WIMLIB_ERR_NOMEM; - new_lte = new_lookup_table_entry(); - if (!new_lte) + new_blob = new_blob_descriptor(); + if (!new_blob) goto err_put_replace_imd; - new_lte->flags = WIM_RESHDR_FLAG_METADATA; - new_lte->unhashed = 1; + new_blob->flags = WIM_RESHDR_FLAG_METADATA; + new_blob->unhashed = 1; /* Make the image being moved available at a new index. Increments the * WIM's image count, but does not increment the reference count of the * 'struct image_metadata'. */ ret = append_image_metadata(wim, imd); if (ret) - goto err_free_new_lte; + goto err_free_new_blob; ret = xml_add_image(wim, ""); if (ret) goto err_undo_append; - replace_imd->metadata_lte = imd->metadata_lte; - imd->metadata_lte = new_lte; + replace_imd->metadata_blob = imd->metadata_blob; + imd->metadata_blob = new_blob; wim->image_metadata[idx] = replace_imd; wim->current_image = wim->hdr.image_count; return 0; err_undo_append: wim->hdr.image_count--; -err_free_new_lte: - free_lookup_table_entry(new_lte); +err_free_new_blob: + free_blob_descriptor(new_blob); err_put_replace_imd: put_image_metadata(replace_imd, NULL); err: @@ -1119,8 +1091,8 @@ commit_image(struct wimfs_context *ctx, int unmount_flags, mqd_t mq) } else { release_extra_refcnts(ctx); } - INIT_LIST_HEAD(&ctx->orig_stream_list); - delete_empty_streams(ctx); + INIT_LIST_HEAD(&ctx->orig_blob_list); + delete_empty_blobs(ctx); xml_update_image_info(ctx->wim, ctx->wim->current_image); write_flags = 0; @@ -1276,7 +1248,7 @@ static int wimfs_fgetattr(const char *path, struct stat *stbuf, struct fuse_file_info *fi) { struct wimfs_fd *fd = WIMFS_FD(fi); - return inode_to_stbuf(fd->f_inode, fd->f_lte, stbuf); + return inode_to_stbuf(fd->f_inode, fd->f_blob, stbuf); } static int @@ -1286,7 +1258,7 @@ wimfs_ftruncate(const char *path, off_t size, struct fuse_file_info *fi) if (ftruncate(fd->f_staging_fd.fd, size)) return -errno; touch_inode(fd->f_inode); - fd->f_lte->size = size; + fd->f_blob->size = size; return 0; } @@ -1295,14 +1267,16 @@ 
wimfs_getattr(const char *path, struct stat *stbuf) { const struct wimfs_context *ctx = wimfs_get_context(); struct wim_dentry *dentry; - struct wim_lookup_table_entry *lte; + struct wim_inode_stream *strm; int ret; ret = wim_pathname_to_stream(ctx, path, LOOKUP_FLAG_DIRECTORY_OK, - &dentry, <e, NULL); + &dentry, &strm); if (ret) return ret; - return inode_to_stbuf(dentry->d_inode, lte, stbuf); + + return inode_to_stbuf(dentry->d_inode, + stream_blob_resolved(strm), stbuf); } static int @@ -1322,8 +1296,8 @@ wimfs_getxattr(const char *path, const char *name, char *value, { const struct wimfs_context *ctx = wimfs_get_context(); struct wim_inode *inode; - struct wim_ads_entry *ads_entry; - struct wim_lookup_table_entry *lte; + struct wim_inode_stream *strm; + struct blob_descriptor *blob; if (!strncmp(name, "wimfs.", 6)) { /* Handle some magical extended attributes. These really should @@ -1372,31 +1346,34 @@ wimfs_getxattr(const char *path, const char *name, char *value, return -ENOATTR; name += 5; + if (!*name) + return -ENOATTR; + /* Querying a named data stream */ inode = wim_pathname_to_inode(ctx->wim, path); if (!inode) return -errno; - ads_entry = inode_get_ads_entry(inode, name); - if (!ads_entry) + strm = inode_get_data_stream_tstr(inode, name); + if (!strm) return (errno == ENOENT) ? -ENOATTR : -errno; - lte = ads_entry->lte; - if (!lte) + blob = stream_blob_resolved(strm); + if (!blob) return 0; - if (unlikely(lte->size > INT_MAX)) + if (unlikely(blob->size > INT_MAX)) return -EFBIG; if (size) { - if (size < lte->size) + if (size < blob->size) return -ERANGE; - if (read_full_stream_into_buf(lte, value)) + if (read_full_blob_into_buf(blob, value)) return errno ? 
-errno : -EIO; } - return lte->size; + return blob->size; } static int @@ -1431,7 +1408,7 @@ wimfs_link(const char *existing_path, const char *new_path) if (new_dentry(new_name, &new_alias)) return -ENOMEM; - inode_ref_streams(inode); + inode_ref_blobs(inode); d_associate(new_alias, inode); dentry_add_child(dir, new_alias); touch_inode(dir->d_inode); @@ -1457,18 +1434,18 @@ wimfs_listxattr(const char *path, char *list, size_t size) if (!inode) return -errno; - for (u16 i = 0; i < inode->i_num_ads; i++) { - const struct wim_ads_entry *entry; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm; char *stream_name_mbs; size_t stream_name_mbs_nbytes; - entry = &inode->i_ads_entries[i]; + strm = &inode->i_streams[i]; - if (!entry->stream_name_nbytes) + if (!stream_is_named_data_stream(strm)) continue; - if (utf16le_to_tstr(entry->stream_name, - entry->stream_name_nbytes, + if (utf16le_to_tstr(strm->stream_name, + utf16le_len_bytes(strm->stream_name), &stream_name_mbs, &stream_name_mbs_nbytes)) return -errno; @@ -1518,10 +1495,11 @@ wimfs_mknod(const char *path, mode_t mode, dev_t rdev) if ((wimfs_ctx->mount_flags & WIMLIB_MOUNT_FLAG_STREAM_INTERFACE_WINDOWS) && (stream_name = path_stream_name(path))) { - struct wim_ads_entry *old_entry; - struct wim_ads_entry *new_entry; struct wim_inode *inode; + struct wim_inode_stream *existing_strm; + struct wim_inode_stream *new_strm; char *p; + const utf16lechar *uname; /* Create a named data stream. 
*/ @@ -1536,14 +1514,20 @@ wimfs_mknod(const char *path, mode_t mode, dev_t rdev) if (!inode) return -errno; - old_entry = inode_get_ads_entry(inode, stream_name); - if (old_entry) - return -EEXIST; - if (errno != ENOENT) + if (tstr_get_utf16le(stream_name, &uname)) return -errno; - new_entry = inode_add_ads(inode, stream_name); - if (!new_entry) + existing_strm = inode_get_stream(inode, STREAM_TYPE_DATA, uname); + if (existing_strm) { + tstr_put_utf16le(uname); + return -EEXIST; + } + + new_strm = inode_add_stream(inode, STREAM_TYPE_DATA, uname, NULL); + + tstr_put_utf16le(uname); + + if (!new_strm) return -errno; return 0; } else { @@ -1574,44 +1558,45 @@ wimfs_open(const char *path, struct fuse_file_info *fi) struct wimfs_context *ctx = wimfs_get_context(); struct wim_dentry *dentry; struct wim_inode *inode; - struct wim_lookup_table_entry *lte; - unsigned stream_idx; + struct wim_inode_stream *strm; + struct blob_descriptor *blob; struct wimfs_fd *fd; int ret; - ret = wim_pathname_to_stream(ctx, path, 0, &dentry, <e, &stream_idx); + ret = wim_pathname_to_stream(ctx, path, 0, &dentry, &strm); if (ret) return ret; inode = dentry->d_inode; + blob = stream_blob_resolved(strm); - /* The file resource may be in the staging directory (read-write mounts - * only) or in the WIM. If it's in the staging directory, we need to - * open a native file descriptor for the corresponding file. Otherwise, - * we can read the file resource directly from the WIM file if we are - * opening it read-only, but we need to extract the resource to the - * staging directory if we are opening it writable. */ + /* The data of the file being opened may be in the staging directory + * (read-write mounts only) or in the WIM. If it's in the staging + * directory, we need to open a native file descriptor for the + * corresponding file. 
Otherwise, we can read the file data directly + * from the WIM file if we are opening it read-only, but we need to + * extract the data to the staging directory if we are opening it + * writable. */ if (flags_writable(fi->flags) && - (!lte || lte->resource_location != RESOURCE_IN_STAGING_FILE)) { - ret = extract_resource_to_staging_dir(inode, - stream_idx, - <e, - lte ? lte->size : 0, - ctx); + (!blob || blob->blob_location != BLOB_IN_STAGING_FILE)) { + ret = extract_blob_to_staging_dir(inode, + strm, + blob ? blob->size : 0, + ctx); if (ret) return ret; + blob = stream_blob_resolved(strm); } - ret = alloc_wimfs_fd(inode, inode_stream_idx_to_id(inode, stream_idx), - lte, &fd); + ret = alloc_wimfs_fd(inode, strm, &fd); if (ret) return ret; - if (lte && lte->resource_location == RESOURCE_IN_STAGING_FILE) { + if (blob && blob->blob_location == BLOB_IN_STAGING_FILE) { int raw_fd; - raw_fd = openat(lte->staging_dir_fd, lte->staging_file_name, + raw_fd = openat(blob->staging_dir_fd, blob->staging_file_name, (fi->flags & O_ACCMODE) | O_NOFOLLOW); if (raw_fd < 0) { close_wimfs_fd(fd); @@ -1628,6 +1613,7 @@ wimfs_opendir(const char *path, struct fuse_file_info *fi) { WIMStruct *wim = wimfs_get_WIMStruct(); struct wim_inode *inode; + struct wim_inode_stream *strm; struct wimfs_fd *fd; int ret; @@ -1636,7 +1622,10 @@ wimfs_opendir(const char *path, struct fuse_file_info *fi) return -errno; if (!inode_is_directory(inode)) return -ENOTDIR; - ret = alloc_wimfs_fd(inode, 0, NULL, &fd); + strm = inode_get_unnamed_stream(inode, STREAM_TYPE_DATA); + if (!strm) + return -ENOTDIR; + ret = alloc_wimfs_fd(inode, strm, &fd); if (ret) return ret; fi->fh = (uintptr_t)fd; @@ -1648,36 +1637,36 @@ wimfs_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { struct wimfs_fd *fd = WIMFS_FD(fi); - const struct wim_lookup_table_entry *lte; + const struct blob_descriptor *blob; ssize_t ret; - lte = fd->f_lte; - if (!lte) + blob = fd->f_blob; + if (!blob) return 
0; - if (offset >= lte->size) + if (offset >= blob->size) return 0; - if (size > lte->size - offset) - size = lte->size - offset; + if (size > blob->size - offset) + size = blob->size - offset; if (!size) return 0; - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - if (read_partial_wim_stream_into_buf(lte, size, offset, buf)) + switch (blob->blob_location) { + case BLOB_IN_WIM: + if (read_partial_wim_blob_into_buf(blob, size, offset, buf)) ret = errno ? -errno : -EIO; else ret = size; break; - case RESOURCE_IN_STAGING_FILE: + case BLOB_IN_STAGING_FILE: ret = raw_pread(&fd->f_staging_fd, buf, size, offset); if (ret < 0) ret = -errno; break; - case RESOURCE_IN_ATTACHED_BUFFER: - memcpy(buf, lte->attached_buffer + offset, size); + case BLOB_IN_ATTACHED_BUFFER: + memcpy(buf, blob->attached_buffer + offset, size); ret = size; break; default: @@ -1761,7 +1750,7 @@ wimfs_removexattr(const char *path, const char *name) { struct wimfs_context *ctx = wimfs_get_context(); struct wim_inode *inode; - struct wim_ads_entry *ads_entry; + struct wim_inode_stream *strm; if (!(ctx->mount_flags & WIMLIB_MOUNT_FLAG_STREAM_INTERFACE_XATTR)) return -ENOTSUP; @@ -1770,17 +1759,20 @@ wimfs_removexattr(const char *path, const char *name) return -ENOATTR; name += 5; + if (!*name) + return -ENOATTR; + /* Removing a named data stream. */ inode = wim_pathname_to_inode(ctx->wim, path); if (!inode) return -errno; - ads_entry = inode_get_ads_entry(inode, name); - if (!ads_entry) + strm = inode_get_data_stream_tstr(inode, name); + if (!strm) return (errno == ENOENT) ? 
-ENOATTR : -errno; - inode_remove_ads(inode, ads_entry, ctx->wim->lookup_table); + inode_remove_stream(inode, strm, ctx->wim->blob_table); return 0; } @@ -1808,7 +1800,7 @@ wimfs_rmdir(const char *path) return -ENOTEMPTY; touch_parent(dentry); - remove_dentry(dentry, wim->lookup_table); + remove_dentry(dentry, wim->blob_table); return 0; } @@ -1818,7 +1810,9 @@ wimfs_setxattr(const char *path, const char *name, { struct wimfs_context *ctx = wimfs_get_context(); struct wim_inode *inode; - struct wim_ads_entry *existing_entry; + struct wim_inode_stream *existing_strm; + const utf16lechar *uname; + int ret; if (!strncmp(name, "wimfs.", 6)) { /* Handle some magical extended attributes. These really should @@ -1845,29 +1839,42 @@ wimfs_setxattr(const char *path, const char *name, return -ENOATTR; name += 5; + if (!*name) + return -ENOATTR; + /* Setting the contents of a named data stream. */ inode = wim_pathname_to_inode(ctx->wim, path); if (!inode) return -errno; - existing_entry = inode_get_ads_entry(inode, name); - if (existing_entry) { + ret = tstr_get_utf16le(name, &uname); + if (ret) + return -errno; + + existing_strm = inode_get_stream(inode, STREAM_TYPE_DATA, uname); + if (existing_strm) { + ret = -EEXIST; if (flags & XATTR_CREATE) - return -EEXIST; + goto out_put_uname; } else { - if (errno != ENOENT) - return -errno; + ret = -ENOATTR; if (flags & XATTR_REPLACE) - return -ENOATTR; + goto out_put_uname; } - if (!inode_add_ads_with_data(inode, name, value, - size, ctx->wim->lookup_table)) - return -errno; - if (existing_entry) - inode_remove_ads(inode, existing_entry, ctx->wim->lookup_table); - return 0; + if (!inode_add_stream_with_data(inode, STREAM_TYPE_DATA, uname, + value, size, ctx->wim->blob_table)) + { + ret = -errno; + goto out_put_uname; + } + if (existing_strm) + inode_remove_stream(inode, existing_strm, ctx->wim->blob_table); + ret = 0; +out_put_uname: + tstr_put_utf16le(uname); + return ret; } static int @@ -1884,9 +1891,9 @@ wimfs_symlink(const char 
*to, const char *from) return ret; dentry->d_inode->i_reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK; ret = wim_inode_set_symlink(dentry->d_inode, to, - wimfs_ctx->wim->lookup_table); + wimfs_ctx->wim->blob_table); if (ret) { - remove_dentry(dentry, wimfs_ctx->wim->lookup_table); + remove_dentry(dentry, wimfs_ctx->wim->blob_table); if (ret == WIMLIB_ERR_NOMEM) ret = -ENOMEM; else @@ -1902,33 +1909,35 @@ wimfs_truncate(const char *path, off_t size) { const struct wimfs_context *ctx = wimfs_get_context(); struct wim_dentry *dentry; - struct wim_lookup_table_entry *lte; - unsigned stream_idx; + struct wim_inode_stream *strm; + struct blob_descriptor *blob; int ret; int fd; - ret = wim_pathname_to_stream(ctx, path, 0, &dentry, <e, &stream_idx); + ret = wim_pathname_to_stream(ctx, path, 0, &dentry, &strm); if (ret) return ret; - if (!lte && !size) + blob = stream_blob_resolved(strm); + + if (!blob && !size) return 0; - if (!lte || lte->resource_location != RESOURCE_IN_STAGING_FILE) { - return extract_resource_to_staging_dir(dentry->d_inode, - stream_idx, <e, - size, ctx); + if (!blob || blob->blob_location != BLOB_IN_STAGING_FILE) { + return extract_blob_to_staging_dir(dentry->d_inode, + strm, size, ctx); } /* Truncate the staging file. 
*/ - fd = openat(lte->staging_dir_fd, lte->staging_file_name, + fd = openat(blob->staging_dir_fd, blob->staging_file_name, O_WRONLY | O_NOFOLLOW); if (fd < 0) return -errno; ret = ftruncate(fd, size); if (close(fd) || ret) return -errno; - lte->size = size; + blob->size = size; + touch_inode(dentry->d_inode); return 0; } @@ -1937,20 +1946,19 @@ wimfs_unlink(const char *path) { const struct wimfs_context *ctx = wimfs_get_context(); struct wim_dentry *dentry; - unsigned stream_idx; + struct wim_inode_stream *strm; int ret; - ret = wim_pathname_to_stream(ctx, path, 0, &dentry, NULL, &stream_idx); + ret = wim_pathname_to_stream(ctx, path, 0, &dentry, &strm); if (ret) return ret; - if (inode_stream_name_nbytes(dentry->d_inode, stream_idx) == 0) { - touch_parent(dentry); - remove_dentry(dentry, ctx->wim->lookup_table); + if (stream_is_named(strm)) { + inode_remove_stream(dentry->d_inode, strm, + ctx->wim->blob_table); } else { - inode_remove_ads(dentry->d_inode, - &dentry->d_inode->i_ads_entries[stream_idx - 1], - ctx->wim->lookup_table); + touch_parent(dentry); + remove_dentry(dentry, ctx->wim->blob_table); } return 0; } @@ -2013,8 +2021,8 @@ wimfs_write(const char *path, const char *buf, size_t size, if (ret < 0) return -errno; - if (offset + size > fd->f_lte->size) - fd->f_lte->size = offset + size; + if (offset + size > fd->f_blob->size) + fd->f_blob->size = offset + size; touch_inode(fd->f_inode); return ret; @@ -2140,35 +2148,35 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, ctx.owner_uid = getuid(); ctx.owner_gid = getgid(); - /* Add each stream referenced by files in the image to a list and + /* Add each blob referenced by files in the image to a list and * preemptively double the number of references to each. This is done * to allow implementing the WIMLIB_UNMOUNT_FLAG_NEW_IMAGE semantics. 
*/ - INIT_LIST_HEAD(&ctx.orig_stream_list); + INIT_LIST_HEAD(&ctx.orig_blob_list); if (mount_flags & WIMLIB_MOUNT_FLAG_READWRITE) { unsigned i; struct wim_inode *inode; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; image_for_each_inode(inode, imd) { - for (i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte(inode, i, - wim->lookup_table); - if (lte) - lte->out_refcnt = 0; + for (i = 0; i < inode->i_num_streams; i++) { + blob = stream_blob(&inode->i_streams[i], + wim->blob_table); + if (blob) + blob->out_refcnt = 0; } } image_for_each_inode(inode, imd) { - for (i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte(inode, i, - wim->lookup_table); - if (lte) { - if (lte->out_refcnt == 0) - list_add(&lte->orig_stream_list, - &ctx.orig_stream_list); - lte->out_refcnt += inode->i_nlink; - lte->refcnt += inode->i_nlink; + for (i = 0; i < inode->i_num_streams; i++) { + blob = stream_blob(&inode->i_streams[i], + wim->blob_table); + if (blob) { + if (blob->out_refcnt == 0) + list_add(&blob->orig_blob_list, + &ctx.orig_blob_list); + blob->out_refcnt += inode->i_nlink; + blob->refcnt += inode->i_nlink; } } } diff --git a/src/ntfs-3g_apply.c b/src/ntfs-3g_apply.c index b3fdfc18..5833d041 100644 --- a/src/ntfs-3g_apply.c +++ b/src/ntfs-3g_apply.c @@ -10,7 +10,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -39,10 +39,10 @@ #include "wimlib/assert.h" #include "wimlib/apply.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/ntfs_3g.h" #include "wimlib/reparse.h" @@ -75,20 +75,20 @@ struct ntfs_3g_apply_ctx { /* Pointer to the open NTFS volume */ ntfs_volume *vol; - ntfs_attr 
*open_attrs[MAX_OPEN_STREAMS]; + ntfs_attr *open_attrs[MAX_OPEN_FILES]; unsigned num_open_attrs; - ntfs_inode *open_inodes[MAX_OPEN_STREAMS]; + ntfs_inode *open_inodes[MAX_OPEN_FILES]; unsigned num_open_inodes; struct reparse_buffer_disk rpbuf; u8 *reparse_ptr; - /* Offset in the stream currently being read */ + /* Offset in the blob currently being read */ u64 offset; unsigned num_reparse_inodes; - ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_STREAMS]; - struct wim_inode *wim_reparse_inodes[MAX_OPEN_STREAMS]; + ntfs_inode *ntfs_reparse_inodes[MAX_OPEN_FILES]; + struct wim_inode *wim_reparse_inodes[MAX_OPEN_FILES]; }; static size_t @@ -315,31 +315,27 @@ out_close: return ret; } -/* Create empty named data streams. +/* + * Create empty named data streams for the specified file, if there are any. * - * Since these won't have 'struct wim_lookup_table_entry's, they won't show up - * in the call to extract_stream_list(). Hence the need for the special case. + * Since these won't have blob descriptors, they won't show up in the call to + * extract_blob_list(). Hence the need for the special case. */ static int -ntfs_3g_create_any_empty_ads(ntfs_inode *ni, const struct wim_inode *inode, - const struct ntfs_3g_apply_ctx *ctx) +ntfs_3g_create_empty_named_data_streams(ntfs_inode *ni, + const struct wim_inode *inode, + const struct ntfs_3g_apply_ctx *ctx) { - for (u16 i = 0; i < inode->i_num_ads; i++) { - const struct wim_ads_entry *entry; - - entry = &inode->i_ads_entries[i]; + for (unsigned i = 0; i < inode->i_num_streams; i++) { - /* Not named? */ - if (!entry->stream_name_nbytes) - continue; + const struct wim_inode_stream *strm = &inode->i_streams[i]; - /* Not empty? 
*/ - if (entry->lte) + if (!stream_is_named_data_stream(strm) || + stream_blob_resolved(strm) != NULL) continue; - if (ntfs_attr_add(ni, AT_DATA, entry->stream_name, - entry->stream_name_nbytes / - sizeof(utf16lechar), + if (ntfs_attr_add(ni, AT_DATA, strm->stream_name, + utf16le_len_chars(strm->stream_name), NULL, 0)) { ERROR_WITH_ERRNO("Failed to create named data stream " @@ -450,7 +446,7 @@ ntfs_3g_create_dirs_recursive(ntfs_inode *dir_ni, struct wim_dentry *dir, if (!ret) ret = ntfs_3g_set_metadata(ni, child->d_inode, ctx); if (!ret) - ret = ntfs_3g_create_any_empty_ads(ni, child->d_inode, ctx); + ret = ntfs_3g_create_empty_named_data_streams(ni, child->d_inode, ctx); if (!ret) ret = ntfs_3g_create_dirs_recursive(ni, child, ctx); @@ -645,7 +641,7 @@ ntfs_3g_create_nondirectory(struct wim_inode *inode, if (ret) goto out_close_ni; - ret = ntfs_3g_create_any_empty_ads(ni, inode, ctx); + ret = ntfs_3g_create_empty_named_data_streams(ni, inode, ctx); out_close_ni: /* Close the inode. */ @@ -686,34 +682,27 @@ ntfs_3g_create_nondirectories(struct list_head *dentry_list, } static int -ntfs_3g_begin_extract_stream_to_attr(struct wim_lookup_table_entry *stream, - ntfs_inode *ni, - struct wim_inode *inode, - ntfschar *stream_name, - struct ntfs_3g_apply_ctx *ctx) +ntfs_3g_begin_extract_blob_instance(struct blob_descriptor *blob, + ntfs_inode *ni, + struct wim_inode *inode, + const struct wim_inode_stream *strm, + struct ntfs_3g_apply_ctx *ctx) { struct wim_dentry *one_dentry = inode_first_extraction_dentry(inode); - size_t stream_name_nchars = 0; + size_t stream_name_nchars; ntfs_attr *attr; - if (stream_name) - for (const ntfschar *p = stream_name; *p; p++) - stream_name_nchars++; + if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) { - if (stream_name_nchars == 0) - stream_name = AT_UNNAMED; - if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) - && (stream_name_nchars == 0)) - { - if (stream->size > REPARSE_DATA_MAX_SIZE) { + if (blob->size > 
REPARSE_DATA_MAX_SIZE) { ERROR("Reparse data of \"%s\" has size " "%"PRIu64" bytes (exceeds %u bytes)", dentry_full_path(one_dentry), - stream->size, REPARSE_DATA_MAX_SIZE); + blob->size, REPARSE_DATA_MAX_SIZE); return WIMLIB_ERR_INVALID_REPARSE_DATA; } ctx->reparse_ptr = ctx->rpbuf.rpdata; - ctx->rpbuf.rpdatalen = cpu_to_le16(stream->size); + ctx->rpbuf.rpdatalen = cpu_to_le16(blob->size); ctx->rpbuf.rpreserved = cpu_to_le16(0); ctx->ntfs_reparse_inodes[ctx->num_reparse_inodes] = ni; ctx->wim_reparse_inodes[ctx->num_reparse_inodes] = inode; @@ -721,8 +710,13 @@ ntfs_3g_begin_extract_stream_to_attr(struct wim_lookup_table_entry *stream, return 0; } + /* It's a data stream (may be unnamed or named). */ + wimlib_assert(strm->stream_type == STREAM_TYPE_DATA); + + stream_name_nchars = utf16le_len_chars(strm->stream_name); + if (stream_name_nchars && - (ntfs_attr_add(ni, AT_DATA, stream_name, + (ntfs_attr_add(ni, AT_DATA, strm->stream_name, stream_name_nchars, NULL, 0))) { ERROR_WITH_ERRNO("Failed to create named data stream of \"%s\"", @@ -730,22 +724,23 @@ ntfs_3g_begin_extract_stream_to_attr(struct wim_lookup_table_entry *stream, return WIMLIB_ERR_NTFS_3G; } - /* This should be ensured by extract_stream_list() */ - wimlib_assert(ctx->num_open_attrs < MAX_OPEN_STREAMS); + /* This should be ensured by extract_blob_list() */ + wimlib_assert(ctx->num_open_attrs < MAX_OPEN_FILES); - attr = ntfs_attr_open(ni, AT_DATA, stream_name, stream_name_nchars); + attr = ntfs_attr_open(ni, AT_DATA, strm->stream_name, + stream_name_nchars); if (!attr) { ERROR_WITH_ERRNO("Failed to open data stream of \"%s\"", dentry_full_path(one_dentry)); return WIMLIB_ERR_NTFS_3G; } ctx->open_attrs[ctx->num_open_attrs++] = attr; - ntfs_attr_truncate_solid(attr, stream->size); + ntfs_attr_truncate_solid(attr, blob->size); return 0; } static int -ntfs_3g_cleanup_stream_extract(struct ntfs_3g_apply_ctx *ctx) +ntfs_3g_cleanup_blob_extract(struct ntfs_3g_apply_ctx *ctx) { int ret = 0; @@ -798,24 +793,22 
@@ ntfs_3g_open_inode(struct wim_inode *inode, struct ntfs_3g_apply_ctx *ctx) } static int -ntfs_3g_begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) +ntfs_3g_begin_extract_blob(struct blob_descriptor *blob, void *_ctx) { struct ntfs_3g_apply_ctx *ctx = _ctx; - const struct stream_owner *owners = stream_owners(stream); + const struct blob_extraction_target *targets = blob_extraction_targets(blob); int ret; + ntfs_inode *ni; - for (u32 i = 0; i < stream->out_refcnt; i++) { - struct wim_inode *inode = owners[i].inode; - ntfschar *stream_name = (ntfschar *)owners[i].stream_name; - ntfs_inode *ni; - + for (u32 i = 0; i < blob->out_refcnt; i++) { ret = WIMLIB_ERR_NTFS_3G; - ni = ntfs_3g_open_inode(inode, ctx); + ni = ntfs_3g_open_inode(targets[i].inode, ctx); if (!ni) goto out_cleanup; - ret = ntfs_3g_begin_extract_stream_to_attr(stream, ni, inode, - stream_name, ctx); + ret = ntfs_3g_begin_extract_blob_instance(blob, ni, + targets[i].inode, + targets[i].stream, ctx); if (ret) goto out_cleanup; } @@ -823,10 +816,10 @@ ntfs_3g_begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) goto out; out_cleanup: - ntfs_3g_cleanup_stream_extract(ctx); + ntfs_3g_cleanup_blob_extract(ctx); out: - for (u32 i = 0; i < stream->out_refcnt; i++) - owners[i].inode->i_visited = 0; + for (u32 i = 0; i < blob->out_refcnt; i++) + targets[i].inode->i_visited = 0; return ret; } @@ -851,8 +844,7 @@ ntfs_3g_extract_chunk(const void *chunk, size_t size, void *_ctx) } static int -ntfs_3g_end_extract_stream(struct wim_lookup_table_entry *stream, - int status, void *_ctx) +ntfs_3g_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct ntfs_3g_apply_ctx *ctx = _ctx; int ret; @@ -869,7 +861,7 @@ ntfs_3g_end_extract_stream(struct wim_lookup_table_entry *stream, if (ntfs_set_ntfs_reparse_data(ctx->ntfs_reparse_inodes[i], (const char *)&ctx->rpbuf, - stream->size + REPARSE_DATA_OFFSET, + blob->size + REPARSE_DATA_OFFSET, 0)) { 
ERROR_WITH_ERRNO("Failed to set reparse " @@ -882,7 +874,7 @@ ntfs_3g_end_extract_stream(struct wim_lookup_table_entry *stream, } ret = 0; out: - if (ntfs_3g_cleanup_stream_extract(ctx) && !ret) { + if (ntfs_3g_cleanup_blob_extract(ctx) && !ret) { ERROR_WITH_ERRNO("Error writing data to NTFS volume"); ret = WIMLIB_ERR_NTFS_3G; } @@ -902,7 +894,6 @@ ntfs_3g_count_dentries(const struct list_head *dentry_list) { count++; } - } return count; @@ -950,16 +941,16 @@ ntfs_3g_extract(struct list_head *dentry_list, struct apply_ctx *_ctx) if (ret) goto out_unmount; - /* Extract streams. */ - struct read_stream_list_callbacks cbs = { - .begin_stream = ntfs_3g_begin_extract_stream, - .begin_stream_ctx = ctx, + /* Extract blobs. */ + struct read_blob_list_callbacks cbs = { + .begin_blob = ntfs_3g_begin_extract_blob, + .begin_blob_ctx = ctx, .consume_chunk = ntfs_3g_extract_chunk, .consume_chunk_ctx = ctx, - .end_stream = ntfs_3g_end_extract_stream, - .end_stream_ctx = ctx, + .end_blob = ntfs_3g_end_extract_blob, + .end_blob_ctx = ctx, }; - ret = extract_stream_list(&ctx->common, &cbs); + ret = extract_blob_list(&ctx->common, &cbs); /* We do not need a final pass to set timestamps because libntfs-3g does * not update timestamps automatically (exception: diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index 0417b0b8..a232772c 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -37,14 +37,15 @@ #include "wimlib/alloca.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/capture.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/ntfs_3g.h" 
#include "wimlib/paths.h" +#include "wimlib/reparse.h" #include "wimlib/security.h" static inline ntfschar * @@ -54,14 +55,14 @@ attr_record_name(ATTR_RECORD *ar) } static ntfs_attr * -open_ntfs_attr(ntfs_inode *ni, struct ntfs_location *loc) +open_ntfs_attr(ntfs_inode *ni, const struct ntfs_location *loc) { ntfs_attr *na; na = ntfs_attr_open(ni, - loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA, - loc->stream_name, - loc->stream_name_nchars); + (ATTR_TYPES)loc->attr_type, + loc->attr_name, + loc->attr_name_nchars); if (!na) { ERROR_WITH_ERRNO("Failed to open attribute of \"%"TS"\" in " "NTFS volume", loc->path); @@ -70,10 +71,10 @@ open_ntfs_attr(ntfs_inode *ni, struct ntfs_location *loc) } int -read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx) +read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - struct ntfs_location *loc = lte->ntfs_loc; + const struct ntfs_location *loc = blob->ntfs_loc; ntfs_volume *vol = loc->ntfs_vol; ntfs_inode *ni; ntfs_attr *na; @@ -95,7 +96,7 @@ read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, goto out_close_ntfs_inode; } - pos = (loc->is_reparse_point) ? 8 : 0; + pos = (loc->attr_type == AT_REPARSE_POINT) ? 
REPARSE_DATA_OFFSET : 0; bytes_remaining = size; while (bytes_remaining) { s64 to_read = min(bytes_remaining, sizeof(buf)); @@ -150,24 +151,38 @@ out: } -/* Load the streams from a file or reparse point in the NTFS volume */ static int -capture_ntfs_streams(struct wim_inode *inode, - ntfs_inode *ni, - char *path, - size_t path_len, - struct list_head *unhashed_streams, - ntfs_volume *vol, - ATTR_TYPES type) +attr_type_to_wimlib_stream_type(ATTR_TYPES type) +{ + switch (type) { + case AT_DATA: + return STREAM_TYPE_DATA; + case AT_REPARSE_POINT: + return STREAM_TYPE_REPARSE_POINT; + default: + wimlib_assert(0); + return STREAM_TYPE_UNKNOWN; + } +} + +/* Load attributes of the specified type from a file in the NTFS volume */ +static int +load_ntfs_attrs_with_type(struct wim_inode *inode, + ntfs_inode *ni, + char *path, + size_t path_len, + struct list_head *unhashed_blobs, + ntfs_volume *vol, + ATTR_TYPES type) { ntfs_attr_search_ctx *actx; struct ntfs_location *ntfs_loc; int ret; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - DEBUG("Capturing NTFS data streams from `%s'", path); + DEBUG("Loading NTFS attributes from \"%s\"", path); - /* Get context to search the streams of the NTFS file. */ + /* Get context to search the attributes of the NTFS file. */ actx = ntfs_attr_get_search_ctx(ni, NULL); if (!actx) { ERROR_WITH_ERRNO("Cannot get NTFS attribute search " @@ -175,17 +190,18 @@ capture_ntfs_streams(struct wim_inode *inode, return WIMLIB_ERR_NTFS_3G; } - /* Capture each data stream or reparse data stream. */ + /* Save each attribute */ while (!ntfs_attr_lookup(type, NULL, 0, CASE_SENSITIVE, 0, NULL, 0, actx)) { u64 data_size = ntfs_get_attribute_value_length(actx->attr); - u64 name_length = actx->attr->name_length; - u32 stream_id; + size_t name_nchars = actx->attr->name_length; + struct wim_inode_stream *strm; + const utf16lechar *stream_name = NO_STREAM_NAME; if (data_size == 0) { - /* Empty stream. No lookup table entry is needed. 
*/ - lte = NULL; + /* Empty attribute. No blob is needed. */ + blob = NULL; ntfs_loc = NULL; } else { ntfs_loc = CALLOC(1, sizeof(*ntfs_loc)); @@ -194,90 +210,58 @@ capture_ntfs_streams(struct wim_inode *inode, goto out_put_actx; } ntfs_loc->ntfs_vol = vol; + ntfs_loc->attr_type = type; ntfs_loc->path = memdup(path, path_len + 1); if (!ntfs_loc->path) { ret = WIMLIB_ERR_NOMEM; goto out_free_ntfs_loc; } - if (name_length) { - ntfs_loc->stream_name = memdup(attr_record_name(actx->attr), - name_length * 2); - if (!ntfs_loc->stream_name) { + if (name_nchars) { + ntfs_loc->attr_name = + utf16le_dupz(attr_record_name(actx->attr), + name_nchars * sizeof(ntfschar)); + if (!ntfs_loc->attr_name) { ret = WIMLIB_ERR_NOMEM; goto out_free_ntfs_loc; } - ntfs_loc->stream_name_nchars = name_length; + ntfs_loc->attr_name_nchars = name_nchars; + stream_name = ntfs_loc->attr_name; } - lte = new_lookup_table_entry(); - if (!lte) { + blob = new_blob_descriptor(); + if (!blob) { ret = WIMLIB_ERR_NOMEM; goto out_free_ntfs_loc; } - lte->resource_location = RESOURCE_IN_NTFS_VOLUME; - lte->ntfs_loc = ntfs_loc; + blob->blob_location = BLOB_IN_NTFS_VOLUME; + blob->ntfs_loc = ntfs_loc; + blob->size = data_size; ntfs_loc = NULL; if (type == AT_REPARSE_POINT) { - if (data_size < 8) { - ERROR("Invalid reparse data on \"%s\" " - "(only %u bytes)!", path, (unsigned)data_size); + if (data_size < REPARSE_DATA_OFFSET) { + ERROR("Reparse data of \"%s\" " + "is invalid (only %u bytes)!", + path, (unsigned)data_size); ret = WIMLIB_ERR_NTFS_3G; - goto out_free_lte; + goto out_free_blob; } - lte->ntfs_loc->is_reparse_point = true; - lte->size = data_size - 8; - ret = read_reparse_tag(ni, lte->ntfs_loc, + blob->size -= REPARSE_DATA_OFFSET; + ret = read_reparse_tag(ni, blob->ntfs_loc, &inode->i_reparse_tag); if (ret) - goto out_free_lte; - } else { - lte->ntfs_loc->is_reparse_point = false; - lte->size = data_size; + goto out_free_blob; } } - if (name_length == 0) { - /* Unnamed data stream. 
Put the reference to it in the - * dentry's inode. */ - if (inode->i_lte) { - if (lte) { - if (!(inode->i_attributes & - FILE_ATTRIBUTE_REPARSE_POINT)) - { - WARNING("Found two un-named " - "data streams for \"%s\" " - "(sizes = %"PRIu64", " - "%"PRIu64")", - path, - inode->i_lte->size, - lte->size); - } - free_lookup_table_entry(lte); - continue; - } - } else { - stream_id = 0; - inode->i_lte = lte; - } - } else { - /* Named data stream. Put the reference to it in the - * alternate data stream entries */ - struct wim_ads_entry *new_ads_entry; - - new_ads_entry = inode_add_ads_utf16le(inode, - attr_record_name(actx->attr), - name_length * 2); - if (!new_ads_entry) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_lte; - } - wimlib_assert(new_ads_entry->stream_name_nbytes == name_length * 2); - stream_id = new_ads_entry->stream_id; - new_ads_entry->lte = lte; - } - if (lte) { - add_unhashed_stream(lte, inode, - stream_id, unhashed_streams); + + strm = inode_add_stream(inode, + attr_type_to_wimlib_stream_type(type), + stream_name, + blob); + if (!strm) { + ret = WIMLIB_ERR_NOMEM; + goto out_free_blob; } + prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs); } if (errno == ENOENT) { ret = 0; @@ -286,20 +270,20 @@ capture_ntfs_streams(struct wim_inode *inode, ret = WIMLIB_ERR_NTFS_3G; } goto out_put_actx; -out_free_lte: - free_lookup_table_entry(lte); +out_free_blob: + free_blob_descriptor(blob); out_free_ntfs_loc: if (ntfs_loc) { FREE(ntfs_loc->path); - FREE(ntfs_loc->stream_name); + FREE(ntfs_loc->attr_name); FREE(ntfs_loc); } out_put_actx: ntfs_attr_put_search_ctx(actx); if (ret == 0) - DEBUG("Successfully captured NTFS streams from \"%s\"", path); + DEBUG("Successfully loaded NTFS attributes from \"%s\"", path); else - ERROR("Failed to capture NTFS streams from \"%s\"", path); + ERROR("Failed to load NTFS attributes from \"%s\"", path); return ret; } @@ -513,10 +497,7 @@ out: return ret; } -/* Recursively build a WIM dentry tree corresponding to an NTFS 
volume. - * At the same time, update the WIM lookup table with lookup table entries for - * the NTFS streams, and build an array of security descriptors. - */ +/* Recursive scan routine for NTFS volumes */ static int build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, ntfs_inode *ni, @@ -545,12 +526,11 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, goto out; } - if ((attributes & (FILE_ATTRIBUTE_DIRECTORY | - FILE_ATTRIBUTE_ENCRYPTED)) == FILE_ATTRIBUTE_ENCRYPTED) - { + if (attributes & FILE_ATTRIBUTE_ENCRYPTED) { if (params->add_flags & WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE) { - ERROR("Can't archive unsupported encrypted file \"%s\"", path); + ERROR("Can't archive \"%s\" because NTFS-3g capture mode " + "does not support encrypted files and directories", path); ret = WIMLIB_ERR_UNSUPPORTED_FILE; goto out; } @@ -580,36 +560,28 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, inode->i_last_write_time = le64_to_cpu(ni->last_data_change_time); inode->i_last_access_time = le64_to_cpu(ni->last_access_time); inode->i_attributes = attributes; - inode->i_resolved = 1; - - /* Capture streams. */ - if (attributes & FILE_ATTR_REPARSE_POINT) { - /* Capture reparse data stream. */ - ret = capture_ntfs_streams(inode, ni, path, path_len, - params->unhashed_streams, - vol, AT_REPARSE_POINT); + if (attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + /* Load the reparse point stream. */ + ret = load_ntfs_attrs_with_type(inode, ni, path, path_len, + params->unhashed_blobs, + vol, AT_REPARSE_POINT); if (ret) goto out; } - /* Capture data streams. - * - * Directories should not have an unnamed data stream, but they may have - * named data streams. - * - * Reparse points may have an unnamed data stream (which will be ignored - * in favor of the reparse data stream), and they also may have named - * data streams. + /* Load the data streams. 
* - * Regular files can have an unnamed data stream as well as named data + * Note: directories should not have an unnamed data stream, but they + * may have named data streams. Nondirectories (including reparse + * points) can have an unnamed data stream as well as named data * streams. */ - ret = capture_ntfs_streams(inode, ni, path, path_len, - params->unhashed_streams, vol, AT_DATA); + ret = load_ntfs_attrs_with_type(inode, ni, path, path_len, + params->unhashed_blobs, vol, AT_DATA); if (ret) goto out; - if (ni->mrec->flags & MFT_RECORD_IS_DIRECTORY) { + if (inode_is_directory(inode)) { /* Recurse to directory children */ s64 pos = 0; @@ -621,7 +593,7 @@ build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret, .dos_name_map = &dos_name_map, .vol = vol, .params = params, - .ret = 0, + .ret = 0, }; ret = ntfs_readdir(ni, &pos, &ctx, wim_ntfs_capture_filldir); if (ret) { @@ -702,7 +674,7 @@ out_progress: ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode); out: if (unlikely(ret)) { - free_dentry_tree(root, params->lookup_table); + free_dentry_tree(root, params->blob_table); root = NULL; ret = report_capture_error(params, ret, path); } diff --git a/src/paths.c b/src/paths.c index 32770ac0..4b9db67f 100644 --- a/src/paths.c +++ b/src/paths.c @@ -113,7 +113,7 @@ do_canonicalize_path(const tchar *in, tchar *out) * * Examples (with WIM_PATH_SEPARATOR == '/'): * - * => / [ either NULL or empty string ] + * => / [ either NULL or empty string ] * / => / * \ => / * hello => /hello diff --git a/src/reference.c b/src/reference.c index 013b285d..ab0bab2b 100644 --- a/src/reference.c +++ b/src/reference.c @@ -1,7 +1,7 @@ /* * reference.c * - * Reference resources from external WIM file(s). + * Reference blobs from external WIM file(s). 
*/ /* @@ -26,9 +26,9 @@ #endif #include "wimlib.h" +#include "wimlib/blob_table.h" #include "wimlib/error.h" #include "wimlib/glob.h" -#include "wimlib/lookup_table.h" #include "wimlib/wim.h" #define WIMLIB_REF_MASK_PUBLIC (WIMLIB_REF_FLAG_GLOB_ENABLE | \ @@ -36,10 +36,10 @@ struct reference_info { WIMStruct *dest_wim; - struct list_head new_streams; + struct list_head new_blobs; struct list_head new_subwims; int ref_flags; - struct wim_lookup_table *src_table; + struct blob_table *src_table; }; static void @@ -47,7 +47,7 @@ init_reference_info(struct reference_info *info, WIMStruct *dest_wim, int ref_flags) { info->dest_wim = dest_wim; - INIT_LIST_HEAD(&info->new_streams); + INIT_LIST_HEAD(&info->new_blobs); INIT_LIST_HEAD(&info->new_subwims); info->ref_flags = ref_flags; } @@ -62,7 +62,7 @@ static void rollback_reference_info(struct reference_info *info) { WIMStruct *subwim; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; while (!list_empty(&info->new_subwims)) { subwim = list_first_entry(&info->new_subwims, @@ -71,13 +71,12 @@ rollback_reference_info(struct reference_info *info) wimlib_free(subwim); } - while (!list_empty(&info->new_streams)) { - lte = list_first_entry(&info->new_streams, - struct wim_lookup_table_entry, - lookup_table_list); - list_del(&lte->lookup_table_list); - lookup_table_unlink(info->dest_wim->lookup_table, lte); - free_lookup_table_entry(lte); + while (!list_empty(&info->new_blobs)) { + blob = list_first_entry(&info->new_blobs, + struct blob_descriptor, blob_table_list); + list_del(&blob->blob_table_list); + blob_table_unlink(info->dest_wim->blob_table, blob); + free_blob_descriptor(blob); } } @@ -92,18 +91,16 @@ commit_or_rollback_reference_info(struct reference_info *info, int ret) } static bool -need_stream(const struct reference_info *info, - const struct wim_lookup_table_entry *lte) +need_blob(const struct reference_info *info, const struct blob_descriptor *blob) { - return 
!lookup_stream(info->dest_wim->lookup_table, lte->hash); + return !lookup_blob(info->dest_wim->blob_table, blob->hash); } static void -reference_stream(struct reference_info *info, - struct wim_lookup_table_entry *lte) +reference_blob(struct reference_info *info, struct blob_descriptor *blob) { - lookup_table_insert(info->dest_wim->lookup_table, lte); - list_add(&lte->lookup_table_list, &info->new_streams); + blob_table_insert(info->dest_wim->blob_table, blob); + list_add(&blob->blob_table_list, &info->new_blobs); } static void @@ -113,15 +110,15 @@ reference_subwim(struct reference_info *info, WIMStruct *subwim) } static int -lte_clone_if_new(struct wim_lookup_table_entry *lte, void *_info) +blob_clone_if_new(struct blob_descriptor *blob, void *_info) { struct reference_info *info = _info; - if (need_stream(info, lte)) { - lte = clone_lookup_table_entry(lte); - if (unlikely(!lte)) + if (need_blob(info, blob)) { + blob = clone_blob_descriptor(blob); + if (unlikely(!blob)) return WIMLIB_ERR_NOMEM; - reference_stream(info, lte); + reference_blob(info, blob); } return 0; } @@ -151,8 +148,8 @@ wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, init_reference_info(&info, wim, ref_flags); for (i = 0; i < num_resource_wims; i++) { - ret = for_lookup_table_entry(resource_wims[i]->lookup_table, - lte_clone_if_new, &info); + ret = for_blob_in_table(resource_wims[i]->blob_table, + blob_clone_if_new, &info); if (ret) break; } @@ -161,15 +158,15 @@ wimlib_reference_resources(WIMStruct *wim, WIMStruct **resource_wims, } static int -lte_gift(struct wim_lookup_table_entry *lte, void *_info) +blob_gift(struct blob_descriptor *blob, void *_info) { struct reference_info *info = _info; - lookup_table_unlink(info->src_table, lte); - if (need_stream(info, lte)) - reference_stream(info, lte); + blob_table_unlink(info->src_table, blob); + if (need_blob(info, blob)) + reference_blob(info, blob); else - free_lookup_table_entry(lte); + free_blob_descriptor(blob); return 0; } 
@@ -186,8 +183,8 @@ reference_resource_path(struct reference_info *info, const tchar *path, if (ret) return ret; - info->src_table = src_wim->lookup_table; - for_lookup_table_entry(src_wim->lookup_table, lte_gift, info); + info->src_table = src_wim->blob_table; + for_blob_in_table(src_wim->blob_table, blob_gift, info); reference_subwim(info, src_wim); return 0; } diff --git a/src/reparse.c b/src/reparse.c index ff46e9b1..2f1b4169 100644 --- a/src/reparse.c +++ b/src/reparse.c @@ -27,12 +27,12 @@ #include "wimlib/alloca.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/compiler.h" #include "wimlib/endianness.h" #include "wimlib/encoding.h" #include "wimlib/error.h" #include "wimlib/inode.h" -#include "wimlib/lookup_table.h" #include "wimlib/reparse.h" #include "wimlib/resource.h" @@ -157,41 +157,48 @@ make_reparse_buffer(const struct reparse_data * restrict rpdata, * * Note: in the WIM format, the first 8 bytes of the reparse point data buffer * are omitted, presumably because we already know the reparse tag from the - * dentry, and we already know the reparse tag length from the lookup table - * entry resource length. However, we reconstruct the first 8 bytes in the - * buffer returned by this function. + * dentry, and we already know the reparse tag length from the blob length. + * However, we reconstruct the first 8 bytes in the buffer returned by this + * function. 
*/ -int +static int wim_inode_get_reparse_data(const struct wim_inode * restrict inode, u8 * restrict rpbuf, u16 * restrict rpbuflen_ret, - struct wim_lookup_table_entry *lte_override) + struct blob_descriptor *blob_override) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; int ret; struct reparse_buffer_disk *rpbuf_disk; u16 rpdatalen; wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); - if (!lte_override) { - lte = inode_unnamed_lte_resolved(inode); - if (!lte) { + if (blob_override) { + blob = blob_override; + } else { + struct wim_inode_stream *strm; + + strm = inode_get_stream(inode, STREAM_TYPE_REPARSE_POINT, + NO_STREAM_NAME); + if (strm) + blob = stream_blob_resolved(strm); + else + blob = NULL; + if (!blob) { ERROR("Reparse point has no reparse data!"); return WIMLIB_ERR_INVALID_REPARSE_DATA; } - } else { - lte = lte_override; } - if (lte->size > REPARSE_POINT_MAX_SIZE - 8) { + if (blob->size > REPARSE_DATA_MAX_SIZE) { ERROR("Reparse data is too long!"); return WIMLIB_ERR_INVALID_REPARSE_DATA; } - rpdatalen = lte->size; + rpdatalen = blob->size; - /* Read the data from the WIM file */ - ret = read_full_stream_into_buf(lte, rpbuf + 8); + /* Read the reparse data from blob */ + ret = read_full_blob_into_buf(blob, rpbuf + REPARSE_DATA_OFFSET); if (ret) return ret; @@ -318,9 +325,9 @@ parse_substitute_name(const utf16lechar *substitute_name, * @bufsize * Available space in @buf, in bytes. * - * @lte_override - * If not NULL, the stream from which to read the reparse data. Otherwise, - * the reparse data will be read from the unnamed stream of @inode. + * @blob_override + * If not NULL, the blob from which to read the reparse data. Otherwise, + * the reparse data will be read from the reparse point stream of @inode. * * If the entire symbolic link target was placed in the buffer, returns the * number of bytes written. The resulting string is not null-terminated. 
If @@ -332,7 +339,7 @@ parse_substitute_name(const utf16lechar *substitute_name, ssize_t wim_inode_readlink(const struct wim_inode * restrict inode, char * restrict buf, size_t bufsize, - struct wim_lookup_table_entry *lte_override) + struct blob_descriptor *blob_override) { int ret; struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); @@ -345,7 +352,7 @@ wim_inode_readlink(const struct wim_inode * restrict inode, wimlib_assert(inode_is_symlink(inode)); if (wim_inode_get_reparse_data(inode, (u8*)&rpbuf_disk, &rpbuflen, - lte_override)) + blob_override)) return -EIO; if (parse_reparse_data((const u8*)&rpbuf_disk, rpbuflen, &rpdata)) @@ -394,10 +401,11 @@ out_free_link_target: return ret; } +/* Given a UNIX-style symbolic link target, create a Windows-style reparse point + * buffer and assign it to the specified inode. */ int -wim_inode_set_symlink(struct wim_inode *inode, - const char *target, - struct wim_lookup_table *lookup_table) +wim_inode_set_symlink(struct wim_inode *inode, const char *target, + struct blob_table *blob_table) { struct reparse_buffer_disk rpbuf_disk _aligned_attribute(8); @@ -492,10 +500,13 @@ wim_inode_set_symlink(struct wim_inode *inode, ret = make_reparse_buffer(&rpdata, (u8*)&rpbuf_disk, &rpbuflen); if (ret == 0) { - ret = inode_set_unnamed_stream(inode, - (u8*)&rpbuf_disk + 8, - rpbuflen - 8, - lookup_table); + if (!inode_add_stream_with_data(inode, + STREAM_TYPE_REPARSE_POINT, + NO_STREAM_NAME, + (u8*)&rpbuf_disk + 8, + rpbuflen - 8, + blob_table)) + ret = WIMLIB_ERR_NOMEM; } FREE(name_utf16le); return ret; diff --git a/src/resource.c b/src/resource.c index f6e30fc8..f3e1349b 100644 --- a/src/resource.c +++ b/src/resource.c @@ -1,11 +1,11 @@ /* * resource.c * - * Code for reading streams and resources, including compressed WIM resources. + * Code for reading blobs and resources, including compressed WIM resources. 
*/ /* - * Copyright (C) 2012, 2013 Eric Biggers + * Copyright (C) 2012, 2013, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -32,21 +32,21 @@ #include "wimlib/alloca.h" #include "wimlib/assert.h" #include "wimlib/bitops.h" +#include "wimlib/blob_table.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" -#include "wimlib/lookup_table.h" #include "wimlib/resource.h" #include "wimlib/sha1.h" #include "wimlib/wim.h" #ifdef __WIN32__ -/* for read_winnt_file_prefix(), read_win32_encrypted_file_prefix() */ +/* for read_winnt_stream_prefix(), read_win32_encrypted_file_prefix() */ # include "wimlib/win32.h" #endif #ifdef WITH_NTFS_3G -/* for read_ntfs_file_prefix() */ +/* for read_ntfs_attribute_prefix() */ # include "wimlib/ntfs_3g.h" #endif @@ -71,7 +71,7 @@ * little-endian integers. * * - The chunk table is included in the compressed size of the resource provided - * in the corresponding entry in the WIM's stream lookup table. + * in the corresponding entry in the WIM's blob table. * * - The compressed size of a chunk is never greater than the uncompressed size. * From the compressor's point of view, chunks that would have compressed to a @@ -99,8 +99,8 @@ struct data_range { * * Read data from a compressed WIM resource. * - * @rspec - * Specification of the compressed WIM resource to read from. + * @rdesc + * Description of the compressed WIM resource to read from. * @ranges * Nonoverlapping, nonempty ranges of the uncompressed resource data to * read, sorted by increasing offset. @@ -125,7 +125,7 @@ struct data_range { * or other error code returned by the @cb function. 
*/ static int -read_compressed_wim_resource(const struct wim_resource_spec * const rspec, +read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, const struct data_range * const ranges, const size_t num_ranges, const consume_data_callback_t cb, @@ -143,17 +143,17 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, struct wimlib_decompressor *decompressor = NULL; /* Sanity checks */ - wimlib_assert(rspec != NULL); - wimlib_assert(resource_is_compressed(rspec)); + wimlib_assert(rdesc != NULL); + wimlib_assert(resource_is_compressed(rdesc)); wimlib_assert(cb != NULL); wimlib_assert(num_ranges != 0); for (size_t i = 0; i < num_ranges; i++) { DEBUG("Range %zu/%zu: %"PRIu64"@+%"PRIu64" / %"PRIu64, i + 1, num_ranges, ranges[i].size, ranges[i].offset, - rspec->uncompressed_size); + rdesc->uncompressed_size); wimlib_assert(ranges[i].size != 0); wimlib_assert(ranges[i].offset + ranges[i].size >= ranges[i].size); - wimlib_assert(ranges[i].offset + ranges[i].size <= rspec->uncompressed_size); + wimlib_assert(ranges[i].offset + ranges[i].size <= rdesc->uncompressed_size); } for (size_t i = 0; i < num_ranges - 1; i++) wimlib_assert(ranges[i].offset + ranges[i].size <= ranges[i + 1].offset); @@ -163,25 +163,25 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 last_offset = ranges[num_ranges - 1].offset + ranges[num_ranges - 1].size - 1; /* Get the file descriptor for the WIM. */ - struct filedes * const in_fd = &rspec->wim->in_fd; + struct filedes * const in_fd = &rdesc->wim->in_fd; /* Determine if we're reading a pipable resource from a pipe or not. */ - const bool is_pipe_read = (rspec->is_pipable && !filedes_is_seekable(in_fd)); + const bool is_pipe_read = (rdesc->is_pipable && !filedes_is_seekable(in_fd)); /* Determine if the chunk table is in an alternate format. 
*/ - const bool alt_chunk_table = (rspec->flags & WIM_RESHDR_FLAG_SOLID) + const bool alt_chunk_table = (rdesc->flags & WIM_RESHDR_FLAG_SOLID) && !is_pipe_read; /* Get the maximum size of uncompressed chunks in this resource, which * we require be a power of 2. */ - u64 cur_read_offset = rspec->offset_in_wim; - int ctype = rspec->compression_type; - u32 chunk_size = rspec->chunk_size; + u64 cur_read_offset = rdesc->offset_in_wim; + int ctype = rdesc->compression_type; + u32 chunk_size = rdesc->chunk_size; if (alt_chunk_table) { /* Alternate chunk table format. Its header specifies the chunk * size and compression format. Note: it could be read here; - * however, the relevant data was already loaded into @rspec by - * read_wim_lookup_table(). */ + * however, the relevant data was already loaded into @rdesc by + * read_blob_table(). */ cur_read_offset += sizeof(struct alt_chunk_table_header_disk); } @@ -195,13 +195,13 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } /* Get valid decompressor. */ - if (ctype == rspec->wim->decompressor_ctype && - chunk_size == rspec->wim->decompressor_max_block_size) + if (ctype == rdesc->wim->decompressor_ctype && + chunk_size == rdesc->wim->decompressor_max_block_size) { /* Cached decompressor. */ - decompressor = rspec->wim->decompressor; - rspec->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; - rspec->wim->decompressor = NULL; + decompressor = rdesc->wim->decompressor; + rdesc->wim->decompressor_ctype = WIMLIB_COMPRESSION_TYPE_NONE; + rdesc->wim->decompressor = NULL; } else { ret = wimlib_create_decompressor(ctype, chunk_size, &decompressor); @@ -215,7 +215,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u32 chunk_order = fls32(chunk_size); /* Calculate the total number of chunks the resource is divided into. 
*/ - const u64 num_chunks = (rspec->uncompressed_size + chunk_size - 1) >> chunk_order; + const u64 num_chunks = (rdesc->uncompressed_size + chunk_size - 1) >> chunk_order; /* Calculate the 0-based indices of the first and last chunks containing * data that needs to be passed to the callback. */ @@ -241,7 +241,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Set the size of each chunk table entry based on the resource's * uncompressed size. */ - const u64 chunk_entry_size = get_chunk_entry_size(rspec->uncompressed_size, + const u64 chunk_entry_size = get_chunk_entry_size(rdesc->uncompressed_size, alt_chunk_table); /* Calculate the size of the chunk table in bytes. */ @@ -309,7 +309,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, const u64 file_offset_of_needed_chunk_entries = cur_read_offset + (first_chunk_entry_to_read * chunk_entry_size) - + (rspec->is_pipable ? (rspec->size_in_wim - chunk_table_size) : 0); + + (rdesc->is_pipable ? (rdesc->size_in_wim - chunk_table_size) : 0); void * const chunk_table_data = (u8*)chunk_offsets + @@ -357,7 +357,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Set offset to beginning of first chunk to read. */ cur_read_offset += chunk_offsets[0]; - if (rspec->is_pipable) + if (rdesc->is_pipable) cur_read_offset += read_start_chunk * sizeof(struct pwm_chunk_hdr); else cur_read_offset += chunk_table_size; @@ -397,8 +397,8 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, /* Calculate uncompressed size of next chunk. 
*/ u32 chunk_usize; - if ((i == num_chunks - 1) && (rspec->uncompressed_size & (chunk_size - 1))) - chunk_usize = (rspec->uncompressed_size & (chunk_size - 1)); + if ((i == num_chunks - 1) && (rdesc->uncompressed_size & (chunk_size - 1))) + chunk_usize = (rdesc->uncompressed_size & (chunk_size - 1)); else chunk_usize = chunk_size; @@ -414,10 +414,10 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, chunk_csize = le32_to_cpu(chunk_hdr.compressed_size); } else { if (i == num_chunks - 1) { - chunk_csize = rspec->size_in_wim - + chunk_csize = rdesc->size_in_wim - chunk_table_full_size - chunk_offsets[i - read_start_chunk]; - if (rspec->is_pipable) + if (rdesc->is_pipable) chunk_csize -= num_chunks * sizeof(struct pwm_chunk_hdr); } else { chunk_csize = chunk_offsets[i + 1 - read_start_chunk] - @@ -430,7 +430,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, ret = WIMLIB_ERR_DECOMPRESSION; goto out_free_memory; } - if (rspec->is_pipable) + if (rdesc->is_pipable) cur_read_offset += sizeof(struct pwm_chunk_hdr); /* Offsets in the uncompressed resource at which this chunk @@ -519,7 +519,7 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, } if (is_pipe_read && - last_offset == rspec->uncompressed_size - 1 && + last_offset == rdesc->uncompressed_size - 1 && chunk_table_size) { u8 dummy; @@ -537,10 +537,10 @@ read_compressed_wim_resource(const struct wim_resource_spec * const rspec, out_free_memory: errno_save = errno; if (decompressor) { - wimlib_free_decompressor(rspec->wim->decompressor); - rspec->wim->decompressor = decompressor; - rspec->wim->decompressor_ctype = ctype; - rspec->wim->decompressor_max_block_size = chunk_size; + wimlib_free_decompressor(rdesc->wim->decompressor); + rdesc->wim->decompressor = decompressor; + rdesc->wim->decompressor_ctype = ctype; + rdesc->wim->decompressor_max_block_size = chunk_size; } if (chunk_offsets_malloced) FREE(chunk_offsets); @@ -628,8 +628,8 @@ 
bufferer_cb(const void *chunk, size_t size, void *_ctx) * Read a range of data from an uncompressed or compressed resource in a WIM * file. * - * @rspec - * Specification of the WIM resource to read from. + * @rdesc + * Description of the WIM resource to read from. * @offset * Offset within the uncompressed resource at which to start reading. * @size @@ -652,29 +652,29 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) * or other error code returned by the @cb function. */ static int -read_partial_wim_resource(const struct wim_resource_spec *rspec, +read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, u64 offset, u64 size, consume_data_callback_t cb, void *cb_ctx) { /* Sanity checks. */ wimlib_assert(offset + size >= offset); - wimlib_assert(offset + size <= rspec->uncompressed_size); + wimlib_assert(offset + size <= rdesc->uncompressed_size); DEBUG("Reading %"PRIu64" @ %"PRIu64" from WIM resource " "%"PRIu64" => %"PRIu64" @ %"PRIu64, - size, offset, rspec->uncompressed_size, - rspec->size_in_wim, rspec->offset_in_wim); + size, offset, rdesc->uncompressed_size, + rdesc->size_in_wim, rdesc->offset_in_wim); /* Trivial case. */ if (size == 0) return 0; - if (resource_is_compressed(rspec)) { + if (resource_is_compressed(rdesc)) { struct data_range range = { .offset = offset, .size = size, }; - return read_compressed_wim_resource(rspec, &range, 1, + return read_compressed_wim_resource(rdesc, &range, 1, cb, cb_ctx); } else { /* Reading uncompressed resource. 
For completeness, handle the @@ -684,23 +684,23 @@ read_partial_wim_resource(const struct wim_resource_spec *rspec, u64 zeroes_size; int ret; - if (likely(offset + size <= rspec->size_in_wim) || - rspec->is_pipable) + if (likely(offset + size <= rdesc->size_in_wim) || + rdesc->is_pipable) { read_size = size; zeroes_size = 0; } else { - if (offset >= rspec->size_in_wim) { + if (offset >= rdesc->size_in_wim) { read_size = 0; zeroes_size = size; } else { - read_size = rspec->size_in_wim - offset; - zeroes_size = offset + size - rspec->size_in_wim; + read_size = rdesc->size_in_wim - offset; + zeroes_size = offset + size - rdesc->size_in_wim; } } - ret = read_raw_file_data(&rspec->wim->in_fd, - rspec->offset_in_wim + offset, + ret = read_raw_file_data(&rdesc->wim->in_fd, + rdesc->offset_in_wim + offset, read_size, cb, cb_ctx); @@ -711,18 +711,18 @@ read_partial_wim_resource(const struct wim_resource_spec *rspec, } } -/* Read the specified range of uncompressed data from the specified stream, - * which must be located into a WIM file, into the specified buffer. */ +/* Read the specified range of uncompressed data from the specified blob, which + * must be located into a WIM file, into the specified buffer. */ int -read_partial_wim_stream_into_buf(const struct wim_lookup_table_entry *lte, - size_t size, u64 offset, void *_buf) +read_partial_wim_blob_into_buf(const struct blob_descriptor *blob, + size_t size, u64 offset, void *_buf) { u8 *buf = _buf; - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); + wimlib_assert(blob->blob_location == BLOB_IN_WIM); - return read_partial_wim_resource(lte->rspec, - lte->offset_in_res + offset, + return read_partial_wim_resource(blob->rdesc, + blob->offset_in_res + offset, size, bufferer_cb, &buf); @@ -736,52 +736,46 @@ skip_chunk_cb(const void *chunk, size_t size, void *_ctx) return 0; } -/* Skip over the data of the specified stream, which must correspond to a full - * WIM resource. 
*/ +/* Skip over the data of the specified WIM resource. */ int -skip_wim_stream(struct wim_lookup_table_entry *lte) +skip_wim_resource(struct wim_resource_descriptor *rdesc) { - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_SOLID)); - DEBUG("Skipping stream (size=%"PRIu64")", lte->size); - return read_partial_wim_resource(lte->rspec, - 0, - lte->rspec->uncompressed_size, - skip_chunk_cb, - NULL); + DEBUG("Skipping resource (size=%"PRIu64")", rdesc->uncompressed_size); + return read_partial_wim_resource(rdesc, 0, rdesc->uncompressed_size, + skip_chunk_cb, NULL); } static int -read_wim_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx) +read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - return read_partial_wim_resource(lte->rspec, lte->offset_in_res, size, + return read_partial_wim_resource(blob->rdesc, blob->offset_in_res, size, cb, cb_ctx); } -/* This function handles reading stream data that is located in an external - * file, such as a file that has been added to the WIM image through execution - * of a wimlib_add_command. +/* This function handles reading blob data that is located in an external file, + * such as a file that has been added to the WIM image through execution of a + * wimlib_add_command. * * This assumes the file can be accessed using the standard POSIX open(), * read(), and close(). On Windows this will not necessarily be the case (since * the file may need FILE_FLAG_BACKUP_SEMANTICS to be opened, or the file may be * encrypted), so Windows uses its own code for its equivalent case. 
*/ static int -read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, +read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, consume_data_callback_t cb, void *cb_ctx) { int ret; int raw_fd; struct filedes fd; - wimlib_assert(size <= lte->size); + wimlib_assert(size <= blob->size); - DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, lte->file_on_disk); + DEBUG("Reading %"PRIu64" bytes from \"%"TS"\"", size, blob->file_on_disk); - raw_fd = topen(lte->file_on_disk, O_BINARY | O_RDONLY); + raw_fd = topen(blob->file_on_disk, O_BINARY | O_RDONLY); if (raw_fd < 0) { - ERROR_WITH_ERRNO("Can't open \"%"TS"\"", lte->file_on_disk); + ERROR_WITH_ERRNO("Can't open \"%"TS"\"", blob->file_on_disk); return WIMLIB_ERR_OPEN; } filedes_init(&fd, raw_fd); @@ -792,23 +786,23 @@ read_file_on_disk_prefix(const struct wim_lookup_table_entry *lte, u64 size, #ifdef WITH_FUSE static int -read_staging_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, +read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, consume_data_callback_t cb, void *cb_ctx) { int raw_fd; struct filedes fd; int ret; - wimlib_assert(size <= lte->size); + wimlib_assert(size <= blob->size); DEBUG("Reading %"PRIu64" bytes from staging file \"%s\"", - size, lte->staging_file_name); + size, blob->staging_file_name); - raw_fd = openat(lte->staging_dir_fd, lte->staging_file_name, + raw_fd = openat(blob->staging_dir_fd, blob->staging_file_name, O_RDONLY | O_NOFOLLOW); if (raw_fd < 0) { ERROR_WITH_ERRNO("Can't open staging file \"%s\"", - lte->staging_file_name); + blob->staging_file_name); return WIMLIB_ERR_OPEN; } filedes_init(&fd, raw_fd); @@ -818,25 +812,25 @@ read_staging_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, } #endif -/* This function handles the trivial case of reading stream data that is, in - * fact, already located in an in-memory buffer. 
*/ +/* This function handles the trivial case of reading blob data that is, in fact, + * already located in an in-memory buffer. */ static int -read_buffer_prefix(const struct wim_lookup_table_entry *lte, +read_buffer_prefix(const struct blob_descriptor *blob, u64 size, consume_data_callback_t cb, void *cb_ctx) { - wimlib_assert(size <= lte->size); - return (*cb)(lte->attached_buffer, size, cb_ctx); + wimlib_assert(size <= blob->size); + return (*cb)(blob->attached_buffer, size, cb_ctx); } -typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry *lte, - u64 size, - consume_data_callback_t cb, - void *cb_ctx); +typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, + u64 size, + consume_data_callback_t cb, + void *cb_ctx); /* - * read_stream_prefix()- + * read_blob_prefix()- * - * Reads the first @size bytes from a generic "stream", which may be located in + * Reads the first @size bytes from a generic "blob", which may be located in * any one of several locations, such as in a WIM file (compressed or * uncompressed), in an external file, or directly in an in-memory buffer. * @@ -844,63 +838,61 @@ typedef int (*read_stream_prefix_handler_t)(const struct wim_lookup_table_entry * unspecified size. * * Returns 0 on success; nonzero on error. A nonzero value will be returned if - * the stream data cannot be successfully read (for a number of different - * reasons, depending on the stream location), or if @cb returned nonzero in - * which case that error code will be returned. + * the blob data cannot be successfully read (for a number of different reasons, + * depending on the blob location), or if @cb returned nonzero in which case + * that error code will be returned. 
*/ static int -read_stream_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx) +read_blob_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { - static const read_stream_prefix_handler_t handlers[] = { - [RESOURCE_IN_WIM] = read_wim_stream_prefix, - [RESOURCE_IN_FILE_ON_DISK] = read_file_on_disk_prefix, - [RESOURCE_IN_ATTACHED_BUFFER] = read_buffer_prefix, + static const read_blob_prefix_handler_t handlers[] = { + [BLOB_IN_WIM] = read_wim_blob_prefix, + [BLOB_IN_FILE_ON_DISK] = read_file_on_disk_prefix, + [BLOB_IN_ATTACHED_BUFFER] = read_buffer_prefix, #ifdef WITH_FUSE - [RESOURCE_IN_STAGING_FILE] = read_staging_file_prefix, + [BLOB_IN_STAGING_FILE] = read_staging_file_prefix, #endif #ifdef WITH_NTFS_3G - [RESOURCE_IN_NTFS_VOLUME] = read_ntfs_file_prefix, + [BLOB_IN_NTFS_VOLUME] = read_ntfs_attribute_prefix, #endif #ifdef __WIN32__ - [RESOURCE_IN_WINNT_FILE_ON_DISK] = read_winnt_file_prefix, - [RESOURCE_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, + [BLOB_IN_WINNT_FILE_ON_DISK] = read_winnt_stream_prefix, + [BLOB_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix, #endif }; - wimlib_assert(lte->resource_location < ARRAY_LEN(handlers) - && handlers[lte->resource_location] != NULL); - return handlers[lte->resource_location](lte, size, cb, cb_ctx); + wimlib_assert(blob->blob_location < ARRAY_LEN(handlers) + && handlers[blob->blob_location] != NULL); + return handlers[blob->blob_location](blob, size, cb, cb_ctx); } -/* Read the full uncompressed data of the specified stream into the specified - * buffer, which must have space for at least lte->size bytes. */ +/* Read the full uncompressed data of the specified blob into the specified + * buffer, which must have space for at least blob->size bytes. 
*/ int -read_full_stream_into_buf(const struct wim_lookup_table_entry *lte, void *_buf) +read_full_blob_into_buf(const struct blob_descriptor *blob, void *_buf) { u8 *buf = _buf; - return read_stream_prefix(lte, lte->size, bufferer_cb, &buf); + return read_blob_prefix(blob, blob->size, bufferer_cb, &buf); } -/* Retrieve the full uncompressed data of the specified stream. A buffer large +/* Retrieve the full uncompressed data of the specified blob. A buffer large * enough hold the data is allocated and returned in @buf_ret. */ int -read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, - void **buf_ret) +read_full_blob_into_alloc_buf(const struct blob_descriptor *blob, void **buf_ret) { int ret; void *buf; - if ((size_t)lte->size != lte->size) { - ERROR("Can't read %"PRIu64" byte stream into " - "memory", lte->size); + if ((size_t)blob->size != blob->size) { + ERROR("Can't read %"PRIu64" byte blob into memory", blob->size); return WIMLIB_ERR_NOMEM; } - buf = MALLOC(lte->size); + buf = MALLOC(blob->size); if (buf == NULL) return WIMLIB_ERR_NOMEM; - ret = read_full_stream_into_buf(lte, buf); + ret = read_full_blob_into_buf(blob, buf); if (ret) { FREE(buf); return ret; @@ -910,115 +902,102 @@ read_full_stream_into_alloc_buf(const struct wim_lookup_table_entry *lte, return 0; } -/* Retrieve the full uncompressed data of the specified WIM resource. A buffer - * large enough hold the data is allocated and returned in @buf_ret. */ -static int -wim_resource_spec_to_data(struct wim_resource_spec *rspec, void **buf_ret) +/* Retrieve the full uncompressed data of a WIM resource specified as a raw + * `wim_reshdr' and the corresponding WIM file. A buffer large enough hold the + * data is allocated and returned in @buf_ret. 
*/ +int +wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret) { + struct wim_resource_descriptor rdesc; + struct blob_descriptor *blob; int ret; - struct wim_lookup_table_entry *lte; - lte = new_lookup_table_entry(); - if (lte == NULL) + wim_res_hdr_to_desc(reshdr, wim, &rdesc); + + blob = new_blob_descriptor(); + if (!blob) return WIMLIB_ERR_NOMEM; - lte_bind_wim_resource_spec(lte, rspec); - lte->flags = rspec->flags; - lte->size = rspec->uncompressed_size; - lte->offset_in_res = 0; + blob_set_is_located_in_wim_resource(blob, &rdesc); + blob->flags = rdesc.flags; + blob->size = rdesc.uncompressed_size; + blob->offset_in_res = 0; - ret = read_full_stream_into_alloc_buf(lte, buf_ret); + ret = read_full_blob_into_alloc_buf(blob, buf_ret); - lte_unbind_wim_resource_spec(lte); - free_lookup_table_entry(lte); + blob_unset_is_located_in_wim_resource(blob); + free_blob_descriptor(blob); return ret; } -/* Retrieve the full uncompressed data of a WIM resource specified as a raw - * `wim_reshdr' and the corresponding WIM file. A large enough hold the data is - * allocated and returned in @buf_ret. 
*/ -int -wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim, void **buf_ret) -{ - DEBUG("offset_in_wim=%"PRIu64", size_in_wim=%"PRIu64", " - "uncompressed_size=%"PRIu64, - reshdr->offset_in_wim, reshdr->size_in_wim, - reshdr->uncompressed_size); - - struct wim_resource_spec rspec; - wim_res_hdr_to_spec(reshdr, wim, &rspec); - return wim_resource_spec_to_data(&rspec, buf_ret); -} - int wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim, u8 hash[SHA1_HASH_SIZE]) { - struct wim_resource_spec rspec; + struct wim_resource_descriptor rdesc; int ret; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - wim_res_hdr_to_spec(reshdr, wim, &rspec); + wim_res_hdr_to_desc(reshdr, wim, &rdesc); - lte = new_lookup_table_entry(); - if (lte == NULL) + blob = new_blob_descriptor(); + if (blob == NULL) return WIMLIB_ERR_NOMEM; - lte_bind_wim_resource_spec(lte, &rspec); - lte->flags = rspec.flags; - lte->size = rspec.uncompressed_size; - lte->offset_in_res = 0; - lte->unhashed = 1; + blob_set_is_located_in_wim_resource(blob, &rdesc); + blob->flags = rdesc.flags; + blob->size = rdesc.uncompressed_size; + blob->offset_in_res = 0; + blob->unhashed = 1; - ret = sha1_stream(lte); + ret = sha1_blob(blob); - lte_unbind_wim_resource_spec(lte); - copy_hash(hash, lte->hash); - free_lookup_table_entry(lte); + blob_unset_is_located_in_wim_resource(blob); + copy_hash(hash, blob->hash); + free_blob_descriptor(blob); return ret; } -struct streamifier_context { - struct read_stream_list_callbacks cbs; - struct wim_lookup_table_entry *cur_stream; - struct wim_lookup_table_entry *next_stream; - u64 cur_stream_offset; - struct wim_lookup_table_entry *final_stream; +struct blobifier_context { + struct read_blob_list_callbacks cbs; + struct blob_descriptor *cur_blob; + struct blob_descriptor *next_blob; + u64 cur_blob_offset; + struct blob_descriptor *final_blob; size_t list_head_offset; }; -static struct wim_lookup_table_entry * -next_stream(struct 
wim_lookup_table_entry *lte, size_t list_head_offset) +static struct blob_descriptor * +next_blob(struct blob_descriptor *blob, size_t list_head_offset) { struct list_head *cur; - cur = (struct list_head*)((u8*)lte + list_head_offset); + cur = (struct list_head*)((u8*)blob + list_head_offset); - return (struct wim_lookup_table_entry*)((u8*)cur->next - list_head_offset); + return (struct blob_descriptor*)((u8*)cur->next - list_head_offset); } /* A consume_data_callback_t implementation that translates raw resource data - * into streams, calling the begin_stream, consume_chunk, and end_stream - * callback functions as appropriate. */ + * into blobs, calling the begin_blob, consume_chunk, and end_blob callback + * functions as appropriate. */ static int -streamifier_cb(const void *chunk, size_t size, void *_ctx) +blobifier_cb(const void *chunk, size_t size, void *_ctx) { - struct streamifier_context *ctx = _ctx; + struct blobifier_context *ctx = _ctx; int ret; - DEBUG("%zu bytes passed to streamifier", size); + DEBUG("%zu bytes passed to blobifier", size); - wimlib_assert(ctx->cur_stream != NULL); - wimlib_assert(size <= ctx->cur_stream->size - ctx->cur_stream_offset); + wimlib_assert(ctx->cur_blob != NULL); + wimlib_assert(size <= ctx->cur_blob->size - ctx->cur_blob_offset); - if (ctx->cur_stream_offset == 0) { + if (ctx->cur_blob_offset == 0) { - /* Starting a new stream. */ - DEBUG("Begin new stream (size=%"PRIu64").", - ctx->cur_stream->size); + /* Starting a new blob. */ + DEBUG("Begin new blob (size=%"PRIu64").", ctx->cur_blob->size); - ret = (*ctx->cbs.begin_stream)(ctx->cur_stream, - ctx->cbs.begin_stream_ctx); + ret = (*ctx->cbs.begin_blob)(ctx->cur_blob, + ctx->cbs.begin_blob_ctx); if (ret) return ret; } @@ -1026,29 +1005,29 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) /* Consume the chunk. 
*/ ret = (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); - ctx->cur_stream_offset += size; + ctx->cur_blob_offset += size; if (ret) return ret; - if (ctx->cur_stream_offset == ctx->cur_stream->size) { - /* Finished reading all the data for a stream. */ + if (ctx->cur_blob_offset == ctx->cur_blob->size) { + /* Finished reading all the data for a blob. */ - ctx->cur_stream_offset = 0; + ctx->cur_blob_offset = 0; - DEBUG("End stream (size=%"PRIu64").", ctx->cur_stream->size); - ret = (*ctx->cbs.end_stream)(ctx->cur_stream, 0, - ctx->cbs.end_stream_ctx); + DEBUG("End blob (size=%"PRIu64").", ctx->cur_blob->size); + ret = (*ctx->cbs.end_blob)(ctx->cur_blob, 0, + ctx->cbs.end_blob_ctx); if (ret) return ret; - /* Advance to next stream. */ - ctx->cur_stream = ctx->next_stream; - if (ctx->cur_stream != NULL) { - if (ctx->cur_stream != ctx->final_stream) - ctx->next_stream = next_stream(ctx->cur_stream, - ctx->list_head_offset); + /* Advance to next blob. */ + ctx->cur_blob = ctx->next_blob; + if (ctx->cur_blob != NULL) { + if (ctx->cur_blob != ctx->final_blob) + ctx->next_blob = next_blob(ctx->cur_blob, + ctx->list_head_offset); else - ctx->next_stream = NULL; + ctx->next_blob = NULL; } } return 0; @@ -1057,28 +1036,28 @@ streamifier_cb(const void *chunk, size_t size, void *_ctx) struct hasher_context { SHA_CTX sha_ctx; int flags; - struct read_stream_list_callbacks cbs; + struct read_blob_list_callbacks cbs; }; -/* Callback for starting to read a stream while calculating its SHA1 message +/* Callback for starting to read a blob while calculating its SHA-1 message * digest. 
*/ static int -hasher_begin_stream(struct wim_lookup_table_entry *lte, void *_ctx) +hasher_begin_blob(struct blob_descriptor *blob, void *_ctx) { struct hasher_context *ctx = _ctx; sha1_init(&ctx->sha_ctx); - if (ctx->cbs.begin_stream == NULL) + if (ctx->cbs.begin_blob == NULL) return 0; else - return (*ctx->cbs.begin_stream)(lte, ctx->cbs.begin_stream_ctx); + return (*ctx->cbs.begin_blob)(blob, ctx->cbs.begin_blob_ctx); } -/* A consume_data_callback_t implementation that continues calculating the SHA1 - * message digest of the stream being read, then optionally passes the data on - * to another consume_data_callback_t implementation. This allows checking the - * SHA1 message digest of a stream being extracted, for example. */ +/* A consume_data_callback_t implementation that continues calculating the SHA-1 + * message digest of the blob being read, then optionally passes the data on to + * another consume_data_callback_t implementation. This allows checking the + * SHA-1 message digest of a blob being extracted, for example. */ static int hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) { @@ -1091,126 +1070,127 @@ hasher_consume_chunk(const void *chunk, size_t size, void *_ctx) return (*ctx->cbs.consume_chunk)(chunk, size, ctx->cbs.consume_chunk_ctx); } -/* Callback for finishing reading a stream while calculating its SHA1 message +/* Callback for finishing reading a blob while calculating its SHA-1 message * digest. */ static int -hasher_end_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) +hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct hasher_context *ctx = _ctx; u8 hash[SHA1_HASH_SIZE]; int ret; if (status) { - /* Error occurred; the full stream may not have been read. */ + /* Error occurred; the full blob may not have been read. */ ret = status; goto out_next_cb; } - /* Retrieve the final SHA1 message digest. */ + /* Retrieve the final SHA-1 message digest. 
*/ sha1_final(hash, &ctx->sha_ctx); - if (lte->unhashed) { - if (ctx->flags & COMPUTE_MISSING_STREAM_HASHES) { - /* No SHA1 message digest was previously present for the - * stream. Set it to the one just calculated. */ - DEBUG("Set SHA1 message digest for stream " - "(size=%"PRIu64").", lte->size); - copy_hash(lte->hash, hash); + if (blob->unhashed) { + if (ctx->flags & COMPUTE_MISSING_BLOB_HASHES) { + /* No SHA-1 message digest was previously present for the + * blob. Set it to the one just calculated. */ + DEBUG("Set SHA-1 message digest for blob " + "(size=%"PRIu64").", blob->size); + copy_hash(blob->hash, hash); } } else { - if (ctx->flags & VERIFY_STREAM_HASHES) { - /* The stream already had a SHA1 message digest present. Verify - * that it is the same as the calculated value. */ - if (!hashes_equal(hash, lte->hash)) { + if (ctx->flags & VERIFY_BLOB_HASHES) { + /* The blob already had a SHA-1 message digest present. + * Verify that it is the same as the calculated value. + */ + if (!hashes_equal(hash, blob->hash)) { if (wimlib_print_errors) { tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; - sprint_hash(lte->hash, expected_hashstr); + sprint_hash(blob->hash, expected_hashstr); sprint_hash(hash, actual_hashstr); - ERROR("The stream is corrupted!\n" - " (Expected SHA1=%"TS",\n" - " got SHA1=%"TS")", + ERROR("The blob is corrupted!\n" + " (Expected SHA-1=%"TS",\n" + " got SHA-1=%"TS")", expected_hashstr, actual_hashstr); } ret = WIMLIB_ERR_INVALID_RESOURCE_HASH; errno = EINVAL; goto out_next_cb; } - DEBUG("SHA1 message digest okay for " - "stream (size=%"PRIu64").", lte->size); + DEBUG("SHA-1 message digest okay for " + "blob (size=%"PRIu64").", blob->size); } } ret = 0; out_next_cb: - if (ctx->cbs.end_stream == NULL) + if (ctx->cbs.end_blob == NULL) return ret; else - return (*ctx->cbs.end_stream)(lte, ret, ctx->cbs.end_stream_ctx); + return (*ctx->cbs.end_blob)(blob, ret, ctx->cbs.end_blob_ctx); } static int 
-read_full_stream_with_cbs(struct wim_lookup_table_entry *lte, - const struct read_stream_list_callbacks *cbs) +read_full_blob_with_cbs(struct blob_descriptor *blob, + const struct read_blob_list_callbacks *cbs) { int ret; - ret = (*cbs->begin_stream)(lte, cbs->begin_stream_ctx); + ret = (*cbs->begin_blob)(blob, cbs->begin_blob_ctx); if (ret) return ret; - ret = read_stream_prefix(lte, lte->size, cbs->consume_chunk, - cbs->consume_chunk_ctx); + ret = read_blob_prefix(blob, blob->size, cbs->consume_chunk, + cbs->consume_chunk_ctx); - return (*cbs->end_stream)(lte, ret, cbs->end_stream_ctx); + return (*cbs->end_blob)(blob, ret, cbs->end_blob_ctx); } -/* Read the full data of the specified stream, passing the data into the - * specified callbacks (all of which are optional) and either checking or - * computing the SHA1 message digest of the stream. */ +/* Read the full data of the specified blob, passing the data into the specified + * callbacks (all of which are optional) and either checking or computing the + * SHA-1 message digest of the blob. 
*/ static int -read_full_stream_with_sha1(struct wim_lookup_table_entry *lte, - const struct read_stream_list_callbacks *cbs) +read_full_blob_with_sha1(struct blob_descriptor *blob, + const struct read_blob_list_callbacks *cbs) { struct hasher_context hasher_ctx = { - .flags = VERIFY_STREAM_HASHES | COMPUTE_MISSING_STREAM_HASHES, + .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES, .cbs = *cbs, }; - struct read_stream_list_callbacks hasher_cbs = { - .begin_stream = hasher_begin_stream, - .begin_stream_ctx = &hasher_ctx, + struct read_blob_list_callbacks hasher_cbs = { + .begin_blob = hasher_begin_blob, + .begin_blob_ctx = &hasher_ctx, .consume_chunk = hasher_consume_chunk, .consume_chunk_ctx = &hasher_ctx, - .end_stream = hasher_end_stream, - .end_stream_ctx = &hasher_ctx, + .end_blob = hasher_end_blob, + .end_blob_ctx = &hasher_ctx, }; - return read_full_stream_with_cbs(lte, &hasher_cbs); + return read_full_blob_with_cbs(blob, &hasher_cbs); } static int -read_streams_in_solid_resource(struct wim_lookup_table_entry *first_stream, - struct wim_lookup_table_entry *last_stream, - u64 stream_count, - size_t list_head_offset, - const struct read_stream_list_callbacks *sink_cbs) +read_blobs_in_solid_resource(struct blob_descriptor *first_blob, + struct blob_descriptor *last_blob, + u64 blob_count, + size_t list_head_offset, + const struct read_blob_list_callbacks *sink_cbs) { struct data_range *ranges; bool ranges_malloced; - struct wim_lookup_table_entry *cur_stream; + struct blob_descriptor *cur_blob; size_t i; int ret; u64 ranges_alloc_size; - DEBUG("Reading %"PRIu64" streams combined in same WIM resource", - stream_count); + DEBUG("Reading %"PRIu64" blobs combined in same WIM resource", + blob_count); - /* Setup data ranges array (one range per stream to read); this way - * read_compressed_wim_resource() does not need to be aware of streams. 
+ /* Setup data ranges array (one range per blob to read); this way + * read_compressed_wim_resource() does not need to be aware of blobs. */ - ranges_alloc_size = stream_count * sizeof(ranges[0]); + ranges_alloc_size = blob_count * sizeof(ranges[0]); if (unlikely((size_t)ranges_alloc_size != ranges_alloc_size)) { - ERROR("Too many streams in one resource!"); + ERROR("Too many blobs in one resource!"); return WIMLIB_ERR_NOMEM; } if (likely(ranges_alloc_size <= STACK_MAX)) { @@ -1219,82 +1199,81 @@ read_streams_in_solid_resource(struct wim_lookup_table_entry *first_stream, } else { ranges = MALLOC(ranges_alloc_size); if (ranges == NULL) { - ERROR("Too many streams in one resource!"); + ERROR("Too many blobs in one resource!"); return WIMLIB_ERR_NOMEM; } ranges_malloced = true; } - for (i = 0, cur_stream = first_stream; - i < stream_count; - i++, cur_stream = next_stream(cur_stream, list_head_offset)) + for (i = 0, cur_blob = first_blob; + i < blob_count; + i++, cur_blob = next_blob(cur_blob, list_head_offset)) { - ranges[i].offset = cur_stream->offset_in_res; - ranges[i].size = cur_stream->size; + ranges[i].offset = cur_blob->offset_in_res; + ranges[i].size = cur_blob->size; } - struct streamifier_context streamifier_ctx = { + struct blobifier_context blobifier_ctx = { .cbs = *sink_cbs, - .cur_stream = first_stream, - .next_stream = next_stream(first_stream, list_head_offset), - .cur_stream_offset = 0, - .final_stream = last_stream, + .cur_blob = first_blob, + .next_blob = next_blob(first_blob, list_head_offset), + .cur_blob_offset = 0, + .final_blob = last_blob, .list_head_offset = list_head_offset, }; - ret = read_compressed_wim_resource(first_stream->rspec, + ret = read_compressed_wim_resource(first_blob->rdesc, ranges, - stream_count, - streamifier_cb, - &streamifier_ctx); + blob_count, + blobifier_cb, + &blobifier_ctx); if (ranges_malloced) FREE(ranges); if (ret) { - if (streamifier_ctx.cur_stream_offset != 0) { - ret = (*streamifier_ctx.cbs.end_stream) - 
(streamifier_ctx.cur_stream, + if (blobifier_ctx.cur_blob_offset != 0) { + ret = (*blobifier_ctx.cbs.end_blob) + (blobifier_ctx.cur_blob, ret, - streamifier_ctx.cbs.end_stream_ctx); + blobifier_ctx.cbs.end_blob_ctx); } } return ret; } /* - * Read a list of streams, each of which may be in any supported location (e.g. - * in a WIM or in an external file). Unlike read_stream_prefix() or the - * functions which call it, this function optimizes the case where multiple - * streams are combined into a single solid compressed WIM resource and reads - * them all consecutively, only decompressing the data one time. + * Read a list of blobs, each of which may be in any supported location (e.g. + * in a WIM or in an external file). This function optimizes the case where + * multiple blobs are combined into a single solid compressed WIM resource by + * reading the blobs in sequential order, only decompressing the solid resource + * one time. * - * @stream_list - * List of streams (represented as `struct wim_lookup_table_entry's) to - * read. + * @blob_list + * List of blobs to read. * @list_head_offset - * Offset of the `struct list_head' within each `struct - * wim_lookup_table_entry' that makes up the @stream_list. + * Offset of the `struct list_head' within each `struct blob_descriptor' that makes up + * the @blob_list. * @cbs - * Callback functions to accept the stream data. + * Callback functions to accept the blob data. * @flags * Bitwise OR of zero or more of the following flags: * - * VERIFY_STREAM_HASHES: - * For all streams being read that have already had SHA1 message - * digests computed, calculate the SHA1 message digest of the read + * VERIFY_BLOB_HASHES: + * For all blobs being read that have already had SHA-1 message + * digests computed, calculate the SHA-1 message digest of the read * data and compare it with the previously computed value. If they * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH. 
* - * COMPUTE_MISSING_STREAM_HASHES - * For all streams being read that have not yet had their SHA1 - * message digests computed, calculate and save their SHA1 message + * COMPUTE_MISSING_BLOB_HASHES + * For all blobs being read that have not yet had their SHA-1 + * message digests computed, calculate and save their SHA-1 message * digests. * - * STREAM_LIST_ALREADY_SORTED - * @stream_list is already sorted in sequential order for reading. + * BLOB_LIST_ALREADY_SORTED + * @blob_list is already sorted in sequential order for reading. * - * The callback functions are allowed to delete the current stream from the list + * The callback functions are allowed to delete the current blob from the list * if necessary. * * Returns 0 on success; a nonzero error code on failure. Failure can occur due @@ -1302,124 +1281,126 @@ read_streams_in_solid_resource(struct wim_lookup_table_entry *first_stream, * of the callback functions. */ int -read_stream_list(struct list_head *stream_list, +read_blob_list(struct list_head *blob_list, size_t list_head_offset, - const struct read_stream_list_callbacks *cbs, + const struct read_blob_list_callbacks *cbs, int flags) { int ret; struct list_head *cur, *next; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; struct hasher_context *hasher_ctx; - struct read_stream_list_callbacks *sink_cbs; + struct read_blob_list_callbacks *sink_cbs; - if (!(flags & STREAM_LIST_ALREADY_SORTED)) { - ret = sort_stream_list_by_sequential_order(stream_list, list_head_offset); + if (!(flags & BLOB_LIST_ALREADY_SORTED)) { + ret = sort_blob_list_by_sequential_order(blob_list, list_head_offset); if (ret) return ret; } - if (flags & (VERIFY_STREAM_HASHES | COMPUTE_MISSING_STREAM_HASHES)) { + if (flags & (VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES)) { hasher_ctx = alloca(sizeof(*hasher_ctx)); *hasher_ctx = (struct hasher_context) { .flags = flags, .cbs = *cbs, }; sink_cbs = alloca(sizeof(*sink_cbs)); - *sink_cbs = (struct 
read_stream_list_callbacks) { - .begin_stream = hasher_begin_stream, - .begin_stream_ctx = hasher_ctx, + *sink_cbs = (struct read_blob_list_callbacks) { + .begin_blob = hasher_begin_blob, + .begin_blob_ctx = hasher_ctx, .consume_chunk = hasher_consume_chunk, .consume_chunk_ctx = hasher_ctx, - .end_stream = hasher_end_stream, - .end_stream_ctx = hasher_ctx, + .end_blob = hasher_end_blob, + .end_blob_ctx = hasher_ctx, }; } else { - sink_cbs = (struct read_stream_list_callbacks*)cbs; + sink_cbs = (struct read_blob_list_callbacks*)cbs; } - for (cur = stream_list->next, next = cur->next; - cur != stream_list; + for (cur = blob_list->next, next = cur->next; + cur != blob_list; cur = next, next = cur->next) { - lte = (struct wim_lookup_table_entry*)((u8*)cur - list_head_offset); + blob = (struct blob_descriptor*)((u8*)cur - list_head_offset); - if (lte->flags & WIM_RESHDR_FLAG_SOLID && - lte->size != lte->rspec->uncompressed_size) + if (blob->flags & WIM_RESHDR_FLAG_SOLID && + blob->size != blob->rdesc->uncompressed_size) { - struct wim_lookup_table_entry *lte_next, *lte_last; + struct blob_descriptor *blob_next, *blob_last; struct list_head *next2; - u64 stream_count; + u64 blob_count; - /* The next stream is a proper sub-sequence of a WIM - * resource. See if there are other streams in the same + /* The next blob is a proper sub-sequence of a WIM + * resource. See if there are other blobs in the same * resource that need to be read. Since - * sort_stream_list_by_sequential_order() sorted the - * streams by offset in the WIM, this can be determined - * by simply scanning forward in the list. */ + * sort_blob_list_by_sequential_order() sorted the blobs + * by offset in the WIM, this can be determined by + * simply scanning forward in the list. 
*/ - lte_last = lte; - stream_count = 1; + blob_last = blob; + blob_count = 1; for (next2 = next; - next2 != stream_list - && (lte_next = (struct wim_lookup_table_entry*) + next2 != blob_list + && (blob_next = (struct blob_descriptor*) ((u8*)next2 - list_head_offset), - lte_next->resource_location == RESOURCE_IN_WIM - && lte_next->rspec == lte->rspec); + blob_next->blob_location == BLOB_IN_WIM + && blob_next->rdesc == blob->rdesc); next2 = next2->next) { - lte_last = lte_next; - stream_count++; + blob_last = blob_next; + blob_count++; } - if (stream_count > 1) { - /* Reading multiple streams combined into a - * single WIM resource. They are in the stream - * list, sorted by offset; @lte specifies the - * first stream in the resource that needs to be - * read and @lte_last specifies the last stream - * in the resource that needs to be read. */ + if (blob_count > 1) { + /* Reading multiple blobs combined into a single + * WIM resource. They are in the blob list, + * sorted by offset; @blob specifies the first + * blob in the resource that needs to be read + * and @blob_last specifies the last blob in the + * resource that needs to be read. */ next = next2; - ret = read_streams_in_solid_resource(lte, lte_last, - stream_count, - list_head_offset, - sink_cbs); + ret = read_blobs_in_solid_resource(blob, blob_last, + blob_count, + list_head_offset, + sink_cbs); if (ret) return ret; continue; } } - ret = read_full_stream_with_cbs(lte, sink_cbs); - if (ret && ret != BEGIN_STREAM_STATUS_SKIP_STREAM) + ret = read_full_blob_with_cbs(blob, sink_cbs); + if (ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB) return ret; } return 0; } -/* Extract the first @size bytes of the specified stream. +/* + * Extract the first @size bytes of the specified blob. * - * If @size specifies the full uncompressed size of the stream, then the SHA1 - * message digest of the uncompressed stream is checked while being extracted. 
+ * If @size specifies the full uncompressed size of the blob, then the SHA-1 + * message digest of the uncompressed blob is checked while being extracted. * - * The uncompressed data of the resource is passed in chunks of unspecified size - * to the @extract_chunk function, passing it @extract_chunk_arg. */ + * The uncompressed data of the blob is passed in chunks of unspecified size to + * the @extract_chunk function, passing it @extract_chunk_arg. + */ int -extract_stream(struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t extract_chunk, void *extract_chunk_arg) +extract_blob(struct blob_descriptor *blob, u64 size, + consume_data_callback_t extract_chunk, void *extract_chunk_arg) { - wimlib_assert(size <= lte->size); - if (size == lte->size) { - /* Do SHA1. */ - struct read_stream_list_callbacks cbs = { + wimlib_assert(size <= blob->size); + if (size == blob->size) { + /* Do SHA-1. */ + struct read_blob_list_callbacks cbs = { .consume_chunk = extract_chunk, .consume_chunk_ctx = extract_chunk_arg, }; - return read_full_stream_with_sha1(lte, &cbs); + return read_full_blob_with_sha1(blob, &cbs); } else { - /* Don't do SHA1. */ - return read_stream_prefix(lte, size, extract_chunk, - extract_chunk_arg); + /* Don't do SHA-1. */ + return read_blob_prefix(blob, size, extract_chunk, + extract_chunk_arg); } } @@ -1438,68 +1419,66 @@ extract_chunk_to_fd(const void *chunk, size_t size, void *_fd_p) return 0; } -/* Extract the first @size bytes of the specified stream to the specified file +/* Extract the first @size bytes of the specified blob to the specified file * descriptor. 
*/ int -extract_stream_to_fd(struct wim_lookup_table_entry *lte, - struct filedes *fd, u64 size) +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, u64 size) { - return extract_stream(lte, size, extract_chunk_to_fd, fd); + return extract_blob(blob, size, extract_chunk_to_fd, fd); } -/* Extract the full uncompressed contents of the specified stream to the - * specified file descriptor. */ +/* Extract the full uncompressed contents of the specified blob to the specified + * file descriptor. */ int -extract_full_stream_to_fd(struct wim_lookup_table_entry *lte, - struct filedes *fd) +extract_full_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd) { - return extract_stream_to_fd(lte, fd, lte->size); + return extract_blob_to_fd(blob, fd, blob->size); } -/* Calculate the SHA1 message digest of a stream and store it in @lte->hash. */ +/* Calculate the SHA-1 message digest of a blob and store it in @blob->hash. */ int -sha1_stream(struct wim_lookup_table_entry *lte) +sha1_blob(struct blob_descriptor *blob) { - wimlib_assert(lte->unhashed); - struct read_stream_list_callbacks cbs = { + wimlib_assert(blob->unhashed); + struct read_blob_list_callbacks cbs = { }; - return read_full_stream_with_sha1(lte, &cbs); + return read_full_blob_with_sha1(blob, &cbs); } -/* Convert a short WIM resource header to a stand-alone WIM resource - * specification. +/* + * Convert a short WIM resource header to a stand-alone WIM resource descriptor. * * Note: for solid resources some fields still need to be overridden. 
*/ void -wim_res_hdr_to_spec(const struct wim_reshdr *reshdr, WIMStruct *wim, - struct wim_resource_spec *rspec) +wim_res_hdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim, + struct wim_resource_descriptor *rdesc) { - rspec->wim = wim; - rspec->offset_in_wim = reshdr->offset_in_wim; - rspec->size_in_wim = reshdr->size_in_wim; - rspec->uncompressed_size = reshdr->uncompressed_size; - INIT_LIST_HEAD(&rspec->stream_list); - rspec->flags = reshdr->flags; - rspec->is_pipable = wim_is_pipable(wim); - if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) { - rspec->compression_type = wim->compression_type; - rspec->chunk_size = wim->chunk_size; + rdesc->wim = wim; + rdesc->offset_in_wim = reshdr->offset_in_wim; + rdesc->size_in_wim = reshdr->size_in_wim; + rdesc->uncompressed_size = reshdr->uncompressed_size; + INIT_LIST_HEAD(&rdesc->blob_list); + rdesc->flags = reshdr->flags; + rdesc->is_pipable = wim_is_pipable(wim); + if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) { + rdesc->compression_type = wim->compression_type; + rdesc->chunk_size = wim->chunk_size; } else { - rspec->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; - rspec->chunk_size = 0; + rdesc->compression_type = WIMLIB_COMPRESSION_TYPE_NONE; + rdesc->chunk_size = 0; } } -/* Convert a stand-alone resource specification to a WIM resource header. */ +/* Convert a stand-alone resource descriptor to a WIM resource header. 
*/ void -wim_res_spec_to_hdr(const struct wim_resource_spec *rspec, +wim_res_desc_to_hdr(const struct wim_resource_descriptor *rdesc, struct wim_reshdr *reshdr) { - reshdr->offset_in_wim = rspec->offset_in_wim; - reshdr->size_in_wim = rspec->size_in_wim; - reshdr->flags = rspec->flags; - reshdr->uncompressed_size = rspec->uncompressed_size; + reshdr->offset_in_wim = rdesc->offset_in_wim; + reshdr->size_in_wim = rdesc->size_in_wim; + reshdr->flags = rdesc->flags; + reshdr->uncompressed_size = rdesc->uncompressed_size; } /* Translates a WIM resource header from the on-disk format into an in-memory diff --git a/src/solid.c b/src/solid.c index 01f22a5b..ab00b0fa 100644 --- a/src/solid.c +++ b/src/solid.c @@ -1,7 +1,7 @@ /* * solid.c * - * Heuristic sorting of streams to optimize solid compression. + * Heuristic sorting of blobs to optimize solid compression. */ /* @@ -25,10 +25,10 @@ # include "config.h" #endif +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/paths.h" #include "wimlib/solid.h" @@ -52,67 +52,65 @@ get_extension(const utf16lechar *name, size_t nbytes) /* * Sort order for solid compression: * - * 1. Streams without sort names + * 1. Blobs without sort names * - sorted by sequential order - * 2. Streams with sort names: - * a. Streams whose sort name does not have an extension + * 2. Blobs with sort names: + * a. Blobs whose sort name does not have an extension * - sorted by sort name - * b. Streams whose sort name has an extension + * b. 
Blobs whose sort name has an extension * - sorted primarily by extension (case insensitive), * secondarily by sort name (case insensitive) */ static int -cmp_streams_by_solid_sort_name(const void *p1, const void *p2) +cmp_blobs_by_solid_sort_name(const void *p1, const void *p2) { - const struct wim_lookup_table_entry *lte1, *lte2; + const struct blob_descriptor *blob1, *blob2; - lte1 = *(const struct wim_lookup_table_entry **)p1; - lte2 = *(const struct wim_lookup_table_entry **)p2; + blob1 = *(const struct blob_descriptor **)p1; + blob2 = *(const struct blob_descriptor **)p2; - if (lte1->solid_sort_name) { - if (!lte2->solid_sort_name) + if (blob1->solid_sort_name) { + if (!blob2->solid_sort_name) return 1; - const utf16lechar *extension1 = get_extension(lte1->solid_sort_name, - lte1->solid_sort_name_nbytes); - const utf16lechar *extension2 = get_extension(lte2->solid_sort_name, - lte2->solid_sort_name_nbytes); + const utf16lechar *extension1 = get_extension(blob1->solid_sort_name, + blob1->solid_sort_name_nbytes); + const utf16lechar *extension2 = get_extension(blob2->solid_sort_name, + blob2->solid_sort_name_nbytes); if (extension1) { if (!extension2) return 1; - int res = cmp_utf16le_strings(extension1, - utf16le_strlen(extension1) / sizeof(utf16lechar), - extension2, - utf16le_strlen(extension2) / sizeof(utf16lechar), - true); /* case insensitive */ + int res = cmp_utf16le_strings_z(extension1, + extension2, + true); /* case insensitive */ if (res) return res; } else { if (extension2) return -1; } - int res = cmp_utf16le_strings(lte1->solid_sort_name, - lte1->solid_sort_name_nbytes / sizeof(utf16lechar), - lte2->solid_sort_name, - lte2->solid_sort_name_nbytes / sizeof(utf16lechar), + int res = cmp_utf16le_strings(blob1->solid_sort_name, + blob1->solid_sort_name_nbytes / sizeof(utf16lechar), + blob2->solid_sort_name, + blob2->solid_sort_name_nbytes / sizeof(utf16lechar), true); /* case insensitive */ if (res) return res; } else { - if (lte2->solid_sort_name) + 
if (blob2->solid_sort_name) return -1; } - return cmp_streams_by_sequential_order(p1, p2); + return cmp_blobs_by_sequential_order(p1, p2); } static void -lte_set_solid_sort_name_from_inode(struct wim_lookup_table_entry *lte, - const struct wim_inode *inode) +blob_set_solid_sort_name_from_inode(struct blob_descriptor *blob, + const struct wim_inode *inode) { const struct wim_dentry *dentry; const utf16lechar *best_name = NULL; size_t best_name_nbytes = SIZE_MAX; - if (lte->solid_sort_name) /* Sort name already set? */ + if (blob->solid_sort_name) /* Sort name already set? */ return; /* If this file has multiple names, choose the shortest one. */ @@ -122,31 +120,31 @@ lte_set_solid_sort_name_from_inode(struct wim_lookup_table_entry *lte, best_name_nbytes = dentry->file_name_nbytes; } } - lte->solid_sort_name = utf16le_dupz(best_name, best_name_nbytes); - lte->solid_sort_name_nbytes = best_name_nbytes; + blob->solid_sort_name = utf16le_dupz(best_name, best_name_nbytes); + blob->solid_sort_name_nbytes = best_name_nbytes; } -struct temp_lookup_table { +struct temp_blob_table { struct hlist_head *table; size_t capacity; }; static int -dentry_fill_in_solid_sort_names(struct wim_dentry *dentry, void *_lookup_table) +dentry_fill_in_solid_sort_names(struct wim_dentry *dentry, void *_blob_table) { - const struct temp_lookup_table *lookup_table = _lookup_table; + const struct temp_blob_table *blob_table = _blob_table; const struct wim_inode *inode = dentry->d_inode; const u8 *hash; struct hlist_head *head; struct hlist_node *cur; - struct wim_lookup_table_entry *lte; - - hash = inode_unnamed_stream_hash(inode); - head = &lookup_table->table[load_size_t_unaligned(hash) % - lookup_table->capacity]; - hlist_for_each_entry(lte, cur, head, hash_list_2) { - if (hashes_equal(hash, lte->hash)) { - lte_set_solid_sort_name_from_inode(lte, inode); + struct blob_descriptor *blob; + + hash = inode_get_hash_of_unnamed_data_stream(inode); + head = 
&blob_table->table[load_size_t_unaligned(hash) % + blob_table->capacity]; + hlist_for_each_entry(blob, cur, head, hash_list_2) { + if (hashes_equal(hash, blob->hash)) { + blob_set_solid_sort_name_from_inode(blob, inode); break; } } @@ -162,55 +160,55 @@ image_fill_in_solid_sort_names(WIMStruct *wim) } int -sort_stream_list_for_solid_compression(struct list_head *stream_list) +sort_blob_list_for_solid_compression(struct list_head *blob_list) { - size_t num_streams = 0; - struct temp_lookup_table lookup_table; + size_t num_blobs = 0; + struct temp_blob_table blob_table; WIMStruct *wims[128]; int num_wims = 0; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; int ret; - /* Count the number of streams to be written. */ - list_for_each_entry(lte, stream_list, write_streams_list) - num_streams++; + /* Count the number of blobs to be written. */ + list_for_each_entry(blob, blob_list, write_blobs_list) + num_blobs++; - /* Allocate a temporary hash table for mapping stream hash => stream */ - lookup_table.capacity = num_streams; - lookup_table.table = CALLOC(lookup_table.capacity, - sizeof(lookup_table.table[0])); - if (!lookup_table.table) + /* Allocate a temporary hash table for mapping blob hash => blob */ + blob_table.capacity = num_blobs; + blob_table.table = CALLOC(blob_table.capacity, + sizeof(blob_table.table[0])); + if (!blob_table.table) return WIMLIB_ERR_NOMEM; /* - * For each stream to be written: + * For each blob to be written: * - Reset the sort name * - If it's in non-solid WIM resource, then save the WIMStruct. * - If it's in a file on disk, then set its sort name from that. 
*/ - list_for_each_entry(lte, stream_list, write_streams_list) { - lte->solid_sort_name = NULL; - lte->solid_sort_name_nbytes = 0; - switch (lte->resource_location) { - case RESOURCE_IN_WIM: - if (lte->size != lte->rspec->uncompressed_size) + list_for_each_entry(blob, blob_list, write_blobs_list) { + blob->solid_sort_name = NULL; + blob->solid_sort_name_nbytes = 0; + switch (blob->blob_location) { + case BLOB_IN_WIM: + if (blob->size != blob->rdesc->uncompressed_size) continue; for (int i = 0; i < num_wims; i++) - if (lte->rspec->wim == wims[i]) + if (blob->rdesc->wim == wims[i]) goto found_wim; if (num_wims >= ARRAY_LEN(wims)) continue; - wims[num_wims++] = lte->rspec->wim; + wims[num_wims++] = blob->rdesc->wim; found_wim: - hlist_add_head(&lte->hash_list_2, - &lookup_table.table[load_size_t_unaligned(lte->hash) % - lookup_table.capacity]); + hlist_add_head(&blob->hash_list_2, + &blob_table.table[load_size_t_unaligned(blob->hash) % + blob_table.capacity]); break; - case RESOURCE_IN_FILE_ON_DISK: + case BLOB_IN_FILE_ON_DISK: #ifdef __WIN32__ - case RESOURCE_IN_WINNT_FILE_ON_DISK: + case BLOB_IN_WINNT_FILE_ON_DISK: #endif - lte_set_solid_sort_name_from_inode(lte, lte->file_inode); + blob_set_solid_sort_name_from_inode(blob, blob->file_inode); break; default: break; @@ -218,12 +216,12 @@ sort_stream_list_for_solid_compression(struct list_head *stream_list) } /* For each WIMStruct that was found, search for dentry references to - * each stream and fill in the sort name this way. This is useful e.g. + * each blob and fill in the sort name this way. This is useful e.g. * when exporting a solid WIM file from a non-solid WIM file.
*/ for (int i = 0; i < num_wims; i++) { if (!wim_has_metadata(wims[i])) continue; - wims[i]->private = &lookup_table; + wims[i]->private = &blob_table; ret = for_image(wims[i], WIMLIB_ALL_IMAGES, image_fill_in_solid_sort_names); if (ret) @@ -231,14 +229,13 @@ sort_stream_list_for_solid_compression(struct list_head *stream_list) deselect_current_wim_image(wims[i]); } - ret = sort_stream_list(stream_list, - offsetof(struct wim_lookup_table_entry, - write_streams_list), - cmp_streams_by_solid_sort_name); + ret = sort_blob_list(blob_list, + offsetof(struct blob_descriptor, write_blobs_list), + cmp_blobs_by_solid_sort_name); out: - list_for_each_entry(lte, stream_list, write_streams_list) - FREE(lte->solid_sort_name); - FREE(lookup_table.table); + list_for_each_entry(blob, blob_list, write_blobs_list) + FREE(blob->solid_sort_name); + FREE(blob_table.table); return ret; } diff --git a/src/split.c b/src/split.c index 51c4a910..43d26db2 100644 --- a/src/split.c +++ b/src/split.c @@ -27,9 +27,9 @@ #include "wimlib.h" #include "wimlib/alloca.h" +#include "wimlib/blob_table.h" #include "wimlib/error.h" #include "wimlib/list.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/progress.h" #include "wimlib/resource.h" @@ -37,14 +37,14 @@ #include "wimlib/write.h" struct swm_part_info { - struct list_head stream_list; + struct list_head blob_list; u64 size; }; static void copy_part_info(struct swm_part_info *dst, struct swm_part_info *src) { - list_transfer(&src->stream_list, &dst->stream_list); + list_transfer(&src->blob_list, &dst->blob_list); dst->size = src->size; } @@ -126,7 +126,7 @@ write_split_wim(WIMStruct *orig_wim, const tchar *swm_name, 1, part_number, swm_info->num_parts, - &swm_info->parts[part_number - 1].stream_list, + &swm_info->parts[part_number - 1].blob_list, guid); orig_wim->progfunc = progfunc; if (ret) @@ -145,30 +145,30 @@ write_split_wim(WIMStruct *orig_wim, const tchar *swm_name, } static int -add_stream_to_swm(struct 
wim_lookup_table_entry *lte, void *_swm_info) +add_blob_to_swm(struct blob_descriptor *blob, void *_swm_info) { struct swm_info *swm_info = _swm_info; - u64 stream_size; + u64 blob_stored_size; - if (lte_is_partial(lte)) { + if (blob_is_in_solid_wim_resource(blob)) { ERROR("Splitting of WIM containing solid resources is not supported.\n" " Export it in non-solid format first."); return WIMLIB_ERR_UNSUPPORTED; } - if (lte->resource_location == RESOURCE_IN_WIM) - stream_size = lte->rspec->size_in_wim; + if (blob->blob_location == BLOB_IN_WIM) + blob_stored_size = blob->rdesc->size_in_wim; else - stream_size = lte->size; + blob_stored_size = blob->size; /* - Start first part if no parts have been started so far; - * - Start next part if adding this stream exceeds maximum part size, - * UNLESS the stream is metadata or if no streams at all have been - * added to the current part. + * - Start next part if adding this blob exceeds maximum part size, + * UNLESS the blob is metadata or if no blobs at all have been added + * to the current part. 
*/ if (swm_info->num_parts == 0 || ((swm_info->parts[swm_info->num_parts - 1].size + - stream_size >= swm_info->max_part_size) - && !((lte->flags & WIM_RESHDR_FLAG_METADATA) || + blob_stored_size >= swm_info->max_part_size) + && !((blob->flags & WIM_RESHDR_FLAG_METADATA) || swm_info->parts[swm_info->num_parts - 1].size == 0))) { if (swm_info->num_parts == swm_info->num_alloc_parts) { @@ -188,15 +188,15 @@ add_stream_to_swm(struct wim_lookup_table_entry *lte, void *_swm_info) swm_info->num_alloc_parts = num_alloc_parts; } swm_info->num_parts++; - INIT_LIST_HEAD(&swm_info->parts[swm_info->num_parts - 1].stream_list); + INIT_LIST_HEAD(&swm_info->parts[swm_info->num_parts - 1].blob_list); swm_info->parts[swm_info->num_parts - 1].size = 0; } - swm_info->parts[swm_info->num_parts - 1].size += stream_size; - if (!(lte->flags & WIM_RESHDR_FLAG_METADATA)) { - list_add_tail(<e->write_streams_list, - &swm_info->parts[swm_info->num_parts - 1].stream_list); + swm_info->parts[swm_info->num_parts - 1].size += blob_stored_size; + if (!(blob->flags & WIM_RESHDR_FLAG_METADATA)) { + list_add_tail(&blob->write_blobs_list, + &swm_info->parts[swm_info->num_parts - 1].blob_list); } - swm_info->total_bytes += stream_size; + swm_info->total_bytes += blob_stored_size; return 0; } @@ -222,15 +222,15 @@ wimlib_split(WIMStruct *wim, const tchar *swm_name, swm_info.max_part_size = part_size; for (i = 0; i < wim->hdr.image_count; i++) { - ret = add_stream_to_swm(wim->image_metadata[i]->metadata_lte, - &swm_info); + ret = add_blob_to_swm(wim->image_metadata[i]->metadata_blob, + &swm_info); if (ret) goto out_free_swm_info; } - ret = for_lookup_table_entry_pos_sorted(wim->lookup_table, - add_stream_to_swm, - &swm_info); + ret = for_blob_in_table_sorted_by_sequential_order(wim->blob_table, + add_blob_to_swm, + &swm_info); if (ret) goto out_free_swm_info; diff --git a/src/template.c b/src/template.c index 1c0e205f..57eaca71 100644 --- a/src/template.c +++ b/src/template.c @@ -5,7 +5,7 @@ */ /* - * 
Copyright (C) 2013 Eric Biggers + * Copyright (C) 2013, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -26,10 +26,10 @@ #endif #include "wimlib.h" +#include "wimlib/blob_table.h" #include "wimlib/assert.h" #include "wimlib/dentry.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/util.h" @@ -38,7 +38,7 @@ static bool inode_metadata_consistent(const struct wim_inode *inode, const struct wim_inode *template_inode, - const struct wim_lookup_table *template_lookup_table) + const struct blob_table *template_blob_table) { /* Must have exact same creation time and last write time. */ if (inode->i_creation_time != template_inode->i_creation_time || @@ -50,37 +50,36 @@ inode_metadata_consistent(const struct wim_inode *inode, if (inode->i_last_access_time < template_inode->i_last_access_time) return false; - /* Must have same number of alternate data stream entries. */ - if (inode->i_num_ads != template_inode->i_num_ads) + /* Must have same number of streams. */ + if (inode->i_num_streams != template_inode->i_num_streams) return false; - /* If the stream entries for the inode are for some reason not resolved, - * then the hashes are already available and the point of this function - * is defeated. */ - if (!inode->i_resolved) - return false; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct blob_descriptor *blob, *template_blob; - /* Iterate through each stream and do some more checks. */ - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - const struct wim_lookup_table_entry *lte, *template_lte; + /* If the streams for the inode are for some reason not + * resolved, then the hashes are already available and the point + * of this function is defeated. 
*/ + if (!inode->i_streams[i].stream_resolved) + return false; - lte = inode_stream_lte_resolved(inode, i); - template_lte = inode_stream_lte(template_inode, i, - template_lookup_table); + blob = stream_blob_resolved(&inode->i_streams[i]); + template_blob = stream_blob(&template_inode->i_streams[i], + template_blob_table); - /* Compare stream sizes. */ - if (lte && template_lte) { - if (lte->size != template_lte->size) + /* Compare blob sizes. */ + if (blob && template_blob) { + if (blob->size != template_blob->size) return false; /* If hash happens to be available, compare with template. */ - if (!lte->unhashed && !template_lte->unhashed && - !hashes_equal(lte->hash, template_lte->hash)) + if (!blob->unhashed && !template_blob->unhashed && + !hashes_equal(blob->hash, template_blob->hash)) return false; - } else if (lte && lte->size) { + } else if (blob && blob->size) { return false; - } else if (template_lte && template_lte->size) { + } else if (template_blob && template_blob->size) { return false; } } @@ -94,11 +93,11 @@ inode_metadata_consistent(const struct wim_inode *inode, /** * Given an inode @inode that has been determined to be "the same" as another * inode @template_inode in either the same WIM or another WIM, retrieve some - * useful stream information (e.g. checksums) from @template_inode. + * useful information (e.g. checksums) from @template_inode. * * This assumes that the streams for @inode have been resolved (to point - * directly to the appropriate `struct wim_lookup_table_entry's) but do not - * necessarily have checksum information filled in. + * directly to the appropriate `struct blob_descriptor's) but do not necessarily + * have checksum information filled in. 
*/ static int inode_copy_checksums(struct wim_inode *inode, @@ -106,53 +105,49 @@ inode_copy_checksums(struct wim_inode *inode, WIMStruct *wim, WIMStruct *template_wim) { - for (unsigned i = 0; i <= inode->i_num_ads; i++) { - struct wim_lookup_table_entry *lte, *template_lte; - struct wim_lookup_table_entry *replace_lte; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct blob_descriptor *blob, *template_blob; + struct blob_descriptor *replace_blob; - lte = inode_stream_lte_resolved(inode, i); - template_lte = inode_stream_lte(template_inode, i, - template_wim->lookup_table); + blob = stream_blob_resolved(&inode->i_streams[i]); + template_blob = stream_blob(&template_inode->i_streams[i], + template_wim->blob_table); /* Only take action if both entries exist, the entry for @inode * has no checksum calculated, but the entry for @template_inode * does. */ - if (lte == NULL || template_lte == NULL || - !lte->unhashed || template_lte->unhashed) + if (blob == NULL || template_blob == NULL || + !blob->unhashed || template_blob->unhashed) continue; - wimlib_assert(lte->refcnt == inode->i_nlink); + wimlib_assert(blob->refcnt == inode->i_nlink); /* If the WIM of the template image is the same as the WIM of - * the new image, then @template_lte can be used directly. + * the new image, then @template_blob can be used directly. * - * Otherwise, look for a stream with the same hash in the WIM of - * the new image. If found, use it; otherwise re-use the entry - * being discarded, filling in the hash. */ + * Otherwise, look for a blob with the same hash in the WIM of + * the new image. If found, use it; otherwise re-use the + * blob descriptor being discarded, filling in the hash. 
*/ if (wim == template_wim) - replace_lte = template_lte; + replace_blob = template_blob; else - replace_lte = lookup_stream(wim->lookup_table, - template_lte->hash); + replace_blob = lookup_blob(wim->blob_table, + template_blob->hash); - list_del(<e->unhashed_list); - if (replace_lte) { - free_lookup_table_entry(lte); + list_del(&blob->unhashed_list); + if (replace_blob) { + free_blob_descriptor(blob); } else { - copy_hash(lte->hash, template_lte->hash); - lte->unhashed = 0; - lookup_table_insert(wim->lookup_table, lte); - lte->refcnt = 0; - replace_lte = lte; + copy_hash(blob->hash, template_blob->hash); + blob->unhashed = 0; + blob_table_insert(wim->blob_table, blob); + blob->refcnt = 0; + replace_blob = blob; } - if (i == 0) - inode->i_lte = replace_lte; - else - inode->i_ads_entries[i - 1].lte = replace_lte; - - replace_lte->refcnt += inode->i_nlink; + stream_set_blob(&inode->i_streams[i], replace_blob); + replace_blob->refcnt += inode->i_nlink; } return 0; } @@ -190,7 +185,7 @@ dentry_reference_template(struct wim_dentry *dentry, void *_args) template_inode = template_dentry->d_inode; if (inode_metadata_consistent(inode, template_inode, - template_wim->lookup_table)) { + template_wim->blob_table)) { /*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/ ret = inode_copy_checksums(inode, template_inode, wim, template_wim); diff --git a/src/unix_apply.c b/src/unix_apply.c index afa4b723..a0b62519 100644 --- a/src/unix_apply.c +++ b/src/unix_apply.c @@ -33,10 +33,10 @@ #include "wimlib/apply.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/error.h" #include "wimlib/file_io.h" -#include "wimlib/lookup_table.h" #include "wimlib/reparse.h" #include "wimlib/timestamp.h" #include "wimlib/unix_data.h" @@ -75,13 +75,13 @@ struct unix_apply_ctx { unsigned which_pathbuf; /* Currently open file descriptors for extraction */ - struct filedes open_fds[MAX_OPEN_STREAMS]; + struct filedes 
open_fds[MAX_OPEN_FILES]; /* Number of currently open file descriptors in open_fds, starting from * the beginning of the array. */ unsigned num_open_fds; - /* Buffer for reading reparse data streams into memory */ + /* Buffer for reading reparse point data into memory */ u8 reparse_data[REPARSE_DATA_MAX_SIZE]; /* Pointer to the next byte in @reparse_data to fill */ @@ -394,7 +394,7 @@ unix_extract_if_empty_file(const struct wim_dentry *dentry, /* Is this a directory, a symbolic link, or any type of nonempty file? */ if (inode_is_directory(inode) || inode_is_symlink(inode) || - inode_unnamed_lte_resolved(inode)) + inode_get_blob_for_unnamed_data_stream_resolved(inode)) return 0; /* Recognize special files in UNIX_DATA mode */ @@ -485,7 +485,8 @@ unix_count_dentries(const struct list_head *dentry_list, if (inode_is_directory(inode)) dir_count++; else if ((dentry == inode_first_extraction_dentry(inode)) && - !inode_unnamed_lte_resolved(inode)) + !inode_is_symlink(inode) && + !inode_get_blob_for_unnamed_data_stream_resolved(inode)) empty_file_count++; } @@ -500,14 +501,14 @@ unix_create_symlink(const struct wim_inode *inode, const char *path, { char link_target[REPARSE_DATA_MAX_SIZE]; int ret; - struct wim_lookup_table_entry lte_override; + struct blob_descriptor blob_override; - lte_override.resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte_override.attached_buffer = (void *)rpdata; - lte_override.size = rpdatalen; + blob_override.blob_location = BLOB_IN_ATTACHED_BUFFER; + blob_override.attached_buffer = (void *)rpdata; + blob_override.size = rpdatalen; ret = wim_inode_readlink(inode, link_target, - sizeof(link_target) - 1, <e_override); + sizeof(link_target) - 1, &blob_override); if (ret < 0) { errno = -ret; return WIMLIB_ERR_READLINK; @@ -546,30 +547,35 @@ unix_cleanup_open_fds(struct unix_apply_ctx *ctx, unsigned offset) } static int -unix_begin_extract_stream_instance(const struct wim_lookup_table_entry *stream, - const struct wim_inode *inode, - struct 
unix_apply_ctx *ctx) +unix_begin_extract_blob_instance(const struct blob_descriptor *blob, + const struct wim_inode *inode, + const struct wim_inode_stream *strm, + struct unix_apply_ctx *ctx) { const struct wim_dentry *first_dentry; const char *first_path; int fd; - if (inode_is_symlink(inode)) { + if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) { /* On UNIX, symbolic links must be created with symlink(), which * requires that the full link target be available. */ - if (stream->size > REPARSE_DATA_MAX_SIZE) { + if (blob->size > REPARSE_DATA_MAX_SIZE) { ERROR_WITH_ERRNO("Reparse data of \"%s\" has size " "%"PRIu64" bytes (exceeds %u bytes)", inode_first_full_path(inode), - stream->size, REPARSE_DATA_MAX_SIZE); + blob->size, REPARSE_DATA_MAX_SIZE); return WIMLIB_ERR_INVALID_REPARSE_DATA; } ctx->reparse_ptr = ctx->reparse_data; return 0; } - /* This should be ensured by extract_stream_list() */ - wimlib_assert(ctx->num_open_fds < MAX_OPEN_STREAMS); + wimlib_assert(stream_is_unnamed_data_stream(strm)); + + /* Unnamed data stream of "regular" file */ + + /* This should be ensured by extract_blob_list() */ + wimlib_assert(ctx->num_open_fds < MAX_OPEN_FILES); first_dentry = inode_first_extraction_dentry(inode); first_path = unix_build_extraction_path(first_dentry, ctx); @@ -585,18 +591,18 @@ retry_create: return unix_create_hardlinks(inode, first_dentry, first_path, ctx); } -/* Called when starting to read a single-instance stream for extraction */ +/* Called when starting to read a blob for extraction */ static int -unix_begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) +unix_begin_extract_blob(struct blob_descriptor *blob, void *_ctx) { struct unix_apply_ctx *ctx = _ctx; - const struct stream_owner *owners = stream_owners(stream); - int ret; - - for (u32 i = 0; i < stream->out_refcnt; i++) { - const struct wim_inode *inode = owners[i].inode; + const struct blob_extraction_target *targets = blob_extraction_targets(blob); - ret = 
unix_begin_extract_stream_instance(stream, inode, ctx); + for (u32 i = 0; i < blob->out_refcnt; i++) { + int ret = unix_begin_extract_blob_instance(blob, + targets[i].inode, + targets[i].stream, + ctx); if (ret) { ctx->reparse_ptr = NULL; unix_cleanup_open_fds(ctx, 0); @@ -606,8 +612,7 @@ unix_begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) return 0; } -/* Called when the next chunk of a single-instance stream has been read for - * extraction */ +/* Called when the next chunk of a blob has been read for extraction */ static int unix_extract_chunk(const void *chunk, size_t size, void *_ctx) { @@ -626,15 +631,14 @@ unix_extract_chunk(const void *chunk, size_t size, void *_ctx) return 0; } -/* Called when a single-instance stream has been fully read for extraction */ +/* Called when a blob has been fully read for extraction */ static int -unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status, - void *_ctx) +unix_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct unix_apply_ctx *ctx = _ctx; int ret; unsigned j; - const struct stream_owner *owners = stream_owners(stream); + const struct blob_extraction_target *targets = blob_extraction_targets(blob); ctx->reparse_ptr = NULL; @@ -645,8 +649,8 @@ unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status, j = 0; ret = 0; - for (u32 i = 0; i < stream->out_refcnt; i++) { - struct wim_inode *inode = owners[i].inode; + for (u32 i = 0; i < blob->out_refcnt; i++) { + struct wim_inode *inode = targets[i].inode; if (inode_is_symlink(inode)) { /* We finally have the symlink data, so we can create @@ -661,7 +665,7 @@ unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status, path = unix_build_inode_extraction_path(inode, ctx); ret = unix_create_symlink(inode, path, ctx->reparse_data, - stream->size, + blob->size, rpfix, ctx->target_abspath, ctx->target_abspath_nchars); @@ -742,7 +746,7 @@ unix_extract(struct list_head 
*dentry_list, struct apply_ctx *_ctx) /* Extract directories and empty regular files. Directories are needed * because we can't extract any other files until their directories * exist. Empty files are needed because they don't have - * representatives in the stream list. */ + * representatives in the blob list. */ unix_count_dentries(dentry_list, &dir_count, &empty_file_count); @@ -772,15 +776,15 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx) /* Extract nonempty regular files and symbolic links. */ - struct read_stream_list_callbacks cbs = { - .begin_stream = unix_begin_extract_stream, - .begin_stream_ctx = ctx, + struct read_blob_list_callbacks cbs = { + .begin_blob = unix_begin_extract_blob, + .begin_blob_ctx = ctx, .consume_chunk = unix_extract_chunk, .consume_chunk_ctx = ctx, - .end_stream = unix_end_extract_stream, - .end_stream_ctx = ctx, + .end_blob = unix_end_extract_blob, + .end_blob_ctx = ctx, }; - ret = extract_stream_list(&ctx->common, &cbs); + ret = extract_blob_list(&ctx->common, &cbs); if (ret) goto out; diff --git a/src/unix_capture.c b/src/unix_capture.c index 488996aa..a61d91f2 100644 --- a/src/unix_capture.c +++ b/src/unix_capture.c @@ -32,10 +32,10 @@ #include #include +#include "wimlib/blob_table.h" #include "wimlib/capture.h" #include "wimlib/dentry.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/reparse.h" #include "wimlib/timestamp.h" #include "wimlib/unix_data.h" @@ -101,31 +101,36 @@ my_fdopendir(int *dirfd_p) static int unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode, - struct list_head *unhashed_streams) + struct list_head *unhashed_blobs) { - struct wim_lookup_table_entry *lte; - char *file_on_disk; + struct blob_descriptor *blob; + struct wim_inode_stream *strm; inode->i_attributes = FILE_ATTRIBUTE_NORMAL; - /* Empty files do not have to have a lookup table entry. 
*/ - if (!size) - return 0; + if (size) { + char *file_on_disk = STRDUP(path); + if (!file_on_disk) + return WIMLIB_ERR_NOMEM; + blob = new_blob_descriptor(); + if (!blob) { + FREE(file_on_disk); + return WIMLIB_ERR_NOMEM; + } + blob->file_on_disk = file_on_disk; + blob->file_inode = inode; + blob->blob_location = BLOB_IN_FILE_ON_DISK; + blob->size = size; + } else { + blob = NULL; + } - file_on_disk = STRDUP(path); - if (!file_on_disk) - return WIMLIB_ERR_NOMEM; - lte = new_lookup_table_entry(); - if (!lte) { - FREE(file_on_disk); + strm = inode_add_stream(inode, STREAM_TYPE_DATA, NO_STREAM_NAME, blob); + if (!strm) { + free_blob_descriptor(blob); return WIMLIB_ERR_NOMEM; } - lte->file_on_disk = file_on_disk; - lte->file_inode = inode; - lte->resource_location = RESOURCE_IN_FILE_ON_DISK; - lte->size = size; - add_unhashed_stream(lte, inode, 0, unhashed_streams); - inode->i_lte = lte; + prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs); return 0; } @@ -307,7 +312,7 @@ unix_scan_symlink(const char *full_path, int dirfd, const char *relpath, if (ret) return ret; } - ret = wim_inode_set_symlink(inode, dest, params->lookup_table); + ret = wim_inode_set_symlink(inode, dest, params->blob_table); if (ret) return ret; @@ -396,7 +401,6 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret, inode->i_last_write_time = time_t_to_wim_timestamp(stbuf.st_mtime); inode->i_last_access_time = time_t_to_wim_timestamp(stbuf.st_atime); #endif - inode->i_resolved = 1; if (params->add_flags & WIMLIB_ADD_FLAG_UNIX_DATA) { struct wimlib_unix_data unix_data; @@ -418,7 +422,7 @@ unix_build_dentry_tree_recursive(struct wim_dentry **tree_ret, if (S_ISREG(stbuf.st_mode)) { ret = unix_scan_regular_file(full_path, stbuf.st_size, - inode, params->unhashed_streams); + inode, params->unhashed_blobs); } else if (S_ISDIR(stbuf.st_mode)) { ret = unix_scan_directory(tree, full_path, full_path_len, dirfd, relpath, params); @@ -438,7 +442,7 @@ out_progress: ret = 
do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL); out: if (unlikely(ret)) { - free_dentry_tree(tree, params->lookup_table); + free_dentry_tree(tree, params->blob_table); tree = NULL; ret = report_capture_error(params, ret, full_path); } diff --git a/src/update_image.c b/src/update_image.c index b973c21d..b2eceecf 100644 --- a/src/update_image.c +++ b/src/update_image.c @@ -55,12 +55,12 @@ #include "wimlib/alloca.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/capture.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #ifdef WITH_NTFS_3G # include "wimlib/ntfs_3g.h" /* for do_ntfs_umount() */ @@ -134,8 +134,8 @@ struct update_command_journal { /* Location of the WIM image's root pointer. */ struct wim_dentry **root_p; - /* Pointer to the lookup table of the WIM (may needed for rollback) */ - struct wim_lookup_table *lookup_table; + /* Pointer to the blob table of the WIM (may needed for rollback) */ + struct blob_table *blob_table; /* List of dentries that are currently unlinked from the WIM image. * These must be freed when no longer needed for commit or rollback. */ @@ -157,7 +157,7 @@ init_update_primitive_list(struct update_primitive_list *l) * commands. 
*/ static struct update_command_journal * new_update_command_journal(size_t num_cmds, struct wim_dentry **root_p, - struct wim_lookup_table *lookup_table) + struct blob_table *blob_table) { struct update_command_journal *j; @@ -166,7 +166,7 @@ new_update_command_journal(size_t num_cmds, struct wim_dentry **root_p, j->num_cmds = num_cmds; j->cur_cmd = 0; j->root_p = root_p; - j->lookup_table = lookup_table; + j->blob_table = blob_table; INIT_LIST_HEAD(&j->orphans); for (size_t i = 0; i < num_cmds; i++) init_update_primitive_list(&j->cmd_prims[i]); @@ -186,7 +186,7 @@ free_update_command_journal(struct update_command_journal *j) orphan = list_first_entry(&j->orphans, struct wim_dentry, tmp_list); list_del(&orphan->tmp_list); - free_dentry_tree(orphan, j->lookup_table); + free_dentry_tree(orphan, j->blob_table); } for (size_t i = 0; i < j->num_cmds; i++) @@ -295,7 +295,7 @@ rollback_name_change(utf16lechar *old_name, FREE(*name_ptr); if (old_name) { *name_ptr = old_name; - *name_nbytes_ptr = utf16le_strlen(old_name); + *name_nbytes_ptr = utf16le_len_bytes(old_name); } else { *name_ptr = NULL; *name_nbytes_ptr = 0; @@ -532,7 +532,7 @@ handle_conflict(struct wim_dentry *branch, struct wim_dentry *existing, return ret; } } - free_dentry_tree(branch, j->lookup_table); + free_dentry_tree(branch, j->blob_table); return 0; } else if (add_flags & WIMLIB_ADD_FLAG_NO_REPLACE) { /* Can't replace nondirectory file */ @@ -705,7 +705,7 @@ attach_branch(struct wim_dentry *branch, const tchar *target_tstr, out_free_target: tstr_put_utf16le(target); out_free_branch: - free_dentry_tree(branch, j->lookup_table); + free_dentry_tree(branch, j->blob_table); out: return ret; } @@ -782,7 +782,7 @@ execute_add_command(struct update_command_journal *j, const struct wimlib_update_command *add_cmd, struct wim_inode_table *inode_table, struct wim_sd_set *sd_set, - struct list_head *unhashed_streams) + struct list_head *unhashed_blobs) { int ret; int add_flags; @@ -825,8 +825,8 @@ 
execute_add_command(struct update_command_journal *j, if (ret) goto out; - params.lookup_table = wim->lookup_table; - params.unhashed_streams = unhashed_streams; + params.blob_table = wim->blob_table; + params.unhashed_blobs = unhashed_blobs; params.inode_table = inode_table; params.sd_set = sd_set; params.config = &config; @@ -851,7 +851,7 @@ execute_add_command(struct update_command_journal *j, ret = call_progress(params.progfunc, WIMLIB_PROGRESS_MSG_SCAN_END, ¶ms.progress, params.progctx); if (ret) { - free_dentry_tree(branch, wim->lookup_table); + free_dentry_tree(branch, wim->blob_table); goto out_cleanup_after_capture; } @@ -860,7 +860,7 @@ execute_add_command(struct update_command_journal *j, { ERROR("\"%"TS"\" is not a directory!", fs_source_path); ret = WIMLIB_ERR_NOTDIR; - free_dentry_tree(branch, wim->lookup_table); + free_dentry_tree(branch, wim->blob_table); goto out_cleanup_after_capture; } @@ -1036,7 +1036,7 @@ rename_wim_path(WIMStruct *wim, const tchar *from, const tchar *to, return -ENOMEM; if (dst) { unlink_dentry(dst); - free_dentry_tree(dst, wim->lookup_table); + free_dentry_tree(dst, wim->blob_table); } unlink_dentry(src); dentry_add_child(parent_of_dst, src); @@ -1122,7 +1122,7 @@ execute_update_commands(WIMStruct *wim, { struct wim_inode_table *inode_table; struct wim_sd_set *sd_set; - struct list_head unhashed_streams; + struct list_head unhashed_blobs; struct update_command_journal *j; union wimlib_progress_info info; int ret; @@ -1142,7 +1142,7 @@ execute_update_commands(WIMStruct *wim, if (ret) goto out_destroy_inode_table; - INIT_LIST_HEAD(&unhashed_streams); + INIT_LIST_HEAD(&unhashed_blobs); } else { inode_table = NULL; sd_set = NULL; @@ -1152,7 +1152,7 @@ execute_update_commands(WIMStruct *wim, */ j = new_update_command_journal(num_cmds, &wim_get_current_image_metadata(wim)->root_dentry, - wim->lookup_table); + wim->blob_table); if (!j) { ret = WIMLIB_ERR_NOMEM; goto out_destroy_sd_set; @@ -1176,7 +1176,7 @@ 
execute_update_commands(WIMStruct *wim, switch (cmds[i].op) { case WIMLIB_UPDATE_OP_ADD: ret = execute_add_command(j, wim, &cmds[i], inode_table, - sd_set, &unhashed_streams); + sd_set, &unhashed_blobs); break; case WIMLIB_UPDATE_OP_DELETE: ret = execute_delete_command(j, wim, &cmds[i]); @@ -1204,7 +1204,7 @@ execute_update_commands(WIMStruct *wim, imd = wim_get_current_image_metadata(wim); - list_splice_tail(&unhashed_streams, &imd->unhashed_streams); + list_splice_tail(&unhashed_blobs, &imd->unhashed_blobs); inode_table_prepare_inode_list(inode_table, &imd->inode_list); } goto out_destroy_sd_set; diff --git a/src/util.c b/src/util.c index 11116672..24228096 100644 --- a/src/util.c +++ b/src/util.c @@ -170,15 +170,6 @@ void *mempcpy(void *dst, const void *src, size_t n) } #endif -size_t -utf16le_strlen(const utf16lechar *s) -{ - const utf16lechar *p = s; - while (*p) - p++; - return (p - s) * sizeof(utf16lechar); -} - static bool seeded = false; static void diff --git a/src/verify.c b/src/verify.c index c06e7029..81aa96d0 100644 --- a/src/verify.c +++ b/src/verify.c @@ -25,21 +25,21 @@ # include "config.h" #endif +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/progress.h" #include "wimlib/security.h" static int -append_lte_to_list(struct wim_lookup_table_entry *lte, void *_list) +append_blob_to_list(struct blob_descriptor *blob, void *_list) { - list_add(<e->extraction_list, (struct list_head *)_list); + list_add(&blob->extraction_list, (struct list_head *)_list); return 0; } -struct verify_stream_list_ctx { +struct verify_blob_list_ctx { wimlib_progress_func_t progfunc; void *progctx; union wimlib_progress_info *progress; @@ -47,16 +47,16 @@ struct verify_stream_list_ctx { }; static int -end_verify_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) +end_verify_blob(struct blob_descriptor *blob, int status, void *_ctx) { - struct 
verify_stream_list_ctx *ctx = _ctx; + struct verify_blob_list_ctx *ctx = _ctx; union wimlib_progress_info *progress = ctx->progress; if (status) return status; progress->verify_streams.completed_streams++; - progress->verify_streams.completed_bytes += lte->size; + progress->verify_streams.completed_bytes += blob->size; /* Handle rate-limiting of progress messages */ @@ -93,14 +93,14 @@ end_verify_stream(struct wim_lookup_table_entry *lte, int status, void *_ctx) } static int -verify_image_streams_present(struct wim_image_metadata *imd, - struct wim_lookup_table *lookup_table) +verify_image_blobs_present(struct wim_image_metadata *imd, + struct blob_table *blob_table) { struct wim_inode *inode; int ret; image_for_each_inode(inode, imd) { - ret = inode_resolve_streams(inode, lookup_table, false); + ret = inode_resolve_streams(inode, blob_table, false); if (ret) return ret; } @@ -112,13 +112,13 @@ WIMLIBAPI int wimlib_verify_wim(WIMStruct *wim, int verify_flags) { int ret; - LIST_HEAD(stream_list); + LIST_HEAD(blob_list); union wimlib_progress_info progress; - struct verify_stream_list_ctx ctx; - struct wim_lookup_table_entry *lte; - struct read_stream_list_callbacks cbs = { - .end_stream = end_verify_stream, - .end_stream_ctx = &ctx, + struct verify_blob_list_ctx ctx; + struct blob_descriptor *blob; + struct read_blob_list_callbacks cbs = { + .end_blob = end_verify_blob, + .end_blob_ctx = &ctx, }; /* Check parameters */ @@ -150,8 +150,8 @@ wimlib_verify_wim(WIMStruct *wim, int verify_flags) if (ret) return ret; - ret = verify_image_streams_present(wim_get_current_image_metadata(wim), - wim->lookup_table); + ret = verify_image_blobs_present(wim_get_current_image_metadata(wim), + wim->blob_table); if (ret) return ret; @@ -165,16 +165,16 @@ wimlib_verify_wim(WIMStruct *wim, int verify_flags) wim->filename); } - /* Verify the streams */ + /* Verify the blobs: SHA-1 message digests must match */ - for_lookup_table_entry(wim->lookup_table, append_lte_to_list, 
&stream_list); + for_blob_in_table(wim->blob_table, append_blob_to_list, &blob_list); memset(&progress, 0, sizeof(progress)); progress.verify_streams.wimfile = wim->filename; - list_for_each_entry(lte, &stream_list, extraction_list) { + list_for_each_entry(blob, &blob_list, extraction_list) { progress.verify_streams.total_streams++; - progress.verify_streams.total_bytes += lte->size; + progress.verify_streams.total_bytes += blob->size; } ctx.progfunc = wim->progfunc; @@ -187,8 +187,7 @@ wimlib_verify_wim(WIMStruct *wim, int verify_flags) if (ret) return ret; - return read_stream_list(&stream_list, - offsetof(struct wim_lookup_table_entry, - extraction_list), - &cbs, VERIFY_STREAM_HASHES); + return read_blob_list(&blob_list, + offsetof(struct blob_descriptor, extraction_list), + &cbs, VERIFY_BLOB_HASHES); } diff --git a/src/wim.c b/src/wim.c index 4fc02efb..5feb7a44 100644 --- a/src/wim.c +++ b/src/wim.c @@ -33,12 +33,12 @@ #include "wimlib.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/bitops.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/file_io.h" #include "wimlib/integrity.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #ifdef WITH_NTFS_3G # include "wimlib/ntfs_3g.h" /* for do_ntfs_umount() */ @@ -182,7 +182,6 @@ WIMLIBAPI int wimlib_create_new_wim(int ctype, WIMStruct **wim_ret) { WIMStruct *wim; - struct wim_lookup_table *table; int ret; ret = wimlib_global_init(WIMLIB_INIT_FLAG_ASSUME_UTF8); @@ -197,12 +196,11 @@ wimlib_create_new_wim(int ctype, WIMStruct **wim_ret) if (ret) goto out_free_wim; - table = new_lookup_table(9001); - if (!table) { + wim->blob_table = new_blob_table(9001); + if (!wim->blob_table) { ret = WIMLIB_ERR_NOMEM; goto out_free_wim; } - wim->lookup_table = table; wim->compression_type = ctype; wim->out_compression_type = ctype; wim->chunk_size = wim->hdr.chunk_size; @@ -217,24 +215,24 @@ out_free_wim: static void destroy_image_metadata(struct wim_image_metadata 
*imd, - struct wim_lookup_table *table, - bool free_metadata_lte) + struct blob_table *table, + bool free_metadata_blob_descriptor) { free_dentry_tree(imd->root_dentry, table); imd->root_dentry = NULL; free_wim_security_data(imd->security_data); imd->security_data = NULL; - if (free_metadata_lte) { - free_lookup_table_entry(imd->metadata_lte); - imd->metadata_lte = NULL; + if (free_metadata_blob_descriptor) { + free_blob_descriptor(imd->metadata_blob); + imd->metadata_blob = NULL; } if (!table) { - struct wim_lookup_table_entry *lte, *tmp; - list_for_each_entry_safe(lte, tmp, &imd->unhashed_streams, unhashed_list) - free_lookup_table_entry(lte); + struct blob_descriptor *blob, *tmp; + list_for_each_entry_safe(blob, tmp, &imd->unhashed_blobs, unhashed_list) + free_blob_descriptor(blob); } - INIT_LIST_HEAD(&imd->unhashed_streams); + INIT_LIST_HEAD(&imd->unhashed_blobs); INIT_LIST_HEAD(&imd->inode_list); #ifdef WITH_NTFS_3G if (imd->ntfs_vol) { @@ -245,8 +243,7 @@ destroy_image_metadata(struct wim_image_metadata *imd, } void -put_image_metadata(struct wim_image_metadata *imd, - struct wim_lookup_table *table) +put_image_metadata(struct wim_image_metadata *imd, struct blob_table *table) { if (imd && --imd->refcnt == 0) { destroy_image_metadata(imd, table, true); @@ -280,7 +277,7 @@ new_image_metadata(void) if (imd) { imd->refcnt = 1; INIT_LIST_HEAD(&imd->inode_list); - INIT_LIST_HEAD(&imd->unhashed_streams); + INIT_LIST_HEAD(&imd->unhashed_blobs); } return imd; } @@ -364,7 +361,7 @@ deselect_current_wim_image(WIMStruct *wim) return; imd = wim_get_current_image_metadata(wim); if (!imd->modified) { - wimlib_assert(list_empty(&imd->unhashed_streams)); + wimlib_assert(list_empty(&imd->unhashed_blobs)); destroy_image_metadata(imd, NULL, false); } wim->current_image = WIMLIB_NO_IMAGE; @@ -617,7 +614,7 @@ open_wim_file(const tchar *filename, struct filedes *fd_ret) /* * Begins the reading of a WIM file; opens the file and reads its header and - * lookup table, and optionally 
checks the integrity. + * blob table, and optionally checks the integrity. */ static int begin_read(WIMStruct *wim, const void *wim_filename_or_fd, int open_flags) @@ -733,8 +730,8 @@ begin_read(WIMStruct *wim, const void *wim_filename_or_fd, int open_flags) } if (open_flags & WIMLIB_OPEN_FLAG_FROM_PIPE) { - wim->lookup_table = new_lookup_table(9001); - if (!wim->lookup_table) + wim->blob_table = new_blob_table(9001); + if (!wim->blob_table) return WIMLIB_ERR_NOMEM; } else { @@ -750,7 +747,7 @@ begin_read(WIMStruct *wim, const void *wim_filename_or_fd, int open_flags) return WIMLIB_ERR_IMAGE_COUNT; } - ret = read_wim_lookup_table(wim); + ret = read_blob_table(wim); if (ret) return ret; } @@ -812,27 +809,26 @@ wimlib_open_wim(const tchar *wimfile, int open_flags, WIMStruct **wim_ret) NULL, NULL); } -/* Checksum all streams that are unhashed (other than the metadata streams), - * merging them into the lookup table as needed. This is a no-op unless the - * library has previously used to add or mount an image using the same - * WIMStruct. */ +/* Checksum all blobs that are unhashed (other than the metadata blobs), merging + * them into the blob table as needed. This is a no-op unless files have been + * added to an image in the same WIMStruct. 
*/ int -wim_checksum_unhashed_streams(WIMStruct *wim) +wim_checksum_unhashed_blobs(WIMStruct *wim) { int ret; if (!wim_has_metadata(wim)) return 0; for (int i = 0; i < wim->hdr.image_count; i++) { - struct wim_lookup_table_entry *lte, *tmp; + struct blob_descriptor *blob, *tmp; struct wim_image_metadata *imd = wim->image_metadata[i]; - image_for_each_unhashed_stream_safe(lte, tmp, imd) { - struct wim_lookup_table_entry *new_lte; - ret = hash_unhashed_stream(lte, wim->lookup_table, &new_lte); + image_for_each_unhashed_blob_safe(blob, tmp, imd) { + struct blob_descriptor *new_blob; + ret = hash_unhashed_blob(blob, wim->blob_table, &new_blob); if (ret) return ret; - if (new_lte != lte) - free_lookup_table_entry(lte); + if (new_blob != blob) + free_blob_descriptor(blob); } } return 0; @@ -890,7 +886,7 @@ wimlib_free(WIMStruct *wim) if (filedes_valid(&wim->out_fd)) filedes_close(&wim->out_fd); - free_lookup_table(wim->lookup_table); + free_blob_table(wim->blob_table); wimlib_free_decompressor(wim->decompressor); diff --git a/src/wimboot.c b/src/wimboot.c index 9a61ba51..b06816e2 100644 --- a/src/wimboot.c +++ b/src/wimboot.c @@ -36,8 +36,8 @@ #include "wimlib/win32_common.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/util.h" #include "wimlib/wimboot.h" #include "wimlib/win32.h" @@ -1057,12 +1057,12 @@ out: * * @h * Open handle to the file, with GENERIC_WRITE access. - * @lte - * Unnamed data stream of the file. + * @blob + * The blob for the unnamed data stream of the file. * @data_source_id * Allocated identifier for the WIM data source on the destination volume. - * @lookup_table_hash - * SHA-1 message digest of the WIM's lookup table. + * @blob_table_hash + * SHA-1 message digest of the WIM's blob table. * @wof_running * %true if the WOF driver appears to be available and working; %false if * not. 
@@ -1071,9 +1071,9 @@ out: */ bool wimboot_set_pointer(HANDLE h, - const struct wim_lookup_table_entry *lte, + const struct blob_descriptor *blob, u64 data_source_id, - const u8 lookup_table_hash[SHA1_HASH_SIZE], + const u8 blob_table_hash[SHA1_HASH_SIZE], bool wof_running) { DWORD bytes_returned; @@ -1096,9 +1096,9 @@ wimboot_set_pointer(HANDLE h, in.wim_info.version = WIM_PROVIDER_CURRENT_VERSION; in.wim_info.flags = 0; in.wim_info.data_source_id = data_source_id; - copy_hash(in.wim_info.resource_hash, lte->hash); + copy_hash(in.wim_info.unnamed_data_stream_hash, blob->hash); - /* lookup_table_hash is not necessary */ + /* blob_table_hash is not necessary */ if (!DeviceIoControl(h, FSCTL_SET_EXTERNAL_BACKING, &in, sizeof(in), NULL, 0, @@ -1148,11 +1148,11 @@ wimboot_set_pointer(HANDLE h, in.wim_info.version = 2; in.wim_info.flags = 0; in.wim_info.data_source_id = data_source_id; - copy_hash(in.wim_info.resource_hash, lte->hash); - copy_hash(in.wim_info.wim_lookup_table_hash, lookup_table_hash); - in.wim_info.stream_uncompressed_size = lte->size; - in.wim_info.stream_compressed_size = lte->rspec->size_in_wim; - in.wim_info.stream_offset_in_wim = lte->rspec->offset_in_wim; + copy_hash(in.wim_info.unnamed_data_stream_hash, blob->hash); + copy_hash(in.wim_info.blob_table_hash, blob_table_hash); + in.wim_info.unnamed_data_stream_uncompressed_size = blob->size; + in.wim_info.unnamed_data_stream_compressed_size = blob->rdesc->size_in_wim; + in.wim_info.unnamed_data_stream_offset_in_wim = blob->rdesc->offset_in_wim; if (!DeviceIoControl(h, FSCTL_SET_REPARSE_POINT, &in, sizeof(in), NULL, 0, &bytes_returned, NULL)) @@ -1167,7 +1167,7 @@ wimboot_set_pointer(HANDLE h, return false; if (!SetFilePointerEx(h, - (LARGE_INTEGER){ .QuadPart = lte->size}, + (LARGE_INTEGER){ .QuadPart = blob->size}, NULL, FILE_BEGIN)) return false; diff --git a/src/win32_apply.c b/src/win32_apply.c index 4c65fefe..a662cbf3 100644 --- a/src/win32_apply.c +++ b/src/win32_apply.c @@ -29,10 +29,11 @@ 
#include "wimlib/apply.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/capture.h" /* for mangle_pat() and match_pattern_list() */ #include "wimlib/dentry.h" +#include "wimlib/encoding.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/paths.h" #include "wimlib/reparse.h" @@ -52,7 +53,7 @@ struct win32_apply_ctx { u64 data_source_id; struct string_set *prepopulate_pats; void *mem_prepopulate_pats; - u8 wim_lookup_table_hash[SHA1_HASH_SIZE]; + u8 blob_table_hash[SHA1_HASH_SIZE]; bool wof_running; bool tried_to_load_prepopulate_list; } wimboot; @@ -78,7 +79,7 @@ struct win32_apply_ctx { * target-relative NT paths */ wchar_t *print_buffer; - /* Allocated buffer for reading stream data when it cannot be extracted + /* Allocated buffer for reading blob data when it cannot be extracted * directly */ u8 *data_buffer; @@ -103,20 +104,20 @@ struct win32_apply_ctx { /* Array of open handles to filesystem streams currently being written */ - HANDLE open_handles[MAX_OPEN_STREAMS]; + HANDLE open_handles[MAX_OPEN_FILES]; /* Number of handles in @open_handles currently open (filled in from the * beginning of the array) */ unsigned num_open_handles; /* List of dentries, joined by @tmp_list, that need to have reparse data - * extracted as soon as the whole stream has been read into - * @data_buffer. */ + * extracted as soon as the whole blob has been read into @data_buffer. + * */ struct list_head reparse_dentries; /* List of dentries, joined by @tmp_list, that need to have raw - * encrypted data extracted as soon as the whole stream has been read - * into @data_buffer. */ + * encrypted data extracted as soon as the whole blob has been read into + * @data_buffer. 
*/ struct list_head encrypted_dentries; /* Number of files for which we didn't have permission to set the full @@ -276,7 +277,7 @@ load_prepopulate_pats(struct win32_apply_ctx *ctx) { const wchar_t *path = L"\\Windows\\System32\\WimBootCompress.ini"; struct wim_dentry *dentry; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; int ret; void *buf; struct string_set *s; @@ -290,13 +291,14 @@ load_prepopulate_pats(struct win32_apply_ctx *ctx) (dentry->d_inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT | FILE_ATTRIBUTE_ENCRYPTED)) || - !(lte = inode_unnamed_lte(dentry->d_inode, ctx->common.wim->lookup_table))) + !(blob = inode_get_blob_for_unnamed_data_stream(dentry->d_inode, + ctx->common.wim->blob_table))) { WARNING("%ls does not exist in WIM image!", path); return WIMLIB_ERR_PATH_DOES_NOT_EXIST; } - ret = read_full_stream_into_alloc_buf(lte, &buf); + ret = read_full_blob_into_alloc_buf(blob, &buf); if (ret) return ret; @@ -309,7 +311,7 @@ load_prepopulate_pats(struct win32_apply_ctx *ctx) sec.name = T("PrepopulateList"); sec.strings = s; - ret = do_load_text_file(path, buf, lte->size, &mem, &sec, 1, + ret = do_load_text_file(path, buf, blob->size, &mem, &sec, 1, LOAD_TEXT_FILE_REMOVE_QUOTES | LOAD_TEXT_FILE_NO_WARNINGS, mangle_pat); @@ -374,7 +376,7 @@ will_externally_back_inode(struct wim_inode *inode, struct win32_apply_ctx *ctx, { struct list_head *next; struct wim_dentry *dentry; - struct wim_lookup_table_entry *stream; + struct blob_descriptor *blob; int ret; if (inode->i_can_externally_back) @@ -390,15 +392,15 @@ will_externally_back_inode(struct wim_inode *inode, struct win32_apply_ctx *ctx, FILE_ATTRIBUTE_ENCRYPTED)) return WIM_BACKING_NOT_POSSIBLE; - stream = inode_unnamed_lte_resolved(inode); + blob = inode_get_blob_for_unnamed_data_stream_resolved(inode); /* Note: Microsoft's WoF driver errors out if it tries to satisfy a * read, with ending offset >= 4 GiB, from an externally backed file. 
*/ - if (!stream || - stream->resource_location != RESOURCE_IN_WIM || - stream->rspec->wim != ctx->common.wim || - stream->size != stream->rspec->uncompressed_size || - stream->size > 4200000000) + if (!blob || + blob->blob_location != BLOB_IN_WIM || + blob->rdesc->wim != ctx->common.wim || + blob->size != blob->rdesc->uncompressed_size || + blob->size > 4200000000) return WIM_BACKING_NOT_POSSIBLE; /* @@ -475,9 +477,9 @@ set_external_backing(HANDLE h, struct wim_inode *inode, struct win32_apply_ctx * } else { /* Externally backing. */ if (unlikely(!wimboot_set_pointer(h, - inode_unnamed_lte_resolved(inode), + inode_get_blob_for_unnamed_data_stream_resolved(inode), ctx->wimboot.data_source_id, - ctx->wimboot.wim_lookup_table_hash, + ctx->wimboot.blob_table_hash, ctx->wimboot.wof_running))) { const DWORD err = GetLastError(); @@ -491,11 +493,11 @@ set_external_backing(HANDLE h, struct wim_inode *inode, struct win32_apply_ctx * } } -/* Calculates the SHA-1 message digest of the WIM's lookup table. */ +/* Calculates the SHA-1 message digest of the WIM's blob table. 
*/ static int -hash_lookup_table(WIMStruct *wim, u8 hash[SHA1_HASH_SIZE]) +hash_blob_table(WIMStruct *wim, u8 hash[SHA1_HASH_SIZE]) { - return wim_reshdr_to_hash(&wim->hdr.lookup_table_reshdr, wim, hash); + return wim_reshdr_to_hash(&wim->hdr.blob_table_reshdr, wim, hash); } /* Prepare for doing a "WIMBoot" extraction by loading patterns from @@ -514,8 +516,7 @@ start_wimboot_extraction(struct win32_apply_ctx *ctx) if (!wim_info_get_wimboot(wim->wim_info, wim->current_image)) WARNING("Image is not marked as WIMBoot compatible!"); - ret = hash_lookup_table(ctx->common.wim, - ctx->wimboot.wim_lookup_table_hash); + ret = hash_blob_table(ctx->common.wim, ctx->wimboot.blob_table_hash); if (ret) return ret; @@ -629,8 +630,11 @@ static size_t inode_longest_named_data_stream_spec(const struct wim_inode *inode) { size_t max = 0; - for (u16 i = 0; i < inode->i_num_ads; i++) { - size_t len = inode->i_ads_entries[i].stream_name_nbytes; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (!stream_is_named_data_stream(strm)) + continue; + size_t len = utf16le_len_bytes(strm->stream_name); if (len > max) max = len; } @@ -1296,14 +1300,15 @@ retry: return WIMLIB_ERR_OPEN; } -/* Create empty named data streams. +/* + * Create empty named data streams for the specified file, if there are any. * - * Since these won't have 'struct wim_lookup_table_entry's, they won't show up - * in the call to extract_stream_list(). Hence the need for the special case. + * Since these won't have blob descriptors, they won't show up in the call to + * extract_blob_list(). Hence the need for the special case. 
*/ static int -create_any_empty_ads(const struct wim_dentry *dentry, - struct win32_apply_ctx *ctx) +create_empty_named_data_streams(const struct wim_dentry *dentry, + struct win32_apply_ctx *ctx) { const struct wim_inode *inode = dentry->d_inode; bool path_modified = false; @@ -1312,24 +1317,17 @@ create_any_empty_ads(const struct wim_dentry *dentry, if (!ctx->common.supported_features.named_data_streams) return 0; - for (u16 i = 0; i < inode->i_num_ads; i++) { - const struct wim_ads_entry *entry; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; HANDLE h; - entry = &inode->i_ads_entries[i]; - - /* Not named? */ - if (!entry->stream_name_nbytes) - continue; - - /* Not empty? */ - if (entry->lte) + if (!stream_is_named_data_stream(strm) || + stream_blob_resolved(strm) != NULL) continue; build_extraction_path_with_ads(dentry, ctx, - entry->stream_name, - entry->stream_name_nbytes / - sizeof(wchar_t)); + strm->stream_name, + utf16le_len_chars(strm->stream_name)); path_modified = true; ret = supersede_file_or_stream(ctx, &h); if (ret) @@ -1425,7 +1423,7 @@ create_directories(struct list_head *dentry_list, ret = create_directory(dentry, ctx); if (!ret) - ret = create_any_empty_ads(dentry, ctx); + ret = create_empty_named_data_streams(dentry, ctx); ret = check_apply_error(dentry, ctx, ret); if (ret) @@ -1462,7 +1460,7 @@ create_nondirectory_inode(HANDLE *h_ret, const struct wim_dentry *dentry, if (ret) goto out_close; - ret = create_any_empty_ads(dentry, ctx); + ret = create_empty_named_data_streams(dentry, ctx); if (ret) goto out_close; @@ -1620,21 +1618,21 @@ close_handles(struct win32_apply_ctx *ctx) (*func_NtClose)(ctx->open_handles[i]); } -/* Prepare to read the next stream, which has size @stream_size, into an - * in-memory buffer. */ +/* Prepare to read the next blob, which has size @blob_size, into an in-memory + * buffer. 
*/ static bool -prepare_data_buffer(struct win32_apply_ctx *ctx, u64 stream_size) +prepare_data_buffer(struct win32_apply_ctx *ctx, u64 blob_size) { - if (stream_size > ctx->data_buffer_size) { + if (blob_size > ctx->data_buffer_size) { /* Larger buffer needed. */ void *new_buffer; - if ((size_t)stream_size != stream_size) + if ((size_t)blob_size != blob_size) return false; - new_buffer = REALLOC(ctx->data_buffer, stream_size); + new_buffer = REALLOC(ctx->data_buffer, blob_size); if (!new_buffer) return false; ctx->data_buffer = new_buffer; - ctx->data_buffer_size = stream_size; + ctx->data_buffer_size = blob_size; } /* On the first call this changes data_buffer_ptr from NULL, which tells * extract_chunk() that the data buffer needs to be filled while reading @@ -1644,82 +1642,64 @@ prepare_data_buffer(struct win32_apply_ctx *ctx, u64 stream_size) } static int -begin_extract_stream_instance(const struct wim_lookup_table_entry *stream, - struct wim_dentry *dentry, - const wchar_t *stream_name, - struct win32_apply_ctx *ctx) +begin_extract_blob_instance(const struct blob_descriptor *blob, + struct wim_dentry *dentry, + const struct wim_inode_stream *strm, + struct win32_apply_ctx *ctx) { - const struct wim_inode *inode = dentry->d_inode; - size_t stream_name_nchars = 0; FILE_ALLOCATION_INFORMATION alloc_info; HANDLE h; NTSTATUS status; - if (unlikely(stream_name)) - stream_name_nchars = wcslen(stream_name); - - if (unlikely(stream_name_nchars)) { - build_extraction_path_with_ads(dentry, ctx, - stream_name, stream_name_nchars); - } else { - build_extraction_path(dentry, ctx); + if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) { + /* We can't write the reparse point stream directly; we must set + * it with FSCTL_SET_REPARSE_POINT, which requires that all the + * data be available. So, stage the data in a buffer. 
*/ + if (!prepare_data_buffer(ctx, blob->size)) + return WIMLIB_ERR_NOMEM; + list_add_tail(&dentry->tmp_list, &ctx->reparse_dentries); + return 0; } - - /* Encrypted file? */ - if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) - && (stream_name_nchars == 0)) - { - if (!ctx->common.supported_features.encrypted_files) - return 0; - - /* We can't write encrypted file streams directly; we must use + if (unlikely(strm->stream_type == STREAM_TYPE_EFSRPC_RAW_DATA)) { + /* We can't write encrypted files directly; we must use * WriteEncryptedFileRaw(), which requires providing the data * through a callback function. This can't easily be combined * with our own callback-based approach. * - * The current workaround is to simply read the stream into - * memory and write the encrypted file from that. + * The current workaround is to simply read the blob into memory + * and write the encrypted file from that. * * TODO: This isn't sufficient for extremely large encrypted * files. Perhaps we should create an extra thread to write * such files... */ - if (!prepare_data_buffer(ctx, stream->size)) + if (!prepare_data_buffer(ctx, blob->size)) return WIMLIB_ERR_NOMEM; list_add_tail(&dentry->tmp_list, &ctx->encrypted_dentries); return 0; } - /* Reparse point? - * - * Note: FILE_ATTRIBUTE_REPARSE_POINT is tested *after* - * FILE_ATTRIBUTE_ENCRYPTED since the WIM format does not store both EFS - * data and reparse data for the same file, and the EFS data takes - * precedence. */ - if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) - && (stream_name_nchars == 0)) - { - if (!ctx->common.supported_features.reparse_points) - return 0; - - /* We can't write the reparse stream directly; we must set it - * with FSCTL_SET_REPARSE_POINT, which requires that all the - * data be available. So, stage the data in a buffer. */ + /* It's a data stream (may be unnamed or named). 
*/ + wimlib_assert(strm->stream_type == STREAM_TYPE_DATA); - if (!prepare_data_buffer(ctx, stream->size)) - return WIMLIB_ERR_NOMEM; - list_add_tail(&dentry->tmp_list, &ctx->reparse_dentries); - return 0; - } - - if (ctx->num_open_handles == MAX_OPEN_STREAMS) { + if (ctx->num_open_handles == MAX_OPEN_FILES) { /* XXX: Fix this. But because of the checks in - * extract_stream_list(), this can now only happen on a - * filesystem that does not support hard links. */ + * extract_blob_list(), this can now only happen on a filesystem + * that does not support hard links. */ ERROR("Can't extract data: too many open files!"); return WIMLIB_ERR_UNSUPPORTED; } + + if (unlikely(stream_is_named(strm))) { + build_extraction_path_with_ads(dentry, ctx, + strm->stream_name, + utf16le_len_chars(strm->stream_name)); + } else { + build_extraction_path(dentry, ctx); + } + + /* Open a new handle */ status = do_create_file(&h, FILE_WRITE_DATA | SYNCHRONIZE, @@ -1736,7 +1716,7 @@ begin_extract_stream_instance(const struct wim_lookup_table_entry *stream, ctx->open_handles[ctx->num_open_handles++] = h; /* Allocate space for the data. 
*/ - alloc_info.AllocationSize.QuadPart = stream->size; + alloc_info.AllocationSize.QuadPart = blob->size; (*func_NtSetInformationFile)(h, &ctx->iosb, &alloc_info, sizeof(alloc_info), FileAllocationInformation); @@ -2018,12 +1998,12 @@ retry: return 0; } -/* Called when starting to read a stream for extraction on Windows */ +/* Called when starting to read a blob for extraction on Windows */ static int -begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) +begin_extract_blob(struct blob_descriptor *blob, void *_ctx) { struct win32_apply_ctx *ctx = _ctx; - const struct stream_owner *owners = stream_owners(stream); + const struct blob_extraction_target *targets = blob_extraction_targets(blob); int ret; ctx->num_open_handles = 0; @@ -2031,22 +2011,21 @@ begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) INIT_LIST_HEAD(&ctx->reparse_dentries); INIT_LIST_HEAD(&ctx->encrypted_dentries); - for (u32 i = 0; i < stream->out_refcnt; i++) { - const struct wim_inode *inode = owners[i].inode; - const wchar_t *stream_name = owners[i].stream_name; + for (u32 i = 0; i < blob->out_refcnt; i++) { + const struct wim_inode *inode = targets[i].inode; + const struct wim_inode_stream *strm = targets[i].stream; struct wim_dentry *dentry; - /* A copy of the stream needs to be extracted to @inode. */ + /* A copy of the blob needs to be extracted to @inode. */ if (ctx->common.supported_features.hard_links) { dentry = inode_first_extraction_dentry(inode); - ret = begin_extract_stream_instance(stream, dentry, - stream_name, ctx); + ret = begin_extract_blob_instance(blob, dentry, strm, ctx); ret = check_apply_error(dentry, ctx, ret); if (ret) goto fail; } else { - /* Hard links not supported. Extract the stream + /* Hard links not supported. Extract the blob * separately to each alias of the inode. 
*/ struct list_head *next; @@ -2054,10 +2033,7 @@ begin_extract_stream(struct wim_lookup_table_entry *stream, void *_ctx) do { dentry = list_entry(next, struct wim_dentry, d_extraction_alias_node); - ret = begin_extract_stream_instance(stream, - dentry, - stream_name, - ctx); + ret = begin_extract_blob_instance(blob, dentry, strm, ctx); ret = check_apply_error(dentry, ctx, ret); if (ret) goto fail; @@ -2073,8 +2049,8 @@ fail: return ret; } -/* Called when the next chunk of a stream has been read for extraction on - * Windows */ +/* Called when the next chunk of a blob has been read for extraction on Windows + */ static int extract_chunk(const void *chunk, size_t size, void *_ctx) { @@ -2108,9 +2084,9 @@ extract_chunk(const void *chunk, size_t size, void *_ctx) return 0; } -/* Called when a stream has been fully read for extraction on Windows */ +/* Called when a blob has been fully read for extraction on Windows */ static int -end_extract_stream(struct wim_lookup_table_entry *stream, int status, void *_ctx) +end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct win32_apply_ctx *ctx = _ctx; int ret; @@ -2125,26 +2101,27 @@ end_extract_stream(struct wim_lookup_table_entry *stream, int status, void *_ctx return 0; if (!list_empty(&ctx->reparse_dentries)) { - if (stream->size > REPARSE_DATA_MAX_SIZE) { + if (blob->size > REPARSE_DATA_MAX_SIZE) { dentry = list_first_entry(&ctx->reparse_dentries, struct wim_dentry, tmp_list); build_extraction_path(dentry, ctx); ERROR("Reparse data of \"%ls\" has size " "%"PRIu64" bytes (exceeds %u bytes)", - current_path(ctx), stream->size, + current_path(ctx), blob->size, REPARSE_DATA_MAX_SIZE); ret = WIMLIB_ERR_INVALID_REPARSE_DATA; return check_apply_error(dentry, ctx, ret); } - /* In the WIM format, reparse streams are just the reparse data - * and omit the header. But we can reconstruct the header. 
*/ - memcpy(ctx->rpbuf.rpdata, ctx->data_buffer, stream->size); - ctx->rpbuf.rpdatalen = stream->size; + /* In the WIM format, reparse point streams are just the reparse + * data and omit the header. But we can reconstruct the header. + */ + memcpy(ctx->rpbuf.rpdata, ctx->data_buffer, blob->size); + ctx->rpbuf.rpdatalen = blob->size; ctx->rpbuf.rpreserved = 0; list_for_each_entry(dentry, &ctx->reparse_dentries, tmp_list) { ctx->rpbuf.rptag = dentry->d_inode->i_reparse_tag; ret = set_reparse_data(dentry, &ctx->rpbuf, - stream->size + REPARSE_DATA_OFFSET, + blob->size + REPARSE_DATA_OFFSET, ctx); ret = check_apply_error(dentry, ctx, ret); if (ret) @@ -2153,7 +2130,7 @@ end_extract_stream(struct wim_lookup_table_entry *stream, int status, void *_ctx } if (!list_empty(&ctx->encrypted_dentries)) { - ctx->encrypted_size = stream->size; + ctx->encrypted_size = blob->size; list_for_each_entry(dentry, &ctx->encrypted_dentries, tmp_list) { ret = extract_encrypted_file(dentry, ctx); ret = check_apply_error(dentry, ctx, ret); @@ -2536,15 +2513,15 @@ win32_extract(struct list_head *dentry_list, struct apply_ctx *_ctx) if (ret) goto out; - struct read_stream_list_callbacks cbs = { - .begin_stream = begin_extract_stream, - .begin_stream_ctx = ctx, + struct read_blob_list_callbacks cbs = { + .begin_blob = begin_extract_blob, + .begin_blob_ctx = ctx, .consume_chunk = extract_chunk, .consume_chunk_ctx = ctx, - .end_stream = end_extract_stream, - .end_stream_ctx = ctx, + .end_blob = end_extract_blob, + .end_blob_ctx = ctx, }; - ret = extract_stream_list(&ctx->common, &cbs); + ret = extract_blob_list(&ctx->common, &cbs); if (ret) goto out; diff --git a/src/win32_capture.c b/src/win32_capture.c index 7196fe22..c7a1d4a4 100644 --- a/src/win32_capture.c +++ b/src/win32_capture.c @@ -30,12 +30,12 @@ #include "wimlib/win32_common.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/capture.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include 
"wimlib/endianness.h" #include "wimlib/error.h" -#include "wimlib/lookup_table.h" #include "wimlib/paths.h" #include "wimlib/reparse.h" @@ -103,10 +103,10 @@ retry: } /* Read the first @size bytes from the file, or named data stream of a file, - * from which the stream entry @lte was created. */ + * described by @blob. */ int -read_winnt_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, - consume_data_callback_t cb, void *cb_ctx) +read_winnt_stream_prefix(const struct blob_descriptor *blob, u64 size, + consume_data_callback_t cb, void *cb_ctx) { const wchar_t *path; HANDLE h; @@ -116,7 +116,7 @@ read_winnt_file_prefix(const struct wim_lookup_table_entry *lte, u64 size, int ret; /* This is an NT namespace path. */ - path = lte->file_on_disk; + path = blob->file_on_disk; status = winnt_openat(NULL, path, wcslen(path), FILE_READ_DATA | SYNCHRONIZE, &h); @@ -184,7 +184,7 @@ win32_encrypted_export_cb(unsigned char *data, void *_ctx, unsigned long len) } int -read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte, +read_win32_encrypted_file_prefix(const struct blob_descriptor *blob, u64 size, consume_data_callback_t cb, void *cb_ctx) { @@ -194,7 +194,7 @@ read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte, int ret; DWORD flags = 0; - if (lte->file_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) + if (blob->file_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) flags |= CREATE_FOR_DIR; export_ctx.read_prefix_cb = cb; @@ -202,11 +202,11 @@ read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte, export_ctx.wimlib_err_code = 0; export_ctx.bytes_remaining = size; - err = OpenEncryptedFileRaw(lte->file_on_disk, flags, &file_ctx); + err = OpenEncryptedFileRaw(blob->file_on_disk, flags, &file_ctx); if (err != ERROR_SUCCESS) { win32_error(err, L"Failed to open encrypted file \"%ls\" for raw read", - printable_path(lte->file_on_disk)); + printable_path(blob->file_on_disk)); return WIMLIB_ERR_OPEN; } err = 
ReadEncryptedFileRaw(win32_encrypted_export_cb, @@ -216,14 +216,14 @@ read_win32_encrypted_file_prefix(const struct wim_lookup_table_entry *lte, if (ret == 0) { win32_error(err, L"Failed to read encrypted file \"%ls\"", - printable_path(lte->file_on_disk)); + printable_path(blob->file_on_disk)); ret = WIMLIB_ERR_READ; } } else if (export_ctx.bytes_remaining != 0) { ERROR("Only could read %"PRIu64" of %"PRIu64" bytes from " "encrypted file \"%ls\"", size - export_ctx.bytes_remaining, size, - printable_path(lte->file_on_disk)); + printable_path(blob->file_on_disk)); ret = WIMLIB_ERR_READ; } else { ret = 0; @@ -760,7 +760,7 @@ winnt_get_reparse_data(HANDLE h, const wchar_t *path, return WIMLIB_ERR_READ; } - if (unlikely(bytes_returned < 8)) { + if (unlikely(bytes_returned < REPARSE_DATA_OFFSET)) { ERROR("\"%ls\": Reparse point data is invalid", printable_path(path)); return WIMLIB_ERR_INVALID_REPARSE_DATA; @@ -824,43 +824,52 @@ win32_get_encrypted_file_size(const wchar_t *path, bool is_dir, u64 *size_ret) } static int -winnt_load_encrypted_stream_info(struct wim_inode *inode, const wchar_t *nt_path, - struct list_head *unhashed_streams) +winnt_load_efsrpc_raw_data(struct wim_inode *inode, const wchar_t *nt_path, + struct list_head *unhashed_blobs) { - struct wim_lookup_table_entry *lte = new_lookup_table_entry(); + struct blob_descriptor *blob; + struct wim_inode_stream *strm; int ret; - if (unlikely(!lte)) - return WIMLIB_ERR_NOMEM; + blob = new_blob_descriptor(); + if (!blob) + goto err_nomem; - lte->file_on_disk = WCSDUP(nt_path); - if (unlikely(!lte->file_on_disk)) { - free_lookup_table_entry(lte); - return WIMLIB_ERR_NOMEM; - } - lte->resource_location = RESOURCE_WIN32_ENCRYPTED; + blob->file_on_disk = WCSDUP(nt_path); + if (!blob->file_on_disk) + goto err_nomem; + blob->blob_location = BLOB_WIN32_ENCRYPTED; /* OpenEncryptedFileRaw() expects a Win32 name. 
*/ - wimlib_assert(!wmemcmp(lte->file_on_disk, L"\\??\\", 4)); - lte->file_on_disk[1] = L'\\'; + wimlib_assert(!wmemcmp(blob->file_on_disk, L"\\??\\", 4)); + blob->file_on_disk[1] = L'\\'; + + blob->file_inode = inode; - ret = win32_get_encrypted_file_size(lte->file_on_disk, + ret = win32_get_encrypted_file_size(blob->file_on_disk, (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY), - &lte->size); - if (unlikely(ret)) { - free_lookup_table_entry(lte); - return ret; - } + &blob->size); + if (ret) + goto err; - lte->file_inode = inode; - add_unhashed_stream(lte, inode, 0, unhashed_streams); - inode->i_lte = lte; + strm = inode_add_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA, + NO_STREAM_NAME, blob); + if (!strm) + goto err_nomem; + + prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs); return 0; + +err_nomem: + ret = WIMLIB_ERR_NOMEM; +err: + free_blob_descriptor(blob); + return ret; } static bool -get_data_stream_name(const wchar_t *raw_stream_name, size_t raw_stream_name_nchars, - const wchar_t **stream_name_ret, size_t *stream_name_nchars_ret) +get_data_stream_name(wchar_t *raw_stream_name, size_t raw_stream_name_nchars, + wchar_t **stream_name_ret, size_t *stream_name_nchars_ret) { const wchar_t *sep, *type, *end; @@ -891,6 +900,9 @@ get_data_stream_name(const wchar_t *raw_stream_name, size_t raw_stream_name_ncha return true; } +/* Build the path to the stream. For unnamed streams, this is simply the path + * to the file. For named streams, this is the path to the file, followed by a + * colon, followed by the stream name.
*/ static wchar_t * build_stream_path(const wchar_t *path, size_t path_nchars, const wchar_t *stream_name, size_t stream_name_nchars) @@ -916,77 +928,57 @@ build_stream_path(const wchar_t *path, size_t path_nchars, } static int -winnt_scan_stream(const wchar_t *path, size_t path_nchars, - const wchar_t *raw_stream_name, size_t raw_stream_name_nchars, - u64 stream_size, - struct wim_inode *inode, struct list_head *unhashed_streams) +winnt_scan_data_stream(const wchar_t *path, size_t path_nchars, + wchar_t *raw_stream_name, size_t raw_stream_name_nchars, + u64 stream_size, + struct wim_inode *inode, struct list_head *unhashed_blobs) { - const wchar_t *stream_name; + wchar_t *stream_name; size_t stream_name_nchars; - struct wim_ads_entry *ads_entry; - wchar_t *stream_path; - struct wim_lookup_table_entry *lte; - u32 stream_id; + struct blob_descriptor *blob; + struct wim_inode_stream *strm; /* Given the raw stream name (which is something like - * :streamname:$DATA), extract just the stream name part. + * :streamname:$DATA), extract just the stream name part (streamname). * Ignore any non-$DATA streams. */ if (!get_data_stream_name(raw_stream_name, raw_stream_name_nchars, &stream_name, &stream_name_nchars)) return 0; - /* If this is a named stream, allocate an ADS entry for it. */ - if (stream_name_nchars) { - ads_entry = inode_add_ads_utf16le(inode, stream_name, - stream_name_nchars * - sizeof(wchar_t)); - if (!ads_entry) - return WIMLIB_ERR_NOMEM; - } else if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT | - FILE_ATTRIBUTE_ENCRYPTED)) - { - /* Ignore unnamed data stream of reparse point or encrypted file - */ - return 0; + stream_name[stream_name_nchars] = L'\0'; + + /* If the stream is non-empty, set up a blob descriptor for it. 
*/ + if (stream_size != 0) { + blob = new_blob_descriptor(); + if (!blob) + goto err_nomem; + blob->file_on_disk = build_stream_path(path, + path_nchars, + stream_name, + stream_name_nchars); + if (!blob->file_on_disk) + goto err_nomem; + blob->blob_location = BLOB_IN_WINNT_FILE_ON_DISK; + blob->size = stream_size; + blob->file_inode = inode; } else { - ads_entry = NULL; + blob = NULL; } - /* If the stream is empty, no lookup table entry is needed. */ - if (stream_size == 0) - return 0; - - /* Build the path to the stream. For unnamed streams, this is simply - * the path to the file. For named streams, this is the path to the - * file, followed by a colon, followed by the stream name. */ - stream_path = build_stream_path(path, path_nchars, - stream_name, stream_name_nchars); - if (!stream_path) - return WIMLIB_ERR_NOMEM; + strm = inode_add_stream(inode, STREAM_TYPE_DATA, stream_name, blob); + if (!strm) + goto err_nomem; - /* Set up the lookup table entry for the stream. */ - lte = new_lookup_table_entry(); - if (!lte) { - FREE(stream_path); - return WIMLIB_ERR_NOMEM; - } - lte->file_on_disk = stream_path; - lte->resource_location = RESOURCE_IN_WINNT_FILE_ON_DISK; - lte->size = stream_size; - if (ads_entry) { - stream_id = ads_entry->stream_id; - ads_entry->lte = lte; - } else { - stream_id = 0; - inode->i_lte = lte; - } - lte->file_inode = inode; - add_unhashed_stream(lte, inode, stream_id, unhashed_streams); + prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs); return 0; + +err_nomem: + free_blob_descriptor(blob); + return WIMLIB_ERR_NOMEM; } /* - * Load information about the streams of an open file into a WIM inode. + * Load information about the data streams of an open file into a WIM inode. * * We use the NtQueryInformationFile() system call instead of FindFirstStream() * and FindNextStream(). This is done for two reasons: @@ -1000,9 +992,9 @@ winnt_scan_stream(const wchar_t *path, size_t path_nchars, * already present in Windows XP. 
*/ static int -winnt_scan_streams(HANDLE h, const wchar_t *path, size_t path_nchars, - struct wim_inode *inode, struct list_head *unhashed_streams, - u64 file_size, u32 vol_flags) +winnt_scan_data_streams(HANDLE h, const wchar_t *path, size_t path_nchars, + struct wim_inode *inode, struct list_head *unhashed_blobs, + u64 file_size, u32 vol_flags) { int ret; u8 _buf[1024] _aligned_attribute(8); @@ -1010,7 +1002,7 @@ winnt_scan_streams(HANDLE h, const wchar_t *path, size_t path_nchars, size_t bufsize; IO_STATUS_BLOCK iosb; NTSTATUS status; - const FILE_STREAM_INFORMATION *info; + FILE_STREAM_INFORMATION *info; buf = _buf; bufsize = sizeof(_buf); @@ -1063,14 +1055,14 @@ winnt_scan_streams(HANDLE h, const wchar_t *path, size_t path_nchars, } /* Parse one or more stream information structures. */ - info = (const FILE_STREAM_INFORMATION *)buf; + info = (FILE_STREAM_INFORMATION *)buf; for (;;) { /* Load the stream information. */ - ret = winnt_scan_stream(path, path_nchars, - info->StreamName, - info->StreamNameLength / 2, - info->StreamSize.QuadPart, - inode, unhashed_streams); + ret = winnt_scan_data_stream(path, path_nchars, + info->StreamName, + info->StreamNameLength / 2, + info->StreamSize.QuadPart, + inode, unhashed_blobs); if (ret) goto out_free_buf; @@ -1079,8 +1071,8 @@ winnt_scan_streams(HANDLE h, const wchar_t *path, size_t path_nchars, break; } /* Advance to next stream information. */ - info = (const FILE_STREAM_INFORMATION *) - ((const u8 *)info + info->NextEntryOffset); + info = (FILE_STREAM_INFORMATION *) + ((u8 *)info + info->NextEntryOffset); } ret = 0; goto out_free_buf; @@ -1095,8 +1087,8 @@ unnamed_only: goto out_free_buf; } - ret = winnt_scan_stream(path, path_nchars, L"::$DATA", 7, - file_size, inode, unhashed_streams); + ret = winnt_scan_data_stream(path, path_nchars, L"::$DATA", 7, + file_size, inode, unhashed_blobs); out_free_buf: /* Free buffer if allocated on heap. 
*/ if (unlikely(buf != _buf)) @@ -1121,9 +1113,6 @@ winnt_build_dentry_tree_recursive(struct wim_dentry **root_ret, int ret; NTSTATUS status; FILE_ALL_INFORMATION file_info; - u8 *rpbuf; - u16 rpbuflen; - u16 not_rpfixed; ACCESS_MASK requestedPerms; ret = try_exclude(full_path, full_path_nchars, params); @@ -1248,25 +1237,6 @@ retry_open: } } - /* If this is a reparse point, read the reparse data. */ - if (unlikely(file_info.BasicInformation.FileAttributes & - FILE_ATTRIBUTE_REPARSE_POINT)) - { - rpbuf = alloca(REPARSE_POINT_MAX_SIZE); - ret = winnt_get_reparse_data(h, full_path, params, - rpbuf, &rpbuflen); - switch (ret) { - case RP_FIXED: - not_rpfixed = 0; - break; - case RP_NOT_FIXED: - not_rpfixed = 1; - break; - default: - goto out; - } - } - /* Create a WIM dentry with an associated inode, which may be shared. * * However, we need to explicitly check for directories and files with @@ -1314,7 +1284,6 @@ retry_open: inode->i_creation_time = file_info.BasicInformation.CreationTime.QuadPart; inode->i_last_write_time = file_info.BasicInformation.LastWriteTime.QuadPart; inode->i_last_access_time = file_info.BasicInformation.LastAccessTime.QuadPart; - inode->i_resolved = 1; /* Get the file's security descriptor, unless we are capturing in * NO_ACLS mode or the volume does not support security descriptors. */ @@ -1333,17 +1302,41 @@ retry_open: } } - /* Load information about the unnamed data stream and any named data - * streams. */ - ret = winnt_scan_streams(h, - full_path, - full_path_nchars, - inode, - params->unhashed_streams, - file_info.StandardInformation.EndOfFile.QuadPart, - vol_flags); - if (ret) - goto out; + /* If this is a reparse point, load the reparse data. 
*/ + if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)) { + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { + /* See comment above assign_stream_types_encrypted() */ + WARNING("Ignoring reparse data of encrypted file \"%ls\"", + printable_path(full_path)); + } else { + u8 rpbuf[REPARSE_POINT_MAX_SIZE] _aligned_attribute(8); + u16 rpbuflen; + + ret = winnt_get_reparse_data(h, full_path, params, + rpbuf, &rpbuflen); + switch (ret) { + case RP_FIXED: + inode->i_not_rpfixed = 0; + break; + case RP_NOT_FIXED: + inode->i_not_rpfixed = 1; + break; + default: + goto out; + } + inode->i_reparse_tag = le32_to_cpu(*(le32*)rpbuf); + if (!inode_add_stream_with_data(inode, + STREAM_TYPE_REPARSE_POINT, + NO_STREAM_NAME, + rpbuf + REPARSE_DATA_OFFSET, + rpbuflen - REPARSE_DATA_OFFSET, + params->blob_table)) + { + ret = WIMLIB_ERR_NOMEM; + goto out; + } + } + } if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { /* Load information about the raw encrypted data. This is @@ -1356,27 +1349,33 @@ retry_open: * needed. */ (*func_NtClose)(h); h = NULL; - ret = winnt_load_encrypted_stream_info(inode, full_path, - params->unhashed_streams); + ret = winnt_load_efsrpc_raw_data(inode, full_path, + params->unhashed_blobs); + if (ret) + goto out; + } else { + /* + * Load information about data streams (unnamed and named). + * + * Skip this step for encrypted files, since the data from + * ReadEncryptedFileRaw() already contains all data streams (and + * they do in fact all get restored by WriteEncryptedFileRaw().) + * + * Note: WIMGAPI (as of Windows 8.1) gets wrong and stores both + * the EFSRPC data and the named data stream(s)...! 
+ */ + ret = winnt_scan_data_streams(h, + full_path, + full_path_nchars, + inode, + params->unhashed_blobs, + file_info.StandardInformation.EndOfFile.QuadPart, + vol_flags); if (ret) goto out; } - if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)) { - if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { - WARNING("Ignoring reparse data of encrypted reparse point file \"%ls\"", - printable_path(full_path)); - } else { - /* Reparse point: set the reparse data (already read). */ - - inode->i_not_rpfixed = not_rpfixed; - inode->i_reparse_tag = le32_to_cpu(*(le32*)rpbuf); - ret = inode_set_unnamed_stream(inode, rpbuf + 8, rpbuflen - 8, - params->lookup_table); - if (ret) - goto out; - } - } else if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) { + if (inode_is_directory(inode)) { /* Directory: recurse to children. */ @@ -1419,7 +1418,7 @@ out: if (likely(h)) (*func_NtClose)(h); if (unlikely(ret)) { - free_dentry_tree(root, params->lookup_table); + free_dentry_tree(root, params->blob_table); root = NULL; ret = report_capture_error(params, ret, full_path); } diff --git a/src/write.c b/src/write.c index 842ca9b1..03f30559 100644 --- a/src/write.c +++ b/src/write.c @@ -39,6 +39,7 @@ #include "wimlib/alloca.h" #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/chunk_compressor.h" #include "wimlib/endianness.h" #include "wimlib/error.h" @@ -46,7 +47,6 @@ #include "wimlib/header.h" #include "wimlib/inode.h" #include "wimlib/integrity.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/paths.h" #include "wimlib/progress.h" @@ -91,19 +91,20 @@ struct filter_context { WIMStruct *wim; }; -/* Determine specified stream should be filtered out from the write. +/* + * Determine whether the specified blob should be filtered out from the write. * * Return values: * - * < 0 : The stream should be hard-filtered; that is, not included in the - * output WIM at all. 
- * 0 : The stream should not be filtered out. - * > 0 : The stream should be soft-filtered; that is, it already exists in the + * < 0 : The blob should be hard-filtered; that is, not included in the output + * WIM file at all. + * 0 : The blob should not be filtered out. + * > 0 : The blob should be soft-filtered; that is, it already exists in the * WIM file and may not need to be written again. */ static int -stream_filtered(const struct wim_lookup_table_entry *lte, - const struct filter_context *ctx) +blob_filtered(const struct blob_descriptor *blob, + const struct filter_context *ctx) { int write_flags; WIMStruct *wim; @@ -115,27 +116,27 @@ stream_filtered(const struct wim_lookup_table_entry *lte, wim = ctx->wim; if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE && - lte->resource_location == RESOURCE_IN_WIM && - lte->rspec->wim == wim) + blob->blob_location == BLOB_IN_WIM && + blob->rdesc->wim == wim) return 1; if (write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS && - lte->resource_location == RESOURCE_IN_WIM && - lte->rspec->wim != wim) + blob->blob_location == BLOB_IN_WIM && + blob->rdesc->wim != wim) return -1; return 0; } static bool -stream_hard_filtered(const struct wim_lookup_table_entry *lte, - struct filter_context *ctx) +blob_hard_filtered(const struct blob_descriptor *blob, + struct filter_context *ctx) { - return stream_filtered(lte, ctx) < 0; + return blob_filtered(blob, ctx) < 0; } static inline int -may_soft_filter_streams(const struct filter_context *ctx) +may_soft_filter_blobs(const struct filter_context *ctx) { if (ctx == NULL) return 0; @@ -143,7 +144,7 @@ may_soft_filter_streams(const struct filter_context *ctx) } static inline int -may_hard_filter_streams(const struct filter_context *ctx) +may_hard_filter_blobs(const struct filter_context *ctx) { if (ctx == NULL) return 0; @@ -151,20 +152,18 @@ may_hard_filter_streams(const struct filter_context *ctx) } static inline int -may_filter_streams(const struct filter_context *ctx) 
+may_filter_blobs(const struct filter_context *ctx) { - return (may_soft_filter_streams(ctx) || - may_hard_filter_streams(ctx)); + return (may_soft_filter_blobs(ctx) || may_hard_filter_blobs(ctx)); } - /* Return true if the specified resource is compressed and the compressed data * can be reused with the specified output parameters. */ static bool -can_raw_copy(const struct wim_lookup_table_entry *lte, +can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags, int out_ctype, u32 out_chunk_size) { - const struct wim_resource_spec *rspec; + const struct wim_resource_descriptor *rdesc; if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS) return false; @@ -172,25 +171,25 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE) return false; - if (lte->resource_location != RESOURCE_IN_WIM) + if (blob->blob_location != BLOB_IN_WIM) return false; - rspec = lte->rspec; + rdesc = blob->rdesc; - if (rspec->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) + if (rdesc->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) return false; - if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) { + if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) { /* Normal compressed resource: Must use same compression type * and chunk size. */ - return (rspec->compression_type == out_ctype && - rspec->chunk_size == out_chunk_size); + return (rdesc->compression_type == out_ctype && + rdesc->chunk_size == out_chunk_size); } - if ((rspec->flags & WIM_RESHDR_FLAG_SOLID) && + if ((rdesc->flags & WIM_RESHDR_FLAG_SOLID) && (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) { - /* Solid resource: Such resources may contain multiple streams, + /* Solid resource: Such resources may contain multiple blobs, * and in general only a subset of them need to be written. As * a heuristic, re-use the raw data if more than two-thirds the * uncompressed size is being written. 
*/ @@ -200,14 +199,14 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, * check if they are compatible with @out_ctype and * @out_chunk_size. */ - struct wim_lookup_table_entry *res_stream; + struct blob_descriptor *res_blob; u64 write_size = 0; - list_for_each_entry(res_stream, &rspec->stream_list, rspec_node) - if (res_stream->will_be_in_output_wim) - write_size += res_stream->size; + list_for_each_entry(res_blob, &rdesc->blob_list, rdesc_node) + if (res_blob->will_be_in_output_wim) + write_size += res_blob->size; - return (write_size > rspec->uncompressed_size * 2 / 3); + return (write_size > rdesc->uncompressed_size * 2 / 3); } return false; @@ -223,64 +222,63 @@ filter_resource_flags(u8 flags) } static void -stream_set_out_reshdr_for_reuse(struct wim_lookup_table_entry *lte) +blob_set_out_reshdr_for_reuse(struct blob_descriptor *blob) { - const struct wim_resource_spec *rspec; + const struct wim_resource_descriptor *rdesc; - wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); - rspec = lte->rspec; + wimlib_assert(blob->blob_location == BLOB_IN_WIM); + rdesc = blob->rdesc; - if (rspec->flags & WIM_RESHDR_FLAG_SOLID) { + if (rdesc->flags & WIM_RESHDR_FLAG_SOLID) { - wimlib_assert(lte->flags & WIM_RESHDR_FLAG_SOLID); + wimlib_assert(blob->flags & WIM_RESHDR_FLAG_SOLID); - lte->out_reshdr.offset_in_wim = lte->offset_in_res; - lte->out_reshdr.uncompressed_size = 0; - lte->out_reshdr.size_in_wim = lte->size; + blob->out_reshdr.offset_in_wim = blob->offset_in_res; + blob->out_reshdr.uncompressed_size = 0; + blob->out_reshdr.size_in_wim = blob->size; - lte->out_res_offset_in_wim = rspec->offset_in_wim; - lte->out_res_size_in_wim = rspec->size_in_wim; - lte->out_res_uncompressed_size = rspec->uncompressed_size; + blob->out_res_offset_in_wim = rdesc->offset_in_wim; + blob->out_res_size_in_wim = rdesc->size_in_wim; + blob->out_res_uncompressed_size = rdesc->uncompressed_size; } else { - wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_SOLID)); + 
wimlib_assert(!(blob->flags & WIM_RESHDR_FLAG_SOLID)); - lte->out_reshdr.offset_in_wim = rspec->offset_in_wim; - lte->out_reshdr.uncompressed_size = rspec->uncompressed_size; - lte->out_reshdr.size_in_wim = rspec->size_in_wim; + blob->out_reshdr.offset_in_wim = rdesc->offset_in_wim; + blob->out_reshdr.uncompressed_size = rdesc->uncompressed_size; + blob->out_reshdr.size_in_wim = rdesc->size_in_wim; } - lte->out_reshdr.flags = lte->flags; + blob->out_reshdr.flags = blob->flags; } -/* Write the header for a stream in a pipable WIM. */ +/* Write the header for a blob in a pipable WIM. */ static int -write_pwm_stream_header(const struct wim_lookup_table_entry *lte, - struct filedes *out_fd, - int additional_reshdr_flags) +write_pwm_blob_header(const struct blob_descriptor *blob, + struct filedes *out_fd, int additional_reshdr_flags) { - struct pwm_stream_hdr stream_hdr; + struct pwm_blob_hdr blob_hdr; u32 reshdr_flags; int ret; - stream_hdr.magic = cpu_to_le64(PWM_STREAM_MAGIC); - stream_hdr.uncompressed_size = cpu_to_le64(lte->size); + blob_hdr.magic = cpu_to_le64(PWM_BLOB_MAGIC); + blob_hdr.uncompressed_size = cpu_to_le64(blob->size); if (additional_reshdr_flags & PWM_RESHDR_FLAG_UNHASHED) { - zero_out_hash(stream_hdr.hash); + zero_out_hash(blob_hdr.hash); } else { - wimlib_assert(!lte->unhashed); - copy_hash(stream_hdr.hash, lte->hash); + wimlib_assert(!blob->unhashed); + copy_hash(blob_hdr.hash, blob->hash); } - reshdr_flags = filter_resource_flags(lte->flags); + reshdr_flags = filter_resource_flags(blob->flags); reshdr_flags |= additional_reshdr_flags; - stream_hdr.flags = cpu_to_le32(reshdr_flags); - ret = full_write(out_fd, &stream_hdr, sizeof(stream_hdr)); + blob_hdr.flags = cpu_to_le32(reshdr_flags); + ret = full_write(out_fd, &blob_hdr, sizeof(blob_hdr)); if (ret) ERROR_WITH_ERRNO("Write error"); return ret; } -struct write_streams_progress_data { +struct write_blobs_progress_data { wimlib_progress_func_t progfunc; void *progctx; union wimlib_progress_info 
progress; @@ -288,10 +286,8 @@ struct write_streams_progress_data { }; static int -do_write_streams_progress(struct write_streams_progress_data *progress_data, - u64 complete_size, - u32 complete_count, - bool discarded) +do_write_blobs_progress(struct write_blobs_progress_data *progress_data, + u64 complete_size, u32 complete_count, bool discarded) { union wimlib_progress_info *progress = &progress_data->progress; int ret; @@ -348,13 +344,13 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data, return 0; } -struct write_streams_ctx { - /* File descriptor the streams are being written to. */ +struct write_blobs_ctx { + /* File descriptor to which the blobs are being written. */ struct filedes *out_fd; - /* Lookup table for the WIMStruct on whose behalf the streams are being + /* Blob table for the WIMStruct on whose behalf the blobs are being * written. */ - struct wim_lookup_table *lookup_table; + struct blob_table *blob_table; /* Compression format to use. */ int out_ctype; @@ -362,11 +358,11 @@ struct write_streams_ctx { /* Maximum uncompressed chunk size in compressed resources to use. */ u32 out_chunk_size; - /* Flags that affect how the streams will be written. */ + /* Flags that affect how the blobs will be written. */ int write_resource_flags; /* Data used for issuing WRITE_STREAMS progress. */ - struct write_streams_progress_data progress_data; + struct write_blobs_progress_data progress_data; struct filter_context *filter_ctx; @@ -387,21 +383,21 @@ struct write_streams_ctx { /* Number of bytes in @cur_chunk_buf that are currently filled. */ size_t cur_chunk_buf_filled; - /* List of streams that currently have chunks being compressed. */ - struct list_head pending_streams; + /* List of blobs that currently have chunks being compressed. */ + struct list_head blobs_being_compressed; - /* List of streams in the solid resource. Streams are moved here after - * @pending_streams only when writing a solid resource. 
*/ - struct list_head solid_streams; + /* List of blobs in the solid resource. Blobs are moved here after + * @blobs_being_compressed only when writing a solid resource. */ + struct list_head blobs_in_solid_resource; - /* Current uncompressed offset in the stream being read. */ - u64 cur_read_stream_offset; + /* Current uncompressed offset in the blob being read. */ + u64 cur_read_blob_offset; - /* Uncompressed size of the stream currently being read. */ - u64 cur_read_stream_size; + /* Uncompressed size of the blob currently being read. */ + u64 cur_read_blob_size; - /* Current uncompressed offset in the stream being written. */ - u64 cur_write_stream_offset; + /* Current uncompressed offset in the blob being written. */ + u64 cur_write_blob_offset; /* Uncompressed size of resource currently being written. */ u64 cur_write_res_size; @@ -424,7 +420,7 @@ struct write_streams_ctx { /* Reserve space for the chunk table and prepare to accumulate the chunk table * in memory. */ static int -begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) +begin_chunk_table(struct write_blobs_ctx *ctx, u64 res_expected_size) { u64 expected_num_chunks; u64 expected_num_chunk_entries; @@ -433,8 +429,8 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) /* Calculate the number of chunks and chunk entries that should be * needed for the resource. These normally will be the final values, - * but in SOLID mode some of the streams we're planning to write into - * the resource may be duplicates, and therefore discarded, potentially + * but in SOLID mode some of the blobs we're planning to write into the + * resource may be duplicates, and therefore discarded, potentially * decreasing the number of chunk entries needed. 
*/ expected_num_chunks = DIV_ROUND_UP(res_expected_size, ctx->out_chunk_size); expected_num_chunk_entries = expected_num_chunks; @@ -485,7 +481,7 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) } static int -begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size) +begin_write_resource(struct write_blobs_ctx *ctx, u64 res_expected_size) { int ret; @@ -500,13 +496,13 @@ begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size) /* Output file descriptor is now positioned at the offset at which to * write the first chunk of the resource. */ ctx->chunks_start_offset = ctx->out_fd->offset; - ctx->cur_write_stream_offset = 0; + ctx->cur_write_blob_offset = 0; ctx->cur_write_res_size = res_expected_size; return 0; } static int -end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, +end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size, u64 *res_start_offset_ret, u64 *res_store_size_ret) { size_t actual_num_chunks; @@ -612,14 +608,14 @@ write_error: /* Finish writing a WIM resource by writing or updating the chunk table (if not * writing the data uncompressed) and loading its metadata into @out_reshdr. */ static int -end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr) +end_write_resource(struct write_blobs_ctx *ctx, struct wim_reshdr *out_reshdr) { int ret; u64 res_size_in_wim; u64 res_uncompressed_size; u64 res_offset_in_wim; - wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size || + wimlib_assert(ctx->cur_write_blob_offset == ctx->cur_write_res_size || (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)); res_uncompressed_size = ctx->cur_write_res_size; @@ -640,7 +636,7 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr) return 0; } -/* No more data streams of the file at @path are needed. */ +/* Call when no more data from the file at @path is needed. 
*/ static int done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx) { @@ -652,28 +648,17 @@ done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx &info, progctx); } -static inline bool -is_file_stream(const struct wim_lookup_table_entry *lte) -{ - return lte->resource_location == RESOURCE_IN_FILE_ON_DISK -#ifdef __WIN32__ - || lte->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK - || lte->resource_location == RESOURCE_WIN32_ENCRYPTED -#endif - ; -} - static int -do_done_with_stream(struct wim_lookup_table_entry *lte, - wimlib_progress_func_t progfunc, void *progctx) +do_done_with_blob(struct blob_descriptor *blob, + wimlib_progress_func_t progfunc, void *progctx) { int ret; struct wim_inode *inode; - if (!lte->may_send_done_with_file) + if (!blob->may_send_done_with_file) return 0; - inode = lte->file_inode; + inode = blob->file_inode; wimlib_assert(inode != NULL); wimlib_assert(inode->num_remaining_streams > 0); @@ -683,14 +668,14 @@ do_done_with_stream(struct wim_lookup_table_entry *lte, #ifdef __WIN32__ /* XXX: This logic really should be somewhere else. */ - /* We want the path to the file, but lte->file_on_disk might actually + /* We want the path to the file, but blob->file_on_disk might actually * refer to a named data stream. Temporarily strip the named data * stream from the path. 
*/ wchar_t *p_colon = NULL; wchar_t *p_question_mark = NULL; const wchar_t *p_stream_name; - p_stream_name = path_stream_name(lte->file_on_disk); + p_stream_name = path_stream_name(blob->file_on_disk); if (unlikely(p_stream_name)) { p_colon = (wchar_t *)(p_stream_name - 1); wimlib_assert(*p_colon == L':'); @@ -698,13 +683,13 @@ do_done_with_stream(struct wim_lookup_table_entry *lte, } /* We also should use a fake Win32 path instead of a NT path */ - if (!wcsncmp(lte->file_on_disk, L"\\??\\", 4)) { - p_question_mark = &lte->file_on_disk[1]; + if (!wcsncmp(blob->file_on_disk, L"\\??\\", 4)) { + p_question_mark = &blob->file_on_disk[1]; *p_question_mark = L'\\'; } #endif - ret = done_with_file(lte->file_on_disk, progfunc, progctx); + ret = done_with_file(blob->file_on_disk, progfunc, progctx); #ifdef __WIN32__ if (p_colon) @@ -717,115 +702,111 @@ do_done_with_stream(struct wim_lookup_table_entry *lte, /* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode. */ static inline int -done_with_stream(struct wim_lookup_table_entry *lte, - struct write_streams_ctx *ctx) +done_with_blob(struct blob_descriptor *blob, struct write_blobs_ctx *ctx) { if (likely(!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))) return 0; - return do_done_with_stream(lte, ctx->progress_data.progfunc, - ctx->progress_data.progctx); + return do_done_with_blob(blob, ctx->progress_data.progfunc, + ctx->progress_data.progctx); } -/* Begin processing a stream for writing. 
*/ static int -write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) +write_blob_begin_read(struct blob_descriptor *blob, void *_ctx) { - struct write_streams_ctx *ctx = _ctx; + struct write_blobs_ctx *ctx = _ctx; int ret; - wimlib_assert(lte->size > 0); + wimlib_assert(blob->size > 0); - ctx->cur_read_stream_offset = 0; - ctx->cur_read_stream_size = lte->size; + ctx->cur_read_blob_offset = 0; + ctx->cur_read_blob_size = blob->size; - /* As an optimization, we allow some streams to be "unhashed", meaning - * their SHA1 message digests are unknown. This is the case with - * streams that are added by scanning a directry tree with - * wimlib_add_image(), for example. Since WIM uses single-instance - * streams, we don't know whether such each such stream really need to - * written until it is actually checksummed, unless it has a unique - * size. In such cases we read and checksum the stream in this - * function, thereby advancing ahead of read_stream_list(), which will - * still provide the data again to write_stream_process_chunk(). This - * is okay because an unhashed stream cannot be in a WIM resource, which - * might be costly to decompress. */ - if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) { + /* As an optimization, we allow some blobs to be "unhashed", meaning + * their SHA-1 message digests are unknown. This is the case with blobs + * that are added by scanning a directory tree with wimlib_add_image(), + * for example. Since WIM uses single-instance blobs, we don't know + * whether such each such blob really need to written until it is + * actually checksummed, unless it has a unique size. In such cases we + * read and checksum the blob in this function, thereby advancing ahead + * of read_blob_list(), which will still provide the data again to + * write_blob_process_chunk(). This is okay because an unhashed blob + * cannot be in a WIM resource, which might be costly to decompress. 
*/ + if (ctx->blob_table != NULL && blob->unhashed && !blob->unique_size) { - struct wim_lookup_table_entry *lte_new; + struct blob_descriptor *new_blob; - ret = hash_unhashed_stream(lte, ctx->lookup_table, &lte_new); + ret = hash_unhashed_blob(blob, ctx->blob_table, &new_blob); if (ret) return ret; - if (lte_new != lte) { - /* Duplicate stream detected. */ + if (new_blob != blob) { + /* Duplicate blob detected. */ - if (lte_new->will_be_in_output_wim || - stream_filtered(lte_new, ctx->filter_ctx)) + if (new_blob->will_be_in_output_wim || + blob_filtered(new_blob, ctx->filter_ctx)) { - /* The duplicate stream is already being - * included in the output WIM, or it would be - * filtered out if it had been. Skip writing - * this stream (and reading it again) entirely, - * passing its output reference count to the - * duplicate stream in the former case. */ - DEBUG("Discarding duplicate stream of " - "length %"PRIu64, lte->size); - ret = do_write_streams_progress(&ctx->progress_data, - lte->size, - 1, true); - list_del(&lte->write_streams_list); - list_del(&lte->lookup_table_list); - if (lte_new->will_be_in_output_wim) - lte_new->out_refcnt += lte->out_refcnt; + /* The duplicate blob is already being included + * in the output WIM, or it would be filtered + * out if it had been. Skip writing this blob + * (and reading it again) entirely, passing its + * output reference count to the duplicate blob + * in the former case. 
*/ + DEBUG("Discarding duplicate blob of " + "length %"PRIu64, blob->size); + ret = do_write_blobs_progress(&ctx->progress_data, + blob->size, 1, true); + list_del(&blob->write_blobs_list); + list_del(&blob->blob_table_list); + if (new_blob->will_be_in_output_wim) + new_blob->out_refcnt += blob->out_refcnt; if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) - ctx->cur_write_res_size -= lte->size; + ctx->cur_write_res_size -= blob->size; if (!ret) - ret = done_with_stream(lte, ctx); - free_lookup_table_entry(lte); + ret = done_with_blob(blob, ctx); + free_blob_descriptor(blob); if (ret) return ret; - return BEGIN_STREAM_STATUS_SKIP_STREAM; + return BEGIN_BLOB_STATUS_SKIP_BLOB; } else { - /* The duplicate stream can validly be written, + /* The duplicate blob can validly be written, * but was not marked as such. Discard the - * current stream entry and use the duplicate, - * but actually freeing the current entry must - * wait until read_stream_list() has finished - * reading its data. */ - DEBUG("Stream duplicate, but not already " + * current blob descriptor and use the + * duplicate, but actually freeing the current + * blob descriptor must wait until + * read_blob_list() has finished reading its + * data. 
*/ + DEBUG("Blob duplicate, but not already " "selected for writing."); - list_replace(&lte->write_streams_list, - &lte_new->write_streams_list); - list_replace(&lte->lookup_table_list, - &lte_new->lookup_table_list); - lte->will_be_in_output_wim = 0; - lte_new->out_refcnt = lte->out_refcnt; - lte_new->will_be_in_output_wim = 1; - lte_new->may_send_done_with_file = 0; - lte = lte_new; + list_replace(&blob->write_blobs_list, + &new_blob->write_blobs_list); + list_replace(&blob->blob_table_list, + &new_blob->blob_table_list); + blob->will_be_in_output_wim = 0; + new_blob->out_refcnt = blob->out_refcnt; + new_blob->will_be_in_output_wim = 1; + new_blob->may_send_done_with_file = 0; + blob = new_blob; } } } - list_move_tail(&lte->write_streams_list, &ctx->pending_streams); + list_move_tail(&blob->write_blobs_list, &ctx->blobs_being_compressed); return 0; } -/* Rewrite a stream that was just written compressed as uncompressed instead. - * This function is optional, but if a stream did not compress to less than its - * original size, it might as well be written uncompressed. */ +/* Rewrite a blob that was just written compressed as uncompressed instead. + */ static int -write_stream_uncompressed(struct wim_lookup_table_entry *lte, - struct filedes *out_fd) +write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd) { int ret; - u64 begin_offset = lte->out_reshdr.offset_in_wim; + u64 begin_offset = blob->out_reshdr.offset_in_wim; u64 end_offset = out_fd->offset; if (filedes_seek(out_fd, begin_offset) == -1) return 0; - ret = extract_full_stream_to_fd(lte, out_fd); + ret = extract_full_blob_to_fd(blob, out_fd); if (ret) { /* Error reading the uncompressed data. */ if (out_fd->offset == begin_offset && @@ -835,15 +816,14 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte, * seeked to the end of the compressed resource, so * don't issue a hard error; just keep the compressed * resource instead. 
*/ - WARNING("Recovered compressed stream of " - "size %"PRIu64", continuing on.", - lte->size); + WARNING("Recovered compressed blob of " + "size %"PRIu64", continuing on.", blob->size); return 0; } return ret; } - wimlib_assert(out_fd->offset - begin_offset == lte->size); + wimlib_assert(out_fd->offset - begin_offset == blob->size); if (out_fd->offset < end_offset && 0 != ftruncate(out_fd->fd, out_fd->offset)) @@ -853,22 +833,22 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte, return WIMLIB_ERR_WRITE; } - lte->out_reshdr.size_in_wim = lte->size; - lte->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED | - WIM_RESHDR_FLAG_SOLID); + blob->out_reshdr.size_in_wim = blob->size; + blob->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED | + WIM_RESHDR_FLAG_SOLID); return 0; } -/* Returns true if the specified stream should be truncated from the WIM file - * and re-written as uncompressed. lte->out_reshdr must be filled in from the - * initial write of the stream. */ +/* Returns true if the specified blob, which was written as a non-solid + * resource, should be truncated from the WIM file and re-written uncompressed. + * blob->out_reshdr must be filled in from the initial write of the blob. */ static bool -should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx, - const struct wim_lookup_table_entry *lte) +should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx, + const struct blob_descriptor *blob) { /* If the compressed data is smaller than the uncompressed data, prefer * the compressed data. 
*/ - if (lte->out_reshdr.size_in_wim < lte->out_reshdr.uncompressed_size) + if (blob->out_reshdr.size_in_wim < blob->out_reshdr.uncompressed_size) return false; /* If we're not actually writing compressed data, then there's no need @@ -881,34 +861,34 @@ should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx, if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) return false; - /* If the stream that would need to be re-read is located in a solid + /* If the blob that would need to be re-read is located in a solid * resource in another WIM file, then re-reading it would be costly. So * don't do it. * * Exception: if the compressed size happens to be *exactly* the same as - * the uncompressed size, then the stream *must* be written uncompressed + * the uncompressed size, then the blob *must* be written uncompressed * in order to remain compatible with the Windows Overlay Filesystem * Filter Driver (WOF). * * TODO: we are currently assuming that the optimization for - * single-chunk resources in maybe_rewrite_stream_uncompressed() - * prevents this case from being triggered too often. To fully prevent - * excessive decompressions in degenerate cases, we really should - * obtain the uncompressed data by decompressing the compressed data we - * wrote to the output file. + * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents + * this case from being triggered too often. To fully prevent excessive + * decompressions in degenerate cases, we really should obtain the + * uncompressed data by decompressing the compressed data we wrote to + * the output file. 
*/ - if ((lte->flags & WIM_RESHDR_FLAG_SOLID) && - (lte->out_reshdr.size_in_wim != lte->out_reshdr.uncompressed_size)) + if ((blob->flags & WIM_RESHDR_FLAG_SOLID) && + (blob->out_reshdr.size_in_wim != blob->out_reshdr.uncompressed_size)) return false; return true; } static int -maybe_rewrite_stream_uncompressed(struct write_streams_ctx *ctx, - struct wim_lookup_table_entry *lte) +maybe_rewrite_blob_uncompressed(struct write_blobs_ctx *ctx, + struct blob_descriptor *blob) { - if (!should_rewrite_stream_uncompressed(ctx, lte)) + if (!should_rewrite_blob_uncompressed(ctx, blob)) return 0; /* Regular (non-solid) WIM resources with exactly one chunk and @@ -917,50 +897,49 @@ maybe_rewrite_stream_uncompressed(struct write_streams_ctx *ctx, * in the chunk table and the only chunk must be stored uncompressed. * In this case, there's no need to rewrite anything. */ if (ctx->chunk_index == 1 && - lte->out_reshdr.size_in_wim == lte->out_reshdr.uncompressed_size) + blob->out_reshdr.size_in_wim == blob->out_reshdr.uncompressed_size) { - lte->out_reshdr.flags &= ~WIM_RESHDR_FLAG_COMPRESSED; + blob->out_reshdr.flags &= ~WIM_RESHDR_FLAG_COMPRESSED; return 0; } - return write_stream_uncompressed(lte, ctx->out_fd); + return write_blob_uncompressed(blob, ctx->out_fd); } /* Write the next chunk of (typically compressed) data to the output WIM, * handling the writing of the chunk table. 
*/ static int -write_chunk(struct write_streams_ctx *ctx, const void *cchunk, +write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, size_t csize, size_t usize) { int ret; - - struct wim_lookup_table_entry *lte; - u32 completed_stream_count; + struct blob_descriptor *blob; + u32 completed_blob_count; u32 completed_size; - lte = list_entry(ctx->pending_streams.next, - struct wim_lookup_table_entry, write_streams_list); + blob = list_entry(ctx->blobs_being_compressed.next, + struct blob_descriptor, write_blobs_list); - if (ctx->cur_write_stream_offset == 0 && + if (ctx->cur_write_blob_offset == 0 && !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) { - /* Starting to write a new stream in non-solid mode. */ + /* Starting to write a new blob in non-solid mode. */ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { int additional_reshdr_flags = 0; if (ctx->compressor != NULL) additional_reshdr_flags |= WIM_RESHDR_FLAG_COMPRESSED; - DEBUG("Writing pipable WIM stream header " + DEBUG("Writing pipable WIM blob header " "(offset=%"PRIu64")", ctx->out_fd->offset); - ret = write_pwm_stream_header(lte, ctx->out_fd, - additional_reshdr_flags); + ret = write_pwm_blob_header(blob, ctx->out_fd, + additional_reshdr_flags); if (ret) return ret; } - ret = begin_write_resource(ctx, lte->size); + ret = begin_write_resource(ctx, blob->size); if (ret) return ret; } @@ -988,69 +967,68 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, if (ret) goto write_error; - ctx->cur_write_stream_offset += usize; + ctx->cur_write_blob_offset += usize; completed_size = usize; - completed_stream_count = 0; + completed_blob_count = 0; if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { /* Wrote chunk in solid mode. It may have finished multiple - * streams. */ - struct wim_lookup_table_entry *next_lte; + * blobs. 
*/ + struct blob_descriptor *next_blob; - while (lte && ctx->cur_write_stream_offset >= lte->size) { + while (blob && ctx->cur_write_blob_offset >= blob->size) { - ctx->cur_write_stream_offset -= lte->size; + ctx->cur_write_blob_offset -= blob->size; - if (ctx->cur_write_stream_offset) - next_lte = list_entry(lte->write_streams_list.next, - struct wim_lookup_table_entry, - write_streams_list); + if (ctx->cur_write_blob_offset) + next_blob = list_entry(blob->write_blobs_list.next, + struct blob_descriptor, + write_blobs_list); else - next_lte = NULL; + next_blob = NULL; - ret = done_with_stream(lte, ctx); + ret = done_with_blob(blob, ctx); if (ret) return ret; - list_move_tail(&lte->write_streams_list, &ctx->solid_streams); - completed_stream_count++; + list_move_tail(&blob->write_blobs_list, &ctx->blobs_in_solid_resource); + completed_blob_count++; - lte = next_lte; + blob = next_blob; } } else { /* Wrote chunk in non-solid mode. It may have finished a - * stream. */ - if (ctx->cur_write_stream_offset == lte->size) { + * blob. 
*/ + if (ctx->cur_write_blob_offset == blob->size) { - wimlib_assert(ctx->cur_write_stream_offset == + wimlib_assert(ctx->cur_write_blob_offset == ctx->cur_write_res_size); - ret = end_write_resource(ctx, &lte->out_reshdr); + ret = end_write_resource(ctx, &blob->out_reshdr); if (ret) return ret; - lte->out_reshdr.flags = filter_resource_flags(lte->flags); + blob->out_reshdr.flags = filter_resource_flags(blob->flags); if (ctx->compressor != NULL) - lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; + blob->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; - ret = maybe_rewrite_stream_uncompressed(ctx, lte); + ret = maybe_rewrite_blob_uncompressed(ctx, blob); if (ret) return ret; - wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + wimlib_assert(blob->out_reshdr.uncompressed_size == blob->size); - ctx->cur_write_stream_offset = 0; + ctx->cur_write_blob_offset = 0; - ret = done_with_stream(lte, ctx); + ret = done_with_blob(blob, ctx); if (ret) return ret; - list_del(&lte->write_streams_list); - completed_stream_count++; + list_del(&blob->write_blobs_list); + completed_blob_count++; } } - return do_write_streams_progress(&ctx->progress_data, - completed_size, completed_stream_count, - false); + return do_write_blobs_progress(&ctx->progress_data, completed_size, + completed_blob_count, false); write_error: ERROR_WITH_ERRNO("Write error"); @@ -1058,7 +1036,7 @@ write_error: } static int -prepare_chunk_buffer(struct write_streams_ctx *ctx) +prepare_chunk_buffer(struct write_blobs_ctx *ctx) { /* While we are unable to get a new chunk buffer due to too many chunks * already outstanding, retrieve and write the next compressed chunk. */ @@ -1086,9 +1064,9 @@ prepare_chunk_buffer(struct write_streams_ctx *ctx) /* Process the next chunk of data to be written to a WIM resource. 
*/ static int -write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) +write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) { - struct write_streams_ctx *ctx = _ctx; + struct write_blobs_ctx *ctx = _ctx; int ret; const u8 *chunkptr, *chunkend; @@ -1099,7 +1077,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) ret = write_chunk(ctx, chunk, size, size); if (ret) return ret; - ctx->cur_read_stream_offset += size; + ctx->cur_read_blob_offset += size; return 0; } @@ -1123,8 +1101,8 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) } else { needed_chunk_size = min(ctx->out_chunk_size, ctx->cur_chunk_buf_filled + - (ctx->cur_read_stream_size - - ctx->cur_read_stream_offset)); + (ctx->cur_read_blob_size - + ctx->cur_read_blob_offset)); } bytes_consumed = min(chunkend - chunkptr, @@ -1134,7 +1112,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr, bytes_consumed); chunkptr += bytes_consumed; - ctx->cur_read_stream_offset += bytes_consumed; + ctx->cur_read_blob_offset += bytes_consumed; ctx->cur_chunk_buf_filled += bytes_consumed; if (ctx->cur_chunk_buf_filled == needed_chunk_size) { @@ -1147,67 +1125,67 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) return 0; } -/* Finish processing a stream for writing. It may not have been completely +/* Finish processing a blob for writing. It may not have been completely * written yet, as the chunk_compressor implementation may still have chunks * buffered or being compressed. 
*/ static int -write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx) +write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx) { - struct write_streams_ctx *ctx = _ctx; + struct write_blobs_ctx *ctx = _ctx; - wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size || status); + wimlib_assert(ctx->cur_read_blob_offset == ctx->cur_read_blob_size || status); - if (!lte->will_be_in_output_wim) { - /* The 'lte' stream was a duplicate. Now that its data has - * finished being read, it is being discarded in favor of the - * duplicate entry. It therefore is no longer needed, and we - * can fire the DONE_WITH_FILE callback because the file will - * not be read again. + if (!blob->will_be_in_output_wim) { + /* The blob was a duplicate. Now that its data has finished + * being read, it is being discarded in favor of the duplicate + * entry. It therefore is no longer needed, and we can fire the + * DONE_WITH_FILE callback because the file will not be read + * again. * * Note: we can't yet fire DONE_WITH_FILE for non-duplicate - * streams, since it needs to be possible to re-read the file if + * blobs, since it needs to be possible to re-read the file if * it does not compress to less than its original size. */ if (!status) - status = done_with_stream(lte, ctx); - free_lookup_table_entry(lte); - } else if (!status && lte->unhashed && ctx->lookup_table != NULL) { - /* The 'lte' stream was not a duplicate and was previously - * unhashed. Since we passed COMPUTE_MISSING_STREAM_HASHES to - * read_stream_list(), lte->hash is now computed and valid. So - * turn this stream into a "hashed" stream. */ - list_del(&lte->unhashed_list); - lookup_table_insert(ctx->lookup_table, lte); - lte->unhashed = 0; + status = done_with_blob(blob, ctx); + free_blob_descriptor(blob); + } else if (!status && blob->unhashed && ctx->blob_table != NULL) { + /* The blob was not a duplicate and was previously unhashed. 
+ * Since we passed COMPUTE_MISSING_BLOB_HASHES to + * read_blob_list(), blob->hash is now computed and valid. So + * turn this blob into a "hashed" blob. */ + list_del(&blob->unhashed_list); + blob_table_insert(ctx->blob_table, blob); + blob->unhashed = 0; } return status; } -/* Compute statistics about a list of streams that will be written. +/* Compute statistics about a list of blobs that will be written. * - * Assumes the streams are sorted such that all streams located in each distinct - * WIM (specified by WIMStruct) are together. */ + * Assumes the blobs are sorted such that all blobs located in each distinct WIM + * (specified by WIMStruct) are together. */ static void -compute_stream_list_stats(struct list_head *stream_list, - struct write_streams_ctx *ctx) +compute_blob_list_stats(struct list_head *blob_list, + struct write_blobs_ctx *ctx) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; u64 total_bytes = 0; - u64 num_streams = 0; + u64 num_blobs = 0; u64 total_parts = 0; WIMStruct *prev_wim_part = NULL; - list_for_each_entry(lte, stream_list, write_streams_list) { - num_streams++; - total_bytes += lte->size; - if (lte->resource_location == RESOURCE_IN_WIM) { - if (prev_wim_part != lte->rspec->wim) { - prev_wim_part = lte->rspec->wim; + list_for_each_entry(blob, blob_list, write_blobs_list) { + num_blobs++; + total_bytes += blob->size; + if (blob->blob_location == BLOB_IN_WIM) { + if (prev_wim_part != blob->rdesc->wim) { + prev_wim_part = blob->rdesc->wim; total_parts++; } } } ctx->progress_data.progress.write_streams.total_bytes = total_bytes; - ctx->progress_data.progress.write_streams.total_streams = num_streams; + ctx->progress_data.progress.write_streams.total_streams = num_blobs; ctx->progress_data.progress.write_streams.completed_bytes = 0; ctx->progress_data.progress.write_streams.completed_streams = 0; ctx->progress_data.progress.write_streams.compression_type = ctx->out_ctype; @@ -1216,41 +1194,41 @@ 
compute_stream_list_stats(struct list_head *stream_list, ctx->progress_data.next_progress = 0; } -/* Find streams in @stream_list that can be copied to the output WIM in raw form - * rather than compressed. Delete these streams from @stream_list and move them - * to @raw_copy_streams. Return the total uncompressed size of the streams that - * need to be compressed. */ +/* Find blobs in @blob_list that can be copied to the output WIM in raw form + * rather than compressed. Delete these blobs from @blob_list and move them to + * @raw_copy_blobs. Return the total uncompressed size of the blobs that need + * to be compressed. */ static u64 -find_raw_copy_streams(struct list_head *stream_list, - int write_resource_flags, - int out_ctype, - u32 out_chunk_size, - struct list_head *raw_copy_streams) +find_raw_copy_blobs(struct list_head *blob_list, + int write_resource_flags, + int out_ctype, + u32 out_chunk_size, + struct list_head *raw_copy_blobs) { - struct wim_lookup_table_entry *lte, *tmp; + struct blob_descriptor *blob, *tmp; u64 num_bytes_to_compress = 0; - INIT_LIST_HEAD(raw_copy_streams); + INIT_LIST_HEAD(raw_copy_blobs); /* Initialize temporary raw_copy_ok flag. 
*/ - list_for_each_entry(lte, stream_list, write_streams_list) - if (lte->resource_location == RESOURCE_IN_WIM) - lte->rspec->raw_copy_ok = 0; + list_for_each_entry(blob, blob_list, write_blobs_list) + if (blob->blob_location == BLOB_IN_WIM) + blob->rdesc->raw_copy_ok = 0; - list_for_each_entry_safe(lte, tmp, stream_list, write_streams_list) { - if (lte->resource_location == RESOURCE_IN_WIM && - lte->rspec->raw_copy_ok) + list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) { + if (blob->blob_location == BLOB_IN_WIM && + blob->rdesc->raw_copy_ok) { - list_move_tail(&lte->write_streams_list, - raw_copy_streams); - } else if (can_raw_copy(lte, write_resource_flags, - out_ctype, out_chunk_size)) + list_move_tail(&blob->write_blobs_list, + raw_copy_blobs); + } else if (can_raw_copy(blob, write_resource_flags, + out_ctype, out_chunk_size)) { - lte->rspec->raw_copy_ok = 1; - list_move_tail(&lte->write_streams_list, - raw_copy_streams); + blob->rdesc->raw_copy_ok = 1; + list_move_tail(&blob->write_blobs_list, + raw_copy_blobs); } else { - num_bytes_to_compress += lte->size; + num_bytes_to_compress += blob->size; } } @@ -1260,7 +1238,7 @@ find_raw_copy_streams(struct list_head *stream_list, /* Copy a raw compressed resource located in another WIM file to the WIM file * being written. */ static int -write_raw_copy_resource(struct wim_resource_spec *in_rspec, +write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc, struct filedes *out_fd) { u64 cur_read_offset; @@ -1269,26 +1247,26 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, size_t bytes_to_read; int ret; struct filedes *in_fd; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; u64 out_offset_in_wim; DEBUG("Copying raw compressed data (size_in_wim=%"PRIu64", " "uncompressed_size=%"PRIu64")", - in_rspec->size_in_wim, in_rspec->uncompressed_size); + in_rdesc->size_in_wim, in_rdesc->uncompressed_size); /* Copy the raw data. 
*/ - cur_read_offset = in_rspec->offset_in_wim; - end_read_offset = cur_read_offset + in_rspec->size_in_wim; + cur_read_offset = in_rdesc->offset_in_wim; + end_read_offset = cur_read_offset + in_rdesc->size_in_wim; out_offset_in_wim = out_fd->offset; - if (in_rspec->is_pipable) { - if (cur_read_offset < sizeof(struct pwm_stream_hdr)) + if (in_rdesc->is_pipable) { + if (cur_read_offset < sizeof(struct pwm_blob_hdr)) return WIMLIB_ERR_INVALID_PIPABLE_WIM; - cur_read_offset -= sizeof(struct pwm_stream_hdr); - out_offset_in_wim += sizeof(struct pwm_stream_hdr); + cur_read_offset -= sizeof(struct pwm_blob_hdr); + out_offset_in_wim += sizeof(struct pwm_blob_hdr); } - in_fd = &in_rspec->wim->in_fd; + in_fd = &in_rdesc->wim->in_fd; wimlib_assert(cur_read_offset != end_read_offset); do { @@ -1306,13 +1284,13 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, } while (cur_read_offset != end_read_offset); - list_for_each_entry(lte, &in_rspec->stream_list, rspec_node) { - if (lte->will_be_in_output_wim) { - stream_set_out_reshdr_for_reuse(lte); - if (in_rspec->flags & WIM_RESHDR_FLAG_SOLID) - lte->out_res_offset_in_wim = out_offset_in_wim; + list_for_each_entry(blob, &in_rdesc->blob_list, rdesc_node) { + if (blob->will_be_in_output_wim) { + blob_set_out_reshdr_for_reuse(blob); + if (in_rdesc->flags & WIM_RESHDR_FLAG_SOLID) + blob->out_res_offset_in_wim = out_offset_in_wim; else - lte->out_reshdr.offset_in_wim = out_offset_in_wim; + blob->out_reshdr.offset_in_wim = out_offset_in_wim; } } @@ -1322,27 +1300,26 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, /* Copy a list of raw compressed resources located in other WIM file(s) to the * WIM file being written. 
*/ static int -write_raw_copy_resources(struct list_head *raw_copy_streams, +write_raw_copy_resources(struct list_head *raw_copy_blobs, struct filedes *out_fd, - struct write_streams_progress_data *progress_data) + struct write_blobs_progress_data *progress_data) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; int ret; - list_for_each_entry(lte, raw_copy_streams, write_streams_list) - lte->rspec->raw_copy_ok = 1; + list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) + blob->rdesc->raw_copy_ok = 1; - list_for_each_entry(lte, raw_copy_streams, write_streams_list) { - if (lte->rspec->raw_copy_ok) { - /* Write each solid resource only one time, no matter - * how many streams reference it. */ - ret = write_raw_copy_resource(lte->rspec, out_fd); + list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) { + if (blob->rdesc->raw_copy_ok) { + /* Write each solid resource only one time. */ + ret = write_raw_copy_resource(blob->rdesc, out_fd); if (ret) return ret; - lte->rspec->raw_copy_ok = 0; + blob->rdesc->raw_copy_ok = 0; } - ret = do_write_streams_progress(progress_data, lte->size, - 1, false); + ret = do_write_blobs_progress(progress_data, blob->size, + 1, false); if (ret) return ret; } @@ -1351,7 +1328,7 @@ write_raw_copy_resources(struct list_head *raw_copy_streams, /* Wait for and write all chunks pending in the compressor. 
*/ static int -finish_remaining_chunks(struct write_streams_ctx *ctx) +finish_remaining_chunks(struct write_blobs_ctx *ctx) { const void *cdata; u32 csize; @@ -1377,53 +1354,53 @@ finish_remaining_chunks(struct write_streams_ctx *ctx) } static void -remove_zero_length_streams(struct list_head *stream_list) -{ - struct wim_lookup_table_entry *lte, *tmp; - - list_for_each_entry_safe(lte, tmp, stream_list, write_streams_list) { - wimlib_assert(lte->will_be_in_output_wim); - if (lte->size == 0) { - list_del(&lte->write_streams_list); - lte->out_reshdr.offset_in_wim = 0; - lte->out_reshdr.size_in_wim = 0; - lte->out_reshdr.uncompressed_size = 0; - lte->out_reshdr.flags = filter_resource_flags(lte->flags); +remove_empty_blobs(struct list_head *blob_list) +{ + struct blob_descriptor *blob, *tmp; + + list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) { + wimlib_assert(blob->will_be_in_output_wim); + if (blob->size == 0) { + list_del(&blob->write_blobs_list); + blob->out_reshdr.offset_in_wim = 0; + blob->out_reshdr.size_in_wim = 0; + blob->out_reshdr.uncompressed_size = 0; + blob->out_reshdr.flags = filter_resource_flags(blob->flags); } } } static void -init_done_with_file_info(struct list_head *stream_list) +init_done_with_file_info(struct list_head *blob_list) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - list_for_each_entry(lte, stream_list, write_streams_list) { - if (is_file_stream(lte)) { - lte->file_inode->num_remaining_streams = 0; - lte->may_send_done_with_file = 1; + list_for_each_entry(blob, blob_list, write_blobs_list) { + if (blob_is_in_file(blob)) { + blob->file_inode->num_remaining_streams = 0; + blob->may_send_done_with_file = 1; } else { - lte->may_send_done_with_file = 0; + blob->may_send_done_with_file = 0; } } - list_for_each_entry(lte, stream_list, write_streams_list) - if (lte->may_send_done_with_file) - lte->file_inode->num_remaining_streams++; + list_for_each_entry(blob, blob_list, write_blobs_list) + if 
(blob->may_send_done_with_file) + blob->file_inode->num_remaining_streams++; } /* - * Write a list of streams to the output WIM file. + * Write a list of blobs to the output WIM file. * - * @stream_list - * The list of streams to write, specified by a list of `struct - * wim_lookup_table_entry's linked by the 'write_streams_list' member. + * @blob_list + * The list of blobs to write, specified by a list of 'struct blob_descriptor' linked + * by the 'write_blobs_list' member. * * @out_fd - * The file descriptor, opened for writing, to which to write the streams. + * The file descriptor, opened for writing, to which to write the blobs. * * @write_resource_flags - * Flags to modify how the streams are written: + * Flags to modify how the blobs are written: * * WRITE_RESOURCE_FLAG_RECOMPRESS: * Force compression of all resources, even if they could otherwise @@ -1436,103 +1413,100 @@ init_done_with_file_info(struct list_head *stream_list) * @out_fd will be performed (so it may be a pipe). * * WRITE_RESOURCE_FLAG_SOLID: - * Combine all the streams into a single resource rather than - * writing them in separate resources. This flag is only valid if - * the WIM version number has been, or will be, set to - * WIM_VERSION_SOLID. This flag may not be combined with - * WRITE_RESOURCE_FLAG_PIPABLE. + * Combine all the blobs into a single resource rather than writing + * them in separate resources. This flag is only valid if the WIM + * version number has been, or will be, set to WIM_VERSION_SOLID. + * This flag may not be combined with WRITE_RESOURCE_FLAG_PIPABLE. * * @out_ctype - * Compression format to use to write the output streams, specified as one - * of the WIMLIB_COMPRESSION_TYPE_* constants. - * WIMLIB_COMPRESSION_TYPE_NONE is allowed. + * Compression format to use in the output resources, specified as one of + * the WIMLIB_COMPRESSION_TYPE_* constants. WIMLIB_COMPRESSION_TYPE_NONE + * is allowed. * * @out_chunk_size - * Chunk size to use to write the streams. 
It must be a valid chunk size - * for the specified compression format @out_ctype, unless @out_ctype is - * WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter is ignored. + * Compression chunk size to use in the output resources. It must be a + * valid chunk size for the specified compression format @out_ctype, unless + * @out_ctype is WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter + * is ignored. * * @num_threads * Number of threads to use to compress data. If 0, a default number of * threads will be chosen. The number of threads still may be decreased * from the specified value if insufficient memory is detected. * - * @lookup_table - * If on-the-fly deduplication of unhashed streams is desired, this - * parameter must be pointer to the lookup table for the WIMStruct on whose - * behalf the streams are being written. Otherwise, this parameter can be - * NULL. + * @blob_table + * If on-the-fly deduplication of unhashed blobs is desired, this parameter + * must be pointer to the blob table for the WIMStruct on whose behalf the + * blobs are being written. Otherwise, this parameter can be NULL. * * @filter_ctx - * If on-the-fly deduplication of unhashed streams is desired, this - * parameter can be a pointer to a context for stream filtering used to - * detect whether the duplicate stream has been hard-filtered or not. If - * no streams are hard-filtered or no streams are unhashed, this parameter - * can be NULL. + * If on-the-fly deduplication of unhashed blobs is desired, this parameter + * can be a pointer to a context for blob filtering used to detect whether + * the duplicate blob has been hard-filtered or not. If no blobs are + * hard-filtered or no blobs are unhashed, this parameter can be NULL. 
* - * This function will write the streams in @stream_list to resources in + * This function will write the blobs in @blob_list to resources in * consecutive positions in the output WIM file, or to a single solid resource * if WRITE_RESOURCE_FLAG_SOLID was specified in @write_resource_flags. In both - * cases, the @out_reshdr of the `struct wim_lookup_table_entry' for each stream - * written will be updated to specify its location, size, and flags in the - * output WIM. In the solid resource case, WIM_RESHDR_FLAG_SOLID will be set in - * the @flags field of each @out_reshdr, and furthermore @out_res_offset_in_wim - * and @out_res_size_in_wim of each @out_reshdr will be set to the offset and - * size, respectively, in the output WIM of the solid resource containing the - * corresponding stream. - * - * Each of the streams to write may be in any location supported by the - * resource-handling code (specifically, read_stream_list()), such as the - * contents of external file that has been logically added to the output WIM, or - * a stream in another WIM file that has been imported, or even a stream in the - * "same" WIM file of which a modified copy is being written. In the case that - * a stream is already in a WIM file and uses compatible compression parameters, - * by default this function will re-use the raw data instead of decompressing - * it, then recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS + * cases, the @out_reshdr of the `struct blob_descriptor' for each blob written will be + * updated to specify its location, size, and flags in the output WIM. In the + * solid resource case, WIM_RESHDR_FLAG_SOLID will be set in the @flags field of + * each @out_reshdr, and furthermore @out_res_offset_in_wim and + * @out_res_size_in_wim of each @out_reshdr will be set to the offset and size, + * respectively, in the output WIM of the solid resource containing the + * corresponding blob. 
+ * + * Each of the blobs to write may be in any location supported by the + * resource-handling code (specifically, read_blob_list()), such as the contents + * of external file that has been logically added to the output WIM, or a blob + * in another WIM file that has been imported, or even a blob in the "same" WIM + * file of which a modified copy is being written. In the case that a blob is + * already in a WIM file and uses compatible compression parameters, by default + * this function will re-use the raw data instead of decompressing it, then + * recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS * specified in @write_resource_flags, this is not done. * * As a further requirement, this function requires that the - * @will_be_in_output_wim member be set to 1 on all streams in @stream_list as - * well as any other streams not in @stream_list that will be in the output WIM - * file, but set to 0 on any other streams in the output WIM's lookup table or - * sharing a solid resource with a stream in @stream_list. Still furthermore, - * if on-the-fly deduplication of streams is possible, then all streams in - * @stream_list must also be linked by @lookup_table_list along with any other - * streams that have @will_be_in_output_wim set. - * - * This function handles on-the-fly deduplication of streams for which SHA1 - * message digests have not yet been calculated. Such streams may or may not - * need to be written. If @lookup_table is non-NULL, then each stream in - * @stream_list that has @unhashed set but not @unique_size set is checksummed - * immediately before it would otherwise be read for writing in order to - * determine if it is identical to another stream already being written or one - * that would be filtered out of the output WIM using stream_filtered() with the - * context @filter_ctx. 
Each such duplicate stream will be removed from - * @stream_list, its reference count transfered to the pre-existing duplicate - * stream, its memory freed, and will not be written. Alternatively, if a - * stream in @stream_list is a duplicate with any stream in @lookup_table that - * has not been marked for writing or would not be hard-filtered, it is freed - * and the pre-existing duplicate is written instead, taking ownership of the - * reference count and slot in the @lookup_table_list. - * - * Returns 0 if every stream was either written successfully or did not need to - * be written; otherwise returns a non-zero error code. + * @will_be_in_output_wim member be set to 1 on all blobs in @blob_list as well + * as any other blobs not in @blob_list that will be in the output WIM file, but + * set to 0 on any other blobs in the output WIM's blob table or sharing a solid + * resource with a blob in @blob_list. Still furthermore, if on-the-fly + * deduplication of blobs is possible, then all blobs in @blob_list must also be + * linked by @blob_table_list along with any other blobs that have + * @will_be_in_output_wim set. + * + * This function handles on-the-fly deduplication of blobs for which SHA-1 + * message digests have not yet been calculated. Such blobs may or may not need + * to be written. If @blob_table is non-NULL, then each blob in @blob_list that + * has @unhashed set but not @unique_size set is checksummed immediately before + * it would otherwise be read for writing in order to determine if it is + * identical to another blob already being written or one that would be filtered + * out of the output WIM using blob_filtered() with the context @filter_ctx. + * Each such duplicate blob will be removed from @blob_list, its reference count + * transfered to the pre-existing duplicate blob, its memory freed, and will not + * be written. 
Alternatively, if a blob in @blob_list is a duplicate with any + * blob in @blob_table that has not been marked for writing or would not be + * hard-filtered, it is freed and the pre-existing duplicate is written instead, + * taking ownership of the reference count and slot in the @blob_table_list. + * + * Returns 0 if every blob was either written successfully or did not need to be + * written; otherwise returns a non-zero error code. */ static int -write_stream_list(struct list_head *stream_list, - struct filedes *out_fd, - int write_resource_flags, - int out_ctype, - u32 out_chunk_size, - unsigned num_threads, - struct wim_lookup_table *lookup_table, - struct filter_context *filter_ctx, - wimlib_progress_func_t progfunc, - void *progctx) +write_blob_list(struct list_head *blob_list, + struct filedes *out_fd, + int write_resource_flags, + int out_ctype, + u32 out_chunk_size, + unsigned num_threads, + struct blob_table *blob_table, + struct filter_context *filter_ctx, + wimlib_progress_func_t progfunc, + void *progctx) { int ret; - struct write_streams_ctx ctx; - struct list_head raw_copy_streams; + struct write_blobs_ctx ctx; + struct list_head raw_copy_blobs; wimlib_assert((write_resource_flags & (WRITE_RESOURCE_FLAG_SOLID | @@ -1540,64 +1514,64 @@ write_stream_list(struct list_head *stream_list, (WRITE_RESOURCE_FLAG_SOLID | WRITE_RESOURCE_FLAG_PIPABLE)); - remove_zero_length_streams(stream_list); + remove_empty_blobs(blob_list); - if (list_empty(stream_list)) { - DEBUG("No streams to write."); + if (list_empty(blob_list)) { + DEBUG("No blobs to write."); return 0; } /* If needed, set auxiliary information so that we can detect when the * library has finished using each external file. 
*/ if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)) - init_done_with_file_info(stream_list); + init_done_with_file_info(blob_list); memset(&ctx, 0, sizeof(ctx)); ctx.out_fd = out_fd; - ctx.lookup_table = lookup_table; + ctx.blob_table = blob_table; ctx.out_ctype = out_ctype; ctx.out_chunk_size = out_chunk_size; ctx.write_resource_flags = write_resource_flags; ctx.filter_ctx = filter_ctx; /* - * We normally sort the streams to write by a "sequential" order that is + * We normally sort the blobs to write by a "sequential" order that is * optimized for reading. But when using solid compression, we instead - * sort the streams by file extension and file name (when applicable; - * and we don't do this for streams from solid resources) so that - * similar files are grouped together, which improves the compression - * ratio. This is somewhat of a hack since a stream does not - * necessarily correspond one-to-one with a filename, nor is there any - * guarantee that two files with similar names or extensions are - * actually similar in content. A potential TODO is to sort the streams - * based on some measure of similarity of their actual contents. + * sort the blobs by file extension and file name (when applicable; and + * we don't do this for blobs from solid resources) so that similar + * files are grouped together, which improves the compression ratio. + * This is somewhat of a hack since a blob does not necessarily + * correspond one-to-one with a filename, nor is there any guarantee + * that two files with similar names or extensions are actually similar + * in content. A potential TODO is to sort the blobs based on some + * measure of similarity of their actual contents. 
*/ - ret = sort_stream_list_by_sequential_order(stream_list, - offsetof(struct wim_lookup_table_entry, - write_streams_list)); + ret = sort_blob_list_by_sequential_order(blob_list, + offsetof(struct blob_descriptor, + write_blobs_list)); if (ret) return ret; - compute_stream_list_stats(stream_list, &ctx); + compute_blob_list_stats(blob_list, &ctx); if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) { - ret = sort_stream_list_for_solid_compression(stream_list); + ret = sort_blob_list_for_solid_compression(blob_list); if (unlikely(ret)) - WARNING("Failed to sort streams for solid compression. Continuing anyways."); + WARNING("Failed to sort blobs for solid compression. Continuing anyways."); } ctx.progress_data.progfunc = progfunc; ctx.progress_data.progctx = progctx; - ctx.num_bytes_to_compress = find_raw_copy_streams(stream_list, - write_resource_flags, - out_ctype, - out_chunk_size, - &raw_copy_streams); + ctx.num_bytes_to_compress = find_raw_copy_blobs(blob_list, + write_resource_flags, + out_ctype, + out_chunk_size, + &raw_copy_blobs); - DEBUG("Writing stream list " + DEBUG("Writing blob list " "(offset = %"PRIu64", write_resource_flags=0x%08x, " "out_ctype=%d, out_chunk_size=%u, num_threads=%u, " "total_bytes=%"PRIu64", num_bytes_to_compress=%"PRIu64")", @@ -1648,8 +1622,8 @@ write_stream_list(struct list_head *stream_list, DEBUG("Actually using %u threads", ctx.progress_data.progress.write_streams.num_threads); - INIT_LIST_HEAD(&ctx.pending_streams); - INIT_LIST_HEAD(&ctx.solid_streams); + INIT_LIST_HEAD(&ctx.blobs_being_compressed); + INIT_LIST_HEAD(&ctx.blobs_in_solid_resource); ret = call_progress(ctx.progress_data.progfunc, WIMLIB_PROGRESS_MSG_WRITE_STREAMS, @@ -1664,24 +1638,24 @@ write_stream_list(struct list_head *stream_list, goto out_destroy_context; } - /* Read the list of streams needing to be compressed, using the - * specified callbacks to execute processing of the data. 
*/ + /* Read the list of blobs needing to be compressed, using the specified + * callbacks to execute processing of the data. */ - struct read_stream_list_callbacks cbs = { - .begin_stream = write_stream_begin_read, - .begin_stream_ctx = &ctx, - .consume_chunk = write_stream_process_chunk, + struct read_blob_list_callbacks cbs = { + .begin_blob = write_blob_begin_read, + .begin_blob_ctx = &ctx, + .consume_chunk = write_blob_process_chunk, .consume_chunk_ctx = &ctx, - .end_stream = write_stream_end_read, - .end_stream_ctx = &ctx, + .end_blob = write_blob_end_read, + .end_blob_ctx = &ctx, }; - ret = read_stream_list(stream_list, - offsetof(struct wim_lookup_table_entry, write_streams_list), - &cbs, - STREAM_LIST_ALREADY_SORTED | - VERIFY_STREAM_HASHES | - COMPUTE_MISSING_STREAM_HASHES); + ret = read_blob_list(blob_list, + offsetof(struct blob_descriptor, write_blobs_list), + &cbs, + BLOB_LIST_ALREADY_SORTED | + VERIFY_BLOB_HASHES | + COMPUTE_MISSING_BLOB_HASHES); if (ret) goto out_destroy_context; @@ -1692,7 +1666,7 @@ write_stream_list(struct list_head *stream_list, if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { struct wim_reshdr reshdr; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; u64 offset_in_res; ret = end_write_resource(&ctx, &reshdr); @@ -1705,16 +1679,16 @@ write_stream_list(struct list_head *stream_list, reshdr.uncompressed_size); offset_in_res = 0; - list_for_each_entry(lte, &ctx.solid_streams, write_streams_list) { - lte->out_reshdr.size_in_wim = lte->size; - lte->out_reshdr.flags = filter_resource_flags(lte->flags); - lte->out_reshdr.flags |= WIM_RESHDR_FLAG_SOLID; - lte->out_reshdr.uncompressed_size = 0; - lte->out_reshdr.offset_in_wim = offset_in_res; - lte->out_res_offset_in_wim = reshdr.offset_in_wim; - lte->out_res_size_in_wim = reshdr.size_in_wim; - lte->out_res_uncompressed_size = reshdr.uncompressed_size; - offset_in_res += lte->size; + list_for_each_entry(blob, &ctx.blobs_in_solid_resource, write_blobs_list) { + 
blob->out_reshdr.size_in_wim = blob->size; + blob->out_reshdr.flags = filter_resource_flags(blob->flags); + blob->out_reshdr.flags |= WIM_RESHDR_FLAG_SOLID; + blob->out_reshdr.uncompressed_size = 0; + blob->out_reshdr.offset_in_wim = offset_in_res; + blob->out_res_offset_in_wim = reshdr.offset_in_wim; + blob->out_res_size_in_wim = reshdr.size_in_wim; + blob->out_res_uncompressed_size = reshdr.uncompressed_size; + offset_in_res += blob->size; } wimlib_assert(offset_in_res == reshdr.uncompressed_size); } @@ -1722,7 +1696,7 @@ write_stream_list(struct list_head *stream_list, out_write_raw_copy_resources: /* Copy any compressed resources for which the raw data can be reused * without decompression. */ - ret = write_raw_copy_resources(&raw_copy_streams, ctx.out_fd, + ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd, &ctx.progress_data); out_destroy_context: @@ -1734,24 +1708,23 @@ out_destroy_context: } static int -is_stream_in_solid_resource(struct wim_lookup_table_entry *lte, void *_ignore) +is_blob_in_solid_resource(struct blob_descriptor *blob, void *_ignore) { - return lte_is_partial(lte); + return blob_is_in_solid_wim_resource(blob); } static bool wim_has_solid_resources(WIMStruct *wim) { - return for_lookup_table_entry(wim->lookup_table, - is_stream_in_solid_resource, NULL); + return for_blob_in_table(wim->blob_table, is_blob_in_solid_resource, NULL); } static int -wim_write_stream_list(WIMStruct *wim, - struct list_head *stream_list, - int write_flags, - unsigned num_threads, - struct filter_context *filter_ctx) +wim_write_blob_list(WIMStruct *wim, + struct list_head *blob_list, + int write_flags, + unsigned num_threads, + struct filter_context *filter_ctx) { int out_ctype; u32 out_chunk_size; @@ -1760,9 +1733,9 @@ wim_write_stream_list(WIMStruct *wim, write_resource_flags = write_flags_to_resource_flags(write_flags); /* wimlib v1.7.0: create a solid WIM file by default if the WIM version - * has been set to WIM_VERSION_SOLID and at least one stream in 
the - * WIM's lookup table is located in a solid resource (may be the same - * WIM, or a different one in the case of export). */ + * has been set to WIM_VERSION_SOLID and at least one blob in the WIM's + * blob table is located in a solid resource (may be the same WIM, or a + * different one in the case of export). */ if (wim->hdr.wim_version == WIM_VERSION_SOLID && wim_has_solid_resources(wim)) { @@ -1777,38 +1750,38 @@ wim_write_stream_list(WIMStruct *wim, out_ctype = wim->out_compression_type; } - return write_stream_list(stream_list, - &wim->out_fd, - write_resource_flags, - out_ctype, - out_chunk_size, - num_threads, - wim->lookup_table, - filter_ctx, - wim->progfunc, - wim->progctx); + return write_blob_list(blob_list, + &wim->out_fd, + write_resource_flags, + out_ctype, + out_chunk_size, + num_threads, + wim->blob_table, + filter_ctx, + wim->progfunc, + wim->progctx); } static int -write_wim_resource(struct wim_lookup_table_entry *lte, +write_wim_resource(struct blob_descriptor *blob, struct filedes *out_fd, int out_ctype, u32 out_chunk_size, int write_resource_flags) { - LIST_HEAD(stream_list); - list_add(&lte->write_streams_list, &stream_list); - lte->will_be_in_output_wim = 1; - return write_stream_list(&stream_list, - out_fd, - write_resource_flags & ~WRITE_RESOURCE_FLAG_SOLID, - out_ctype, - out_chunk_size, - 1, - NULL, - NULL, - NULL, - NULL); + LIST_HEAD(blob_list); + list_add(&blob->write_blobs_list, &blob_list); + blob->will_be_in_output_wim = 1; + return write_blob_list(&blob_list, + out_fd, + write_resource_flags & ~WRITE_RESOURCE_FLAG_SOLID, + out_ctype, + out_chunk_size, + 1, + NULL, + NULL, + NULL, + NULL); } int @@ -1821,51 +1794,51 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, int write_resource_flags) { int ret; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - /* Set up a temporary lookup table entry to provide to + /* Set up a temporary blob descriptor to provide to * write_wim_resource(). 
*/ - lte = new_lookup_table_entry(); - if (lte == NULL) + blob = new_blob_descriptor(); + if (blob == NULL) return WIMLIB_ERR_NOMEM; - lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte->attached_buffer = (void*)buf; - lte->size = buf_size; - lte->flags = reshdr_flags; + blob->blob_location = BLOB_IN_ATTACHED_BUFFER; + blob->attached_buffer = (void*)buf; + blob->size = buf_size; + blob->flags = reshdr_flags; if (write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { - sha1_buffer(buf, buf_size, lte->hash); - lte->unhashed = 0; + sha1_buffer(buf, buf_size, blob->hash); + blob->unhashed = 0; } else { - lte->unhashed = 1; + blob->unhashed = 1; } - ret = write_wim_resource(lte, out_fd, out_ctype, out_chunk_size, + ret = write_wim_resource(blob, out_fd, out_ctype, out_chunk_size, write_resource_flags); if (ret) - goto out_free_lte; + goto out_free_blob; - copy_reshdr(out_reshdr, &lte->out_reshdr); + copy_reshdr(out_reshdr, &blob->out_reshdr); if (hash) - copy_hash(hash, lte->hash); + copy_hash(hash, blob->hash); ret = 0; -out_free_lte: - lte->resource_location = RESOURCE_NONEXISTENT; - free_lookup_table_entry(lte); +out_free_blob: + blob->blob_location = BLOB_NONEXISTENT; + free_blob_descriptor(blob); return ret; } -struct stream_size_table { +struct blob_size_table { struct hlist_head *array; size_t num_entries; size_t capacity; }; static int -init_stream_size_table(struct stream_size_table *tab, size_t capacity) +init_blob_size_table(struct blob_size_table *tab, size_t capacity) { tab->array = CALLOC(capacity, sizeof(tab->array[0])); if (tab->array == NULL) @@ -1876,110 +1849,107 @@ init_stream_size_table(struct stream_size_table *tab, size_t capacity) } static void -destroy_stream_size_table(struct stream_size_table *tab) +destroy_blob_size_table(struct blob_size_table *tab) { FREE(tab->array); } static int -stream_size_table_insert(struct wim_lookup_table_entry *lte, void *_tab) +blob_size_table_insert(struct blob_descriptor *blob, void *_tab) { - struct 
stream_size_table *tab = _tab; + struct blob_size_table *tab = _tab; size_t pos; - struct wim_lookup_table_entry *same_size_lte; + struct blob_descriptor *same_size_blob; struct hlist_node *tmp; - pos = hash_u64(lte->size) % tab->capacity; - lte->unique_size = 1; - hlist_for_each_entry(same_size_lte, tmp, &tab->array[pos], hash_list_2) { - if (same_size_lte->size == lte->size) { - lte->unique_size = 0; - same_size_lte->unique_size = 0; + pos = hash_u64(blob->size) % tab->capacity; + blob->unique_size = 1; + hlist_for_each_entry(same_size_blob, tmp, &tab->array[pos], hash_list_2) { + if (same_size_blob->size == blob->size) { + blob->unique_size = 0; + same_size_blob->unique_size = 0; break; } } - hlist_add_head(&lte->hash_list_2, &tab->array[pos]); + hlist_add_head(&blob->hash_list_2, &tab->array[pos]); tab->num_entries++; return 0; } -struct find_streams_ctx { +struct find_blobs_ctx { WIMStruct *wim; int write_flags; - struct list_head stream_list; - struct stream_size_table stream_size_tab; + struct list_head blob_list; + struct blob_size_table blob_size_tab; }; static void -reference_stream_for_write(struct wim_lookup_table_entry *lte, - struct list_head *stream_list, u32 nref) +reference_blob_for_write(struct blob_descriptor *blob, + struct list_head *blob_list, u32 nref) { - if (!lte->will_be_in_output_wim) { - lte->out_refcnt = 0; - list_add_tail(&lte->write_streams_list, stream_list); - lte->will_be_in_output_wim = 1; + if (!blob->will_be_in_output_wim) { + blob->out_refcnt = 0; + list_add_tail(&blob->write_blobs_list, blob_list); + blob->will_be_in_output_wim = 1; } - lte->out_refcnt += nref; + blob->out_refcnt += nref; } static int -fully_reference_stream_for_write(struct wim_lookup_table_entry *lte, - void *_stream_list) +fully_reference_blob_for_write(struct blob_descriptor *blob, void *_blob_list) { - struct list_head *stream_list = _stream_list; - lte->will_be_in_output_wim = 0; - reference_stream_for_write(lte, stream_list, lte->refcnt); + struct list_head 
*blob_list = _blob_list; + blob->will_be_in_output_wim = 0; + reference_blob_for_write(blob, blob_list, blob->refcnt); return 0; } static int -inode_find_streams_to_reference(const struct wim_inode *inode, - const struct wim_lookup_table *table, - struct list_head *stream_list) +inode_find_blobs_to_reference(const struct wim_inode *inode, + const struct blob_table *table, + struct list_head *blob_list) { - struct wim_lookup_table_entry *lte; - unsigned i; - wimlib_assert(inode->i_nlink > 0); - for (i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte(inode, i, table); - if (lte) - reference_stream_for_write(lte, stream_list, - inode->i_nlink); - else if (!is_zero_hash(inode_stream_hash(inode, i))) + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct blob_descriptor *blob; + + blob = stream_blob(&inode->i_streams[i], table); + if (blob) + reference_blob_for_write(blob, blob_list, inode->i_nlink); + else if (!is_zero_hash(stream_hash(&inode->i_streams[i]))) return WIMLIB_ERR_RESOURCE_NOT_FOUND; } return 0; } static int -do_stream_set_not_in_output_wim(struct wim_lookup_table_entry *lte, void *_ignore) +do_blob_set_not_in_output_wim(struct blob_descriptor *blob, void *_ignore) { - lte->will_be_in_output_wim = 0; + blob->will_be_in_output_wim = 0; return 0; } static int -image_find_streams_to_reference(WIMStruct *wim) +image_find_blobs_to_reference(WIMStruct *wim) { struct wim_image_metadata *imd; struct wim_inode *inode; - struct wim_lookup_table_entry *lte; - struct list_head *stream_list; + struct blob_descriptor *blob; + struct list_head *blob_list; int ret; imd = wim_get_current_image_metadata(wim); - image_for_each_unhashed_stream(lte, imd) - lte->will_be_in_output_wim = 0; + image_for_each_unhashed_blob(blob, imd) + blob->will_be_in_output_wim = 0; - stream_list = wim->private; + blob_list = wim->private; image_for_each_inode(inode, imd) { - ret = inode_find_streams_to_reference(inode, - wim->lookup_table, - stream_list); + ret = 
inode_find_blobs_to_reference(inode, + wim->blob_table, + blob_list); if (ret) return ret; } @@ -1987,40 +1957,40 @@ image_find_streams_to_reference(WIMStruct *wim) } static int -prepare_unfiltered_list_of_streams_in_output_wim(WIMStruct *wim, - int image, - int streams_ok, - struct list_head *stream_list_ret) +prepare_unfiltered_list_of_blobs_in_output_wim(WIMStruct *wim, + int image, + int blobs_ok, + struct list_head *blob_list_ret) { int ret; - INIT_LIST_HEAD(stream_list_ret); + INIT_LIST_HEAD(blob_list_ret); - if (streams_ok && (image == WIMLIB_ALL_IMAGES || - (image == 1 && wim->hdr.image_count == 1))) + if (blobs_ok && (image == WIMLIB_ALL_IMAGES || + (image == 1 && wim->hdr.image_count == 1))) { - /* Fast case: Assume that all streams are being written and - * that the reference counts are correct. */ - struct wim_lookup_table_entry *lte; + /* Fast case: Assume that all blobs are being written and that + * the reference counts are correct. */ + struct blob_descriptor *blob; struct wim_image_metadata *imd; unsigned i; - for_lookup_table_entry(wim->lookup_table, - fully_reference_stream_for_write, - stream_list_ret); + for_blob_in_table(wim->blob_table, + fully_reference_blob_for_write, + blob_list_ret); for (i = 0; i < wim->hdr.image_count; i++) { imd = wim->image_metadata[i]; - image_for_each_unhashed_stream(lte, imd) - fully_reference_stream_for_write(lte, stream_list_ret); + image_for_each_unhashed_blob(blob, imd) + fully_reference_blob_for_write(blob, blob_list_ret); } } else { /* Slow case: Walk through the images being written and - * determine the streams referenced. */ - for_lookup_table_entry(wim->lookup_table, - do_stream_set_not_in_output_wim, NULL); - wim->private = stream_list_ret; - ret = for_image(wim, image, image_find_streams_to_reference); + * determine the blobs referenced. 
*/ + for_blob_in_table(wim->blob_table, + do_blob_set_not_in_output_wim, NULL); + wim->private = blob_list_ret; + ret = for_image(wim, image, image_find_blobs_to_reference); if (ret) return ret; } @@ -2029,59 +1999,57 @@ prepare_unfiltered_list_of_streams_in_output_wim(WIMStruct *wim, } struct insert_other_if_hard_filtered_ctx { - struct stream_size_table *tab; + struct blob_size_table *tab; struct filter_context *filter_ctx; }; static int -insert_other_if_hard_filtered(struct wim_lookup_table_entry *lte, void *_ctx) +insert_other_if_hard_filtered(struct blob_descriptor *blob, void *_ctx) { struct insert_other_if_hard_filtered_ctx *ctx = _ctx; - if (!lte->will_be_in_output_wim && - stream_hard_filtered(lte, ctx->filter_ctx)) - stream_size_table_insert(lte, ctx->tab); + if (!blob->will_be_in_output_wim && + blob_hard_filtered(blob, ctx->filter_ctx)) + blob_size_table_insert(blob, ctx->tab); return 0; } static int -determine_stream_size_uniquity(struct list_head *stream_list, - struct wim_lookup_table *lt, - struct filter_context *filter_ctx) +determine_blob_size_uniquity(struct list_head *blob_list, + struct blob_table *lt, + struct filter_context *filter_ctx) { int ret; - struct stream_size_table tab; - struct wim_lookup_table_entry *lte; + struct blob_size_table tab; + struct blob_descriptor *blob; - ret = init_stream_size_table(&tab, 9001); + ret = init_blob_size_table(&tab, 9001); if (ret) return ret; - if (may_hard_filter_streams(filter_ctx)) { + if (may_hard_filter_blobs(filter_ctx)) { struct insert_other_if_hard_filtered_ctx ctx = { .tab = &tab, .filter_ctx = filter_ctx, }; - for_lookup_table_entry(lt, insert_other_if_hard_filtered, &ctx); + for_blob_in_table(lt, insert_other_if_hard_filtered, &ctx); } - list_for_each_entry(lte, stream_list, write_streams_list) - stream_size_table_insert(lte, &tab); + list_for_each_entry(blob, blob_list, write_blobs_list) + blob_size_table_insert(blob, &tab); - destroy_stream_size_table(&tab); + destroy_blob_size_table(&tab); 
return 0; } static void -filter_stream_list_for_write(struct list_head *stream_list, - struct filter_context *filter_ctx) +filter_blob_list_for_write(struct list_head *blob_list, + struct filter_context *filter_ctx) { - struct wim_lookup_table_entry *lte, *tmp; + struct blob_descriptor *blob, *tmp; - list_for_each_entry_safe(lte, tmp, - stream_list, write_streams_list) - { - int status = stream_filtered(lte, filter_ctx); + list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) { + int status = blob_filtered(blob, filter_ctx); if (status == 0) { /* Not filtered. */ @@ -2091,19 +2059,19 @@ filter_stream_list_for_write(struct list_head *stream_list, /* Soft filtered. */ } else { /* Hard filtered. */ - lte->will_be_in_output_wim = 0; - list_del(&lte->lookup_table_list); + blob->will_be_in_output_wim = 0; + list_del(&blob->blob_table_list); } - list_del(&lte->write_streams_list); + list_del(&blob->write_blobs_list); } } } /* - * prepare_stream_list_for_write() - + * prepare_blob_list_for_write() - * - * Prepare the list of streams to write for writing a WIM containing the - * specified image(s) with the specified write flags. + * Prepare the list of blobs to write for writing a WIM containing the specified + * image(s) with the specified write flags. * * @wim * The WIMStruct on whose behalf the write is occurring. @@ -2114,142 +2082,140 @@ filter_stream_list_for_write(struct list_head *stream_list, * @write_flags * WIMLIB_WRITE_FLAG_* flags for the write operation: * - * STREAMS_OK: For writes of all images, assume that all streams in the - * lookup table of @wim and the per-image lists of unhashed streams should - * be taken as-is, and image metadata should not be searched for - * references. This does not exclude filtering with OVERWRITE and - * SKIP_EXTERNAL_WIMS, below. 
+ * STREAMS_OK: For writes of all images, assume that all blobs in the blob + * table of @wim and the per-image lists of unhashed blobs should be taken + * as-is, and image metadata should not be searched for references. This + * does not exclude filtering with OVERWRITE and SKIP_EXTERNAL_WIMS, below. * - * OVERWRITE: Streams already present in @wim shall not be returned in - * @stream_list_ret. + * OVERWRITE: Blobs already present in @wim shall not be returned in + * @blob_list_ret. * - * SKIP_EXTERNAL_WIMS: Streams already present in a WIM file, but not - * @wim, shall be returned in neither @stream_list_ret nor - * @lookup_table_list_ret. + * SKIP_EXTERNAL_WIMS: Blobs already present in a WIM file, but not @wim, + * shall be returned in neither @blob_list_ret nor @blob_table_list_ret. * - * @stream_list_ret - * List of streams, linked by write_streams_list, that need to be written - * will be returned here. + * @blob_list_ret + * List of blobs, linked by write_blobs_list, that need to be written will + * be returned here. * - * Note that this function assumes that unhashed streams will be written; - * it does not take into account that they may become duplicates when - * actually hashed. + * Note that this function assumes that unhashed blobs will be written; it + * does not take into account that they may become duplicates when actually + * hashed. * - * @lookup_table_list_ret - * List of streams, linked by lookup_table_list, that need to be included - * in the WIM's lookup table will be returned here. This will be a - * superset of the streams in @stream_list_ret. + * @blob_table_list_ret + * List of blobs, linked by blob_table_list, that need to be included in + * the WIM's blob table will be returned here. This will be a superset of + * the blobs in @blob_list_ret. 
* - * This list will be a proper superset of @stream_list_ret if and only if + * This list will be a proper superset of @blob_list_ret if and only if * WIMLIB_WRITE_FLAG_OVERWRITE was specified in @write_flags and some of - * the streams that would otherwise need to be written were already located + * the blobs that would otherwise need to be written were already located * in the WIM file. * - * All streams in this list will have @out_refcnt set to the number of - * references to the stream in the output WIM. If + * All blobs in this list will have @out_refcnt set to the number of + * references to the blob in the output WIM. If * WIMLIB_WRITE_FLAG_STREAMS_OK was specified in @write_flags, @out_refcnt * may be as low as 0. * * @filter_ctx_ret - * A context for queries of stream filter status with stream_filtered() is + * A context for queries of blob filter status with blob_filtered() is * returned in this location. * - * In addition, @will_be_in_output_wim will be set to 1 in all stream entries - * inserted into @lookup_table_list_ret and to 0 in all stream entries in the - * lookup table of @wim not inserted into @lookup_table_list_ret. + * In addition, @will_be_in_output_wim will be set to 1 in all blobs inserted + * into @blob_table_list_ret and to 0 in all blobs in the blob table of @wim not + * inserted into @blob_table_list_ret. * - * Still furthermore, @unique_size will be set to 1 on all stream entries in - * @stream_list_ret that have unique size among all stream entries in - * @stream_list_ret and among all stream entries in the lookup table of @wim - * that are ineligible for being written due to filtering. + * Still furthermore, @unique_size will be set to 1 on all blobs in + * @blob_list_ret that have unique size among all blobs in @blob_list_ret and + * among all blobs in the blob table of @wim that are ineligible for being + * written due to filtering. * * Returns 0 on success; nonzero on read error, memory allocation error, or * otherwise. 
*/ static int -prepare_stream_list_for_write(WIMStruct *wim, int image, - int write_flags, - struct list_head *stream_list_ret, - struct list_head *lookup_table_list_ret, - struct filter_context *filter_ctx_ret) +prepare_blob_list_for_write(WIMStruct *wim, int image, + int write_flags, + struct list_head *blob_list_ret, + struct list_head *blob_table_list_ret, + struct filter_context *filter_ctx_ret) { int ret; - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; filter_ctx_ret->write_flags = write_flags; filter_ctx_ret->wim = wim; - ret = prepare_unfiltered_list_of_streams_in_output_wim( + ret = prepare_unfiltered_list_of_blobs_in_output_wim( wim, image, write_flags & WIMLIB_WRITE_FLAG_STREAMS_OK, - stream_list_ret); + blob_list_ret); if (ret) return ret; - INIT_LIST_HEAD(lookup_table_list_ret); - list_for_each_entry(lte, stream_list_ret, write_streams_list) - list_add_tail(&lte->lookup_table_list, lookup_table_list_ret); + INIT_LIST_HEAD(blob_table_list_ret); + list_for_each_entry(blob, blob_list_ret, write_blobs_list) + list_add_tail(&blob->blob_table_list, blob_table_list_ret); - ret = determine_stream_size_uniquity(stream_list_ret, wim->lookup_table, - filter_ctx_ret); + ret = determine_blob_size_uniquity(blob_list_ret, wim->blob_table, + filter_ctx_ret); if (ret) return ret; - if (may_filter_streams(filter_ctx_ret)) - filter_stream_list_for_write(stream_list_ret, filter_ctx_ret); + if (may_filter_blobs(filter_ctx_ret)) + filter_blob_list_for_write(blob_list_ret, filter_ctx_ret); return 0; } static int -write_wim_streams(WIMStruct *wim, int image, int write_flags, - unsigned num_threads, - struct list_head *stream_list_override, - struct list_head *lookup_table_list_ret) +write_file_blobs(WIMStruct *wim, int image, int write_flags, + unsigned num_threads, + struct list_head *blob_list_override, + struct list_head *blob_table_list_ret) { int ret; - struct list_head _stream_list; - struct list_head *stream_list; - struct wim_lookup_table_entry 
*lte; + struct list_head _blob_list; + struct list_head *blob_list; + struct blob_descriptor *blob; struct filter_context _filter_ctx; struct filter_context *filter_ctx; - if (stream_list_override == NULL) { - /* Normal case: prepare stream list from image(s) being written. + if (blob_list_override == NULL) { + /* Normal case: prepare blob list from image(s) being written. */ - stream_list = &_stream_list; + blob_list = &_blob_list; filter_ctx = &_filter_ctx; - ret = prepare_stream_list_for_write(wim, image, write_flags, - stream_list, - lookup_table_list_ret, - filter_ctx); + ret = prepare_blob_list_for_write(wim, image, write_flags, + blob_list, + blob_table_list_ret, + filter_ctx); if (ret) return ret; } else { /* Currently only as a result of wimlib_split() being called: - * use stream list already explicitly provided. Use existing + * use blob list already explicitly provided. Use existing * reference counts. */ - stream_list = stream_list_override; + blob_list = blob_list_override; filter_ctx = NULL; - INIT_LIST_HEAD(lookup_table_list_ret); - list_for_each_entry(lte, stream_list, write_streams_list) { - lte->out_refcnt = lte->refcnt; - lte->will_be_in_output_wim = 1; - lte->unique_size = 0; - list_add_tail(&lte->lookup_table_list, lookup_table_list_ret); + INIT_LIST_HEAD(blob_table_list_ret); + list_for_each_entry(blob, blob_list, write_blobs_list) { + blob->out_refcnt = blob->refcnt; + blob->will_be_in_output_wim = 1; + blob->unique_size = 0; + list_add_tail(&blob->blob_table_list, blob_table_list_ret); } } - return wim_write_stream_list(wim, - stream_list, - write_flags, - num_threads, - filter_ctx); + return wim_write_blob_list(wim, + blob_list, + write_flags, + num_threads, + filter_ctx); } static int -write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags) +write_metadata_blobs(WIMStruct *wim, int image, int write_flags) { int ret; int start_image; @@ -2297,12 +2263,12 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int 
write_flags) } else if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) { DEBUG("Image %u was not modified; re-using existing " "metadata resource.", i); - stream_set_out_reshdr_for_reuse(imd->metadata_lte); + blob_set_out_reshdr_for_reuse(imd->metadata_blob); ret = 0; } else { DEBUG("Image %u was not modified; copying existing " "metadata resource.", i); - ret = write_wim_resource(imd->metadata_lte, + ret = write_wim_resource(imd->metadata_blob, &wim->out_fd, wim->out_compression_type, wim->out_chunk_size, @@ -2348,52 +2314,51 @@ close_wim_writable(WIMStruct *wim, int write_flags) } static int -cmp_streams_by_out_rspec(const void *p1, const void *p2) +cmp_blobs_by_out_rdesc(const void *p1, const void *p2) { - const struct wim_lookup_table_entry *lte1, *lte2; + const struct blob_descriptor *blob1, *blob2; - lte1 = *(const struct wim_lookup_table_entry**)p1; - lte2 = *(const struct wim_lookup_table_entry**)p2; + blob1 = *(const struct blob_descriptor**)p1; + blob2 = *(const struct blob_descriptor**)p2; - if (lte1->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { - if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { - if (lte1->out_res_offset_in_wim != lte2->out_res_offset_in_wim) - return cmp_u64(lte1->out_res_offset_in_wim, - lte2->out_res_offset_in_wim); + if (blob1->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + if (blob2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + if (blob1->out_res_offset_in_wim != blob2->out_res_offset_in_wim) + return cmp_u64(blob1->out_res_offset_in_wim, + blob2->out_res_offset_in_wim); } else { return 1; } } else { - if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) + if (blob2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) return -1; } - return cmp_u64(lte1->out_reshdr.offset_in_wim, - lte2->out_reshdr.offset_in_wim); + return cmp_u64(blob1->out_reshdr.offset_in_wim, + blob2->out_reshdr.offset_in_wim); } static int -write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, - struct wim_reshdr *out_reshdr, - struct list_head 
*lookup_table_list) +write_blob_table(WIMStruct *wim, int image, int write_flags, + struct wim_reshdr *out_reshdr, + struct list_head *blob_table_list) { int ret; - /* Set output resource metadata for streams already present in WIM. */ + /* Set output resource metadata for blobs already present in WIM. */ if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) { - struct wim_lookup_table_entry *lte; - list_for_each_entry(lte, lookup_table_list, lookup_table_list) - { - if (lte->resource_location == RESOURCE_IN_WIM && - lte->rspec->wim == wim) + struct blob_descriptor *blob; + list_for_each_entry(blob, blob_table_list, blob_table_list) { + if (blob->blob_location == BLOB_IN_WIM && + blob->rdesc->wim == wim) { - stream_set_out_reshdr_for_reuse(lte); + blob_set_out_reshdr_for_reuse(blob); } } } - ret = sort_stream_list(lookup_table_list, - offsetof(struct wim_lookup_table_entry, lookup_table_list), - cmp_streams_by_out_rspec); + ret = sort_blob_list(blob_table_list, + offsetof(struct blob_descriptor, blob_table_list), + cmp_blobs_by_out_rdesc); if (ret) return ret; @@ -2410,30 +2375,30 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, end_image = image; } - /* Push metadata resource lookup table entries onto the front of - * the list in reverse order, so that they're written in order. + /* Push metadata blob table entries onto the front of the list + * in reverse order, so that they're written in order. 
*/ for (int i = end_image; i >= start_image; i--) { - struct wim_lookup_table_entry *metadata_lte; + struct blob_descriptor *metadata_blob; - metadata_lte = wim->image_metadata[i - 1]->metadata_lte; - wimlib_assert(metadata_lte->out_reshdr.flags & WIM_RESHDR_FLAG_METADATA); - metadata_lte->out_refcnt = 1; - list_add(&metadata_lte->lookup_table_list, lookup_table_list); + metadata_blob = wim->image_metadata[i - 1]->metadata_blob; + wimlib_assert(metadata_blob->out_reshdr.flags & WIM_RESHDR_FLAG_METADATA); + metadata_blob->out_refcnt = 1; + list_add(&metadata_blob->blob_table_list, blob_table_list); } } - return write_wim_lookup_table_from_stream_list(lookup_table_list, - &wim->out_fd, - wim->hdr.part_number, - out_reshdr, - write_flags_to_resource_flags(write_flags)); + return write_blob_table_from_blob_list(blob_table_list, + &wim->out_fd, + wim->hdr.part_number, + out_reshdr, + write_flags_to_resource_flags(write_flags)); } /* * finish_write(): * - * Finish writing a WIM file: write the lookup table, xml data, and integrity + * Finish writing a WIM file: write the blob table, xml data, and integrity * table, then overwrite the WIM header. By default, closes the WIM file * descriptor (@wim->out_fd) if successful. * @@ -2447,19 +2412,18 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, * * (public) WIMLIB_WRITE_FLAG_PIPABLE: * Writing a pipable WIM, possibly to a pipe; include pipable WIM - * stream headers before the lookup table and XML data, and also - * write the WIM header at the end instead of seeking to the - * beginning. Can't be combined with - * WIMLIB_WRITE_FLAG_CHECK_INTEGRITY. + * blob headers before the blob table and XML data, and also write + * the WIM header at the end instead of seeking to the beginning. + * Can't be combined with WIMLIB_WRITE_FLAG_CHECK_INTEGRITY. * - * (private) WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE: - * Don't write the lookup table. + * (private) WIMLIB_WRITE_FLAG_NO_BLOB_TABLE: + * Don't write the blob table. 
* * (private) WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML: * After writing the XML data but before writing the integrity - * table, write a temporary WIM header and flush the stream so that - * the WIM is less likely to become corrupted upon abrupt program - * termination. + * table, write a temporary WIM header and flush the file + * descriptor so that the WIM is less likely to become corrupted + * upon abrupt program termination. * (private) WIMLIB_WRITE_FLAG_HEADER_AT_END: * Instead of overwriting the WIM header at the beginning of the * file, simply append it to the end of the file. (Used when @@ -2477,13 +2441,13 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, */ static int finish_write(WIMStruct *wim, int image, int write_flags, - struct list_head *lookup_table_list) + struct list_head *blob_table_list) { int ret; off_t hdr_offset; int write_resource_flags; - off_t old_lookup_table_end = 0; - off_t new_lookup_table_end; + off_t old_blob_table_end = 0; + off_t new_blob_table_end; u64 xml_totalbytes; struct integrity_table *old_integrity_table = NULL; @@ -2501,7 +2465,7 @@ finish_write(WIMStruct *wim, int image, int write_flags, } else { copy_reshdr(&wim->hdr.boot_metadata_reshdr, &wim->image_metadata[ - wim->hdr.boot_idx - 1]->metadata_lte->out_reshdr); + wim->hdr.boot_idx - 1]->metadata_blob->out_reshdr); } /* If overwriting the WIM file containing an integrity table in-place, @@ -2515,21 +2479,21 @@ finish_write(WIMStruct *wim, int image, int write_flags, WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) && wim_has_integrity_table(wim)) { - old_lookup_table_end = wim->hdr.lookup_table_reshdr.offset_in_wim + - wim->hdr.lookup_table_reshdr.size_in_wim; + old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; (void)read_integrity_table(wim, - old_lookup_table_end - WIM_HEADER_DISK_SIZE, + old_blob_table_end - WIM_HEADER_DISK_SIZE, &old_integrity_table); /* If we couldn't read the old integrity table, we can 
still * re-calculate the full integrity table ourselves. Hence the * ignoring of the return value. */ } - /* Write lookup table. */ - if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) { - ret = write_wim_lookup_table(wim, image, write_flags, - &wim->hdr.lookup_table_reshdr, - lookup_table_list); + /* Write blob table. */ + if (!(write_flags & WIMLIB_WRITE_FLAG_NO_BLOB_TABLE)) { + ret = write_blob_table(wim, image, write_flags, + &wim->hdr.blob_table_reshdr, + blob_table_list); if (ret) { free_integrity_table(old_integrity_table); return ret; @@ -2563,12 +2527,12 @@ finish_write(WIMStruct *wim, int image, int write_flags, } } - new_lookup_table_end = wim->hdr.lookup_table_reshdr.offset_in_wim + - wim->hdr.lookup_table_reshdr.size_in_wim; + new_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; ret = write_integrity_table(wim, - new_lookup_table_end, - old_lookup_table_end, + new_blob_table_end, + old_blob_table_end, old_integrity_table); free_integrity_table(old_integrity_table); if (ret) @@ -2657,9 +2621,9 @@ unlock_wim_for_append(WIMStruct *wim) * stops other software from trying to read the file as a normal WIM. * * - The header at the beginning of the file does not contain all the normal - * information; in particular it will have all 0's for the lookup table and - * XML data resource entries. This is because this information cannot be - * determined until the lookup table and XML data have been written. + * information; in particular it will have all 0's for the blob table and XML + * data resource entries. This is because this information cannot be + * determined until the blob table and XML data have been written. * Consequently, wimlib will write the full header at the very end of the * file. The header at the end, however, is only used when reading the WIM * from a seekable file (not a pipe). @@ -2669,14 +2633,14 @@ unlock_wim_for_append(WIMStruct *wim) * reading the WIM from a pipe. 
This copy of the XML data is ignored if the * WIM is read from a seekable file (not a pipe). * - * - The format of resources, or streams, has been modified to allow them to be - * used before the "lookup table" has been read. Each stream is prefixed with - * a `struct pwm_stream_hdr' that is basically an abbreviated form of `struct - * wim_lookup_table_entry_disk' that only contains the SHA1 message digest, - * uncompressed stream size, and flags that indicate whether the stream is - * compressed. The data of uncompressed streams then follows literally, while - * the data of compressed streams follows in a modified format. Compressed - * streams do not begin with a chunk table, since the chunk table cannot be + * - The format of resources, or blobs, has been modified to allow them to be + * used before the "blob table" has been read. Each blob is prefixed with a + * `struct pwm_blob_hdr' that is basically an abbreviated form of `struct + * blob_descriptor_disk' that only contains the SHA-1 message digest, + * uncompressed blob size, and flags that indicate whether the blob is + * compressed. The data of uncompressed blobs then follows literally, while + * the data of compressed blobs follows in a modified format. Compressed + * blobs do not begin with a chunk table, since the chunk table cannot be * written until all chunks have been compressed. Instead, each compressed * chunk is prefixed by a `struct pwm_chunk_hdr' that gives its size. * Furthermore, the chunk table is written at the end of the resource instead @@ -2684,29 +2648,29 @@ unlock_wim_for_append(WIMStruct *wim) * `struct pwm_chunk_hdr's were not present; also, the chunk table is only * used if the WIM is being read from a seekable file (not a pipe). * - * - Metadata resources always come before other file resources (streams). - * (This does not by itself constitute an incompatibility with normal WIMs, - * since this is valid in normal WIMs.) + * - Metadata blobs always come before non-metadata blobs. 
(This does not by + * itself constitute an incompatibility with normal WIMs, since this is valid + * in normal WIMs.) * - * - At least up to the end of the file resources, all components must be packed - * as tightly as possible; there cannot be any "holes" in the WIM. (This does + * - At least up to the end of the blobs, all components must be packed as + * tightly as possible; there cannot be any "holes" in the WIM. (This does * not by itself consititute an incompatibility with normal WIMs, since this * is valid in normal WIMs.) * - * Note: the lookup table, XML data, and header at the end are not used when + * Note: the blob table, XML data, and header at the end are not used when * applying from a pipe. They exist to support functionality such as image * application and export when the WIM is *not* read from a pipe. * * Layout of pipable WIM: * * ---------+----------+--------------------+----------------+--------------+-----------+--------+ - * | Header | XML data | Metadata resources | File resources | Lookup table | XML data | Header | + * | Header | XML data | Metadata resources | File resources | Blob table | XML data | Header | * ---------+----------+--------------------+----------------+--------------+-----------+--------+ * * Layout of normal WIM: * * +--------+-----------------------------+-------------------------+ - * | Header | File and metadata resources | Lookup table | XML data | + * | Header | File and metadata resources | Blob table | XML data | * +--------+-----------------------------+-------------------------+ * * An optional integrity table can follow the final XML data in both normal and @@ -2722,8 +2686,8 @@ unlock_wim_for_append(WIMStruct *wim) static int write_pipable_wim(WIMStruct *wim, int image, int write_flags, unsigned num_threads, - struct list_head *stream_list_override, - struct list_head *lookup_table_list_ret) + struct list_head *blob_list_override, + struct list_head *blob_table_list_ret) { int ret; struct wim_reshdr 
xml_reshdr; @@ -2736,36 +2700,36 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags, * been written. */ /* For efficiency, when wimlib adds an image to the WIM with - * wimlib_add_image(), the SHA1 message digests of files is not + * wimlib_add_image(), the SHA-1 message digests of files is not * calculated; instead, they are calculated while the files are being * written. However, this does not work when writing a pipable WIM, - * since when writing a stream to a pipable WIM, its SHA1 message digest - * needs to be known before the stream data is written. Therefore, - * before getting much farther, we need to pre-calculate the SHA1 - * message digests of all streams that will be written. */ - ret = wim_checksum_unhashed_streams(wim); + * since when writing a blob to a pipable WIM, its SHA-1 message digest + * needs to be known before the blob data is written. Therefore, before + * getting much farther, we need to pre-calculate the SHA-1 message + * digests of all blobs that will be written. */ + ret = wim_checksum_unhashed_blobs(wim); if (ret) return ret; /* Write extra copy of the XML data. */ ret = write_wim_xml_data(wim, image, WIM_TOTALBYTES_OMIT, - &xml_reshdr, - WRITE_RESOURCE_FLAG_PIPABLE); + &xml_reshdr, WRITE_RESOURCE_FLAG_PIPABLE); if (ret) return ret; /* Write metadata resources for the image(s) being included in the * output WIM. */ - ret = write_wim_metadata_resources(wim, image, write_flags); + ret = write_metadata_blobs(wim, image, write_flags); if (ret) return ret; - /* Write streams needed for the image(s) being included in the output - * WIM, or streams needed for the split WIM part. */ - return write_wim_streams(wim, image, write_flags, num_threads, - stream_list_override, lookup_table_list_ret); + /* Write blobs needed for the image(s) being included in the output WIM, + * or blobs needed for the split WIM part. 
*/ + return write_file_blobs(wim, image, write_flags, + num_threads, blob_list_override, + blob_table_list_ret); - /* The lookup table, XML data, and header at end are handled by + /* The blob table, XML data, and header at end are handled by * finish_write(). */ } @@ -2779,12 +2743,12 @@ write_wim_part(WIMStruct *wim, unsigned num_threads, unsigned part_number, unsigned total_parts, - struct list_head *stream_list_override, + struct list_head *blob_list_override, const u8 *guid) { int ret; struct wim_header hdr_save; - struct list_head lookup_table_list; + struct list_head blob_table_list; if (total_parts == 1) DEBUG("Writing standalone WIM."); @@ -2853,7 +2817,7 @@ write_wim_part(WIMStruct *wim, else DEBUG("Number of threads: %u", num_threads); DEBUG("Progress function: %s", (wim->progfunc ? "yes" : "no")); - DEBUG("Stream list: %s", (stream_list_override ? "specified" : "autodetect")); + DEBUG("Blob list: %s", (blob_list_override ? "specified" : "autodetect")); DEBUG("GUID: %s", (write_flags & WIMLIB_WRITE_FLAG_RETAIN_GUID) ? "retain" : guid ? "explicit" : "generate new"); @@ -2961,7 +2925,7 @@ write_wim_part(WIMStruct *wim, } /* Clear references to resources that have not been written yet. */ - zero_reshdr(&wim->hdr.lookup_table_reshdr); + zero_reshdr(&wim->hdr.blob_table_reshdr); zero_reshdr(&wim->hdr.xml_data_reshdr); zero_reshdr(&wim->hdr.boot_metadata_reshdr); zero_reshdr(&wim->hdr.integrity_table_reshdr); @@ -3016,31 +2980,32 @@ write_wim_part(WIMStruct *wim, if (ret) goto out_restore_hdr; - /* Write metadata resources and streams. */ + /* Write metadata resources and blobs. */ if (!(write_flags & WIMLIB_WRITE_FLAG_PIPABLE)) { /* Default case: create a normal (non-pipable) WIM. 
*/ - ret = write_wim_streams(wim, image, write_flags, num_threads, - stream_list_override, - &lookup_table_list); + ret = write_file_blobs(wim, image, write_flags, + num_threads, + blob_list_override, + &blob_table_list); if (ret) goto out_restore_hdr; - ret = write_wim_metadata_resources(wim, image, write_flags); + ret = write_metadata_blobs(wim, image, write_flags); if (ret) goto out_restore_hdr; } else { /* Non-default case: create pipable WIM. */ ret = write_pipable_wim(wim, image, write_flags, num_threads, - stream_list_override, - &lookup_table_list); + blob_list_override, + &blob_table_list); if (ret) goto out_restore_hdr; write_flags |= WIMLIB_WRITE_FLAG_HEADER_AT_END; } - /* Write lookup table, XML data, and (optional) integrity table. */ - ret = finish_write(wim, image, write_flags, &lookup_table_list); + /* Write blob table, XML data, and (optional) integrity table. */ + ret = finish_write(wim, image, write_flags, &blob_table_list); out_restore_hdr: memcpy(&wim->hdr, &hdr_save, sizeof(struct wim_header)); (void)close_wim_writable(wim, write_flags); @@ -3097,13 +3062,14 @@ any_images_modified(WIMStruct *wim) } static int -check_resource_offset(struct wim_lookup_table_entry *lte, void *_wim) +check_resource_offset(struct blob_descriptor *blob, void *_wim) { const WIMStruct *wim = _wim; off_t end_offset = *(const off_t*)wim->private; - if (lte->resource_location == RESOURCE_IN_WIM && lte->rspec->wim == wim && - lte->rspec->offset_in_wim + lte->rspec->size_in_wim > end_offset) + if (blob->blob_location == BLOB_IN_WIM && + blob->rdesc->wim == wim && + blob->rdesc->offset_in_wim + blob->rdesc->size_in_wim > end_offset) return WIMLIB_ERR_RESOURCE_ORDER; return 0; } @@ -3118,12 +3084,12 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) unsigned i; wim->private = &end_offset; - ret = for_lookup_table_entry(wim->lookup_table, check_resource_offset, wim); + ret = for_blob_in_table(wim->blob_table, check_resource_offset, wim); if (ret) return ret; for (i = 
0; i < wim->hdr.image_count; i++) { - ret = check_resource_offset(wim->image_metadata[i]->metadata_lte, wim); + ret = check_resource_offset(wim->image_metadata[i]->metadata_blob, wim); if (ret) return ret; } @@ -3131,17 +3097,17 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) } /* - * Overwrite a WIM, possibly appending streams to it. + * Overwrite a WIM, possibly appending new resources to it. * * A WIM looks like (or is supposed to look like) the following: * * Header (212 bytes) - * Streams and metadata resources (variable size) - * Lookup table (variable size) + * Resources for metadata and files (variable size) + * Blob table (variable size) * XML data (variable size) * Integrity table (optional) (variable size) * - * If we are not adding any streams or metadata resources, the lookup table is + * If we are not adding any new files or metadata, then the blob table is * unchanged--- so we only need to overwrite the XML data, integrity table, and * header. This operation is potentially unsafe if the program is abruptly * terminated while the XML data or integrity table are being overwritten, but @@ -3152,50 +3118,50 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) * the program is terminated while the integrity table is being calculated (but * no guarantees, due to write re-ordering...). * - * If we are adding new streams or images (metadata resources), the lookup table - * needs to be changed, and those streams need to be written. In this case, we - * try to perform a safe update of the WIM file by writing the streams *after* - * the end of the previous WIM, then writing the new lookup table, XML data, and - * (optionally) integrity table following the new streams. This will produce a - * layout like the following: + * If we are adding new blobs, including new file data as well as any metadata + * for any new images, then the blob table needs to be changed, and those blobs + * need to be written. 
In this case, we try to perform a safe update of the WIM + * file by writing the blobs *after* the end of the previous WIM, then writing + * the new blob table, XML data, and (optionally) integrity table following the + * new blobs. This will produce a layout like the following: * * Header (212 bytes) - * (OLD) Streams and metadata resources (variable size) - * (OLD) Lookup table (variable size) + * (OLD) Resources for metadata and files (variable size) + * (OLD) Blob table (variable size) * (OLD) XML data (variable size) * (OLD) Integrity table (optional) (variable size) - * (NEW) Streams and metadata resources (variable size) - * (NEW) Lookup table (variable size) + * (NEW) Resources for metadata and files (variable size) + * (NEW) Blob table (variable size) * (NEW) XML data (variable size) * (NEW) Integrity table (optional) (variable size) * * At all points, the WIM is valid as nothing points to the new data yet. Then, - * the header is overwritten to point to the new lookup table, XML data, and + * the header is overwritten to point to the new blob table, XML data, and * integrity table, to produce the following layout: * * Header (212 bytes) - * Streams and metadata resources (variable size) + * Resources for metadata and files (variable size) * Nothing (variable size) - * More Streams and metadata resources (variable size) - * Lookup table (variable size) + * Resources for metadata and files (variable size) + * Blob table (variable size) * XML data (variable size) * Integrity table (optional) (variable size) * * This method allows an image to be appended to a large WIM very quickly, and - * is crash-safe except in the case of write re-ordering, but the - * disadvantage is that a small hole is left in the WIM where the old lookup - * table, xml data, and integrity table were. (These usually only take up a - * small amount of space compared to the streams, however.) 
+ * is crash-safe except in the case of write re-ordering, but the disadvantage + * is that a small hole is left in the WIM where the old blob table, xml data, + * and integrity table were. (These usually only take up a small amount of + * space compared to the blobs, however.) */ static int overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) { int ret; off_t old_wim_end; - u64 old_lookup_table_end, old_xml_begin, old_xml_end; + u64 old_blob_table_end, old_xml_begin, old_xml_end; struct wim_header hdr_save; - struct list_head stream_list; - struct list_head lookup_table_list; + struct list_head blob_list; + struct list_head blob_table_list; struct filter_context filter_ctx; DEBUG("Overwriting `%"TS"' in-place", wim->filename); @@ -3217,14 +3183,13 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE | WIMLIB_WRITE_FLAG_STREAMS_OK; - /* Make sure that the integrity table (if present) is after the XML - * data, and that there are no stream resources, metadata resources, or - * lookup tables after the XML data. Otherwise, these data would be - * overwritten. */ + /* Make sure there is no data after the XML data, except possibily an + * integrity table. If this were the case, then this data would be + * overwritten. 
*/ old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim; old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim; - old_lookup_table_end = wim->hdr.lookup_table_reshdr.offset_in_wim + - wim->hdr.lookup_table_reshdr.size_in_wim; + old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; if (wim->hdr.integrity_table_reshdr.offset_in_wim != 0 && wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { WARNING("Didn't expect the integrity table to be before the XML data"); @@ -3232,8 +3197,8 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) goto out_restore_memory_hdr; } - if (old_lookup_table_end > old_xml_begin) { - WARNING("Didn't expect the lookup table to be after the XML data"); + if (old_blob_table_end > old_xml_begin) { + WARNING("Didn't expect the blob table to be after the XML data"); ret = WIMLIB_ERR_RESOURCE_ORDER; goto out_restore_memory_hdr; } @@ -3244,22 +3209,22 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) * overwrite these resources). */ if (!wim->image_deletion_occurred && !any_images_modified(wim)) { /* If no images have been modified and no images have been - * deleted, a new lookup table does not need to be written. We + * deleted, a new blob table does not need to be written. We * shall write the new XML data and optional integrity table - * immediately after the lookup table. Note that this may + * immediately after the blob table. Note that this may * overwrite an existing integrity table. 
*/ - DEBUG("Skipping writing lookup table " + DEBUG("Skipping writing blob table " "(no images modified or deleted)"); - old_wim_end = old_lookup_table_end; - write_flags |= WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE | + old_wim_end = old_blob_table_end; + write_flags |= WIMLIB_WRITE_FLAG_NO_BLOB_TABLE | WIMLIB_WRITE_FLAG_CHECKPOINT_AFTER_XML; } else if (wim->hdr.integrity_table_reshdr.offset_in_wim != 0) { - /* Old WIM has an integrity table; begin writing new streams - * after it. */ + /* Old WIM has an integrity table; begin writing new blobs after + * it. */ old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim + wim->hdr.integrity_table_reshdr.size_in_wim; } else { - /* No existing integrity table; begin writing new streams after + /* No existing integrity table; begin writing new blobs after * the old XML data. */ old_wim_end = old_xml_end; } @@ -3268,9 +3233,9 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) if (ret) goto out_restore_memory_hdr; - ret = prepare_stream_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags, - &stream_list, &lookup_table_list, - &filter_ctx); + ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags, + &blob_list, &blob_table_list, + &filter_ctx); if (ret) goto out_restore_memory_hdr; @@ -3296,20 +3261,17 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) goto out_restore_physical_hdr; } - ret = wim_write_stream_list(wim, - &stream_list, - write_flags, - num_threads, - &filter_ctx); + ret = wim_write_blob_list(wim, &blob_list, write_flags, + num_threads, &filter_ctx); if (ret) goto out_truncate; - ret = write_wim_metadata_resources(wim, WIMLIB_ALL_IMAGES, write_flags); + ret = write_metadata_blobs(wim, WIMLIB_ALL_IMAGES, write_flags); if (ret) goto out_truncate; ret = finish_write(wim, WIMLIB_ALL_IMAGES, write_flags, - &lookup_table_list); + &blob_table_list); if (ret) goto out_truncate; @@ -3317,7 +3279,7 @@ overwrite_wim_inplace(WIMStruct *wim, int 
write_flags, unsigned num_threads) return 0; out_truncate: - if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) { + if (!(write_flags & WIMLIB_WRITE_FLAG_NO_BLOB_TABLE)) { WARNING("Truncating `%"TS"' to its original size (%"PRIu64" bytes)", wim->filename, old_wim_end); /* Return value of ftruncate() is ignored because this is diff --git a/src/xml.c b/src/xml.c index 22966e0e..58750a1f 100644 --- a/src/xml.c +++ b/src/xml.c @@ -32,11 +32,11 @@ #include #include "wimlib/assert.h" +#include "wimlib/blob_table.h" #include "wimlib/dentry.h" #include "wimlib/encoding.h" #include "wimlib/error.h" #include "wimlib/file_io.h" -#include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/resource.h" #include "wimlib/timestamp.h" @@ -90,7 +90,7 @@ struct image_info { /* Note: must update clone_image_info() if adding new fields here */ - struct wim_lookup_table *lookup_table; /* temporary field */ + struct blob_table *blob_table; /* temporary field */ }; /* A struct wim_info structure corresponds to the entire XML data for a WIM file. 
*/ @@ -1214,22 +1214,24 @@ calculate_dentry_statistics(struct wim_dentry *dentry, void *arg) if (!(inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY | FILE_ATTRIBUTE_REPARSE_POINT))) { - struct wim_lookup_table_entry *lte; + struct blob_descriptor *blob; - lte = inode_unnamed_lte(inode, info->lookup_table); - if (lte) { - info->total_bytes += lte->size; + blob = inode_get_blob_for_unnamed_data_stream(inode, + info->blob_table); + if (blob) { + info->total_bytes += blob->size; if (!dentry_is_first_in_inode(dentry)) - info->hard_link_bytes += lte->size; + info->hard_link_bytes += blob->size; } if (inode->i_nlink >= 2 && dentry_is_first_in_inode(dentry)) { - for (unsigned i = 0; i < inode->i_num_ads; i++) { - if (inode->i_ads_entries[i].stream_name_nbytes) { - lte = inode_stream_lte(inode, i + 1, info->lookup_table); - if (lte) { + for (unsigned i = 0; i < inode->i_num_streams; i++) { + if (stream_is_named_data_stream(&inode->i_streams[i])) { + blob = stream_blob(&inode->i_streams[i], + info->blob_table); + if (blob) { info->hard_link_bytes += inode->i_nlink * - lte->size; + blob->size; } } } @@ -1255,7 +1257,7 @@ xml_update_image_info(WIMStruct *wim, int image) image_info->dir_count = 0; image_info->total_bytes = 0; image_info->hard_link_bytes = 0; - image_info->lookup_table = wim->lookup_table; + image_info->blob_table = wim->blob_table; for_dentry_in_tree(wim->image_metadata[image - 1]->root_dentry, calculate_dentry_statistics,