X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=include%2Fwimlib%2Flookup_table.h;h=4158f1578f1a9f79289d26eccb311803b3356c7d;hp=097750588454a733ad3167cc085b5e9dd2470eb4;hb=8895e346ab7a4df65c980ba435d1e5d1c2654d3f;hpb=c9482ee98e12fa3f1073e4fc3c56f5eef3c40f32 diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h index 09775058..4158f157 100644 --- a/include/wimlib/lookup_table.h +++ b/include/wimlib/lookup_table.h @@ -1,176 +1,125 @@ #ifndef _WIMLIB_LOOKUP_TABLE_H #define _WIMLIB_LOOKUP_TABLE_H -#include "wimlib/assert.h" -#include "wimlib/dentry.h" #include "wimlib/list.h" +#include "wimlib/resource.h" #include "wimlib/sha1.h" #include "wimlib/types.h" -#include "wimlib/wim.h" - -#define LOOKUP_FLAG_ADS_OK 0x00000001 -#define LOOKUP_FLAG_DIRECTORY_OK 0x00000002 - - -/* The lookup table of a WIM file maps SHA1 message digests to streams of data. - * Here, the in-memory structure is implemented as a hash table. - * - * Given a SHA1 message digest, the mapped-to stream is specified by an offset - * in the WIM, an uncompressed and compressed size, and resource flags (see - * 'struct resource_entry'). But, we associate additional information, such as - * a reference count, with each stream, so the actual mapping is from SHA1 - * message digests to 'struct wim_lookup_table_entry's, each of which contains - * an embedded 'struct resource_entry'. - * - * Note: Everything will break horribly if there is a SHA1 collision. - */ -struct wim_lookup_table { - struct hlist_head *array; - u64 num_entries; - u64 capacity; - struct list_head *unhashed_streams; -}; - -#ifdef WITH_NTFS_3G - -struct _ntfs_volume; - -struct ntfs_location { - tchar *path; - utf16lechar *stream_name; - u16 stream_name_nchars; - struct _ntfs_volume *ntfs_vol; - bool is_reparse_point; -}; -#endif /* An enumerated type that identifies where the stream corresponding to this * lookup table entry is actually located. 
* * If we open a WIM and read its lookup table, the location is set to * RESOURCE_IN_WIM since all the streams will initially be located in the WIM. - * However, to deal with problems such as image capture and image mount, we - * allow the actual location of the stream to be somewhere else, such as an - * external file. - */ + * However, to handle situations such as image capture and image mount, we allow + * the actual location of the stream to be somewhere else, such as an external + * file. */ enum resource_location { - /* The lookup table entry does not correspond to a stream (this state - * should exist only temporarily) */ + /* The lookup table entry does not yet correspond to a stream; this is a + * temporary state only. */ RESOURCE_NONEXISTENT = 0, - /* The stream resource is located in a WIM file. The WIMStruct for the - * WIM file will be pointed to by the @wim member. */ + /* The stream is located in a resource in a WIM file identified by the + * `struct wim_resource_spec' pointed to by @rspec. @offset_in_res + * identifies the offset at which this particular stream begins in the + * uncompressed data of the resource; this is normally 0, but a WIM + * resource can be "solid" and contain multiple streams. */ RESOURCE_IN_WIM, -#ifndef __WIN32__ - /* The stream resource is located in an external file. The name of the - * file will be provided by @file_on_disk member. */ + /* The stream is located in the external file named by @file_on_disk. + */ RESOURCE_IN_FILE_ON_DISK, -#endif - /* The stream resource is directly attached in an in-memory buffer - * pointed to by @attached_buffer. */ + /* The stream is directly attached in the in-memory buffer pointed to by + * @attached_buffer. */ RESOURCE_IN_ATTACHED_BUFFER, #ifdef WITH_FUSE - /* The stream resource is located in an external file in the staging - * directory for a read-write mount. 
*/ + /* The stream is located in the external file named by + * @staging_file_name, located in the staging directory for a read-write + * mount. */ RESOURCE_IN_STAGING_FILE, #endif #ifdef WITH_NTFS_3G - /* The stream resource is located in an NTFS volume. It is identified - * by volume, filename, data stream name, and by whether it is a reparse - * point or not. @ntfs_loc points to a structure containing this - * information. */ + /* The stream is located in an NTFS volume. It is identified by volume, + * filename, data stream name, and by whether it is a reparse point or + * not. @ntfs_loc points to a structure containing this information. + * */ RESOURCE_IN_NTFS_VOLUME, #endif #ifdef __WIN32__ - /* Resource must be accessed using Win32 API (may be a named data - * stream) */ - RESOURCE_WIN32, - - /* Windows only: the file is on disk in the file named @file_on_disk, - * but the file is encrypted and must be read using special functions. - * */ + /* Windows only: the stream is located in the external file named by + * @file_on_disk, which is in the Windows NT namespace and may specify a + * named data stream. */ + RESOURCE_IN_WINNT_FILE_ON_DISK, + + /* Windows only: the stream is located in the external file named by + * @file_on_disk, but the file is encrypted and must be read using the + * appropriate Windows API. */ RESOURCE_WIN32_ENCRYPTED, #endif +}; +struct stream_owner { + struct wim_inode *inode; + const utf16lechar *stream_name; }; -/* - * An entry in the lookup table in the WIM file. +/* Specification for a stream, which may be the contents of a file (unnamed data + * stream), a named data stream, reparse point data, or a WIM metadata resource. * - * It is used to find data streams for files in the WIM. - * - * Metadata resources and reparse point data buffers will also have lookup table - * entries associated with the data. 
- * - * The lookup_table_entry for a given dentry or alternate stream entry in the - * WIM is found using the SHA1 message digest field. - */ + * One instance of this structure is created for each entry in the WIM's lookup + * table, hence the name of the struct. Each of these entries contains the SHA1 + * message digest of a stream and the location of the stream data in the WIM + * file (size, location, flags). The in-memory lookup table is a map from SHA1 + * message digests to stream locations. */ struct wim_lookup_table_entry { - /* List of lookup table entries in this hash bucket */ + /* List node for a hash bucket of the lookup table. */ struct hlist_node hash_list; - /* Location and size of the stream in the WIM, whether it is compressed - * or not, and whether it's a metadata resource or not. This is an - * on-disk field. */ - struct resource_entry resource_entry; + /* Uncompressed size of this stream. */ + u64 size; - /* Specifies which part of the split WIM the resource is located in. - * This is on on-disk field. - * - * In stand-alone WIMs, this must be 1. - * - * In split WIMs, every split WIM part has its own lookup table, and in - * read_lookup_table() it's currently expected that the part number of - * each lookup table entry in a split WIM part's lookup table is the - * same as the part number of that split WIM part. So this makes this - * field redundant since we store a pointer to the corresponding - * WIMStruct in the lookup table entry anyway. - */ - u16 part_number; + /* Stream flags (WIM_RESHDR_FLAG_*). */ + u32 flags : 8; - /* One of the `enum resource_location' values documented above. */ - u16 resource_location : 5; - - /* 1 if this stream is a unique size (only set while writing streams). */ - u16 unique_size : 1; + /* One of the `enum resource_location' values documented above. */ + u32 resource_location : 4; /* 1 if this stream has not had a SHA1 message digest calculated for it - * yet */ - u16 unhashed : 1; + * yet. 
*/ + u32 unhashed : 1; - u16 deferred : 1; + /* Temoorary fields used when writing streams; set as documented for + * prepare_stream_list_for_write(). */ + u32 unique_size : 1; + u32 will_be_in_output_wim : 1; - u16 no_progress : 1; + /* Set to 1 when a metadata entry has its checksum changed; in such + * cases the hash cannot be used to verify the data if the metadata + * resource is read again. (This could be avoided if we used separate + * fields for input/output checksum, but most stream entries wouldn't + * need this.) */ + u32 dont_check_metadata_hash : 1; - /* If resource_location == RESOURCE_IN_WIM, this will be a cached value - * that specifies the compression type of this stream as one of - * WIMLIB_COMPRESSION_TYPE_*. Otherwise this will be 0, which is the - * same as WIMLIB_COMPRESSION_TYPE_NONE. */ - u16 compression_type : 2; + u32 may_send_done_with_file : 1; - /* (On-disk field) - * Number of times this lookup table entry is referenced by dentries. - * Unfortunately, this field is not always set correctly in Microsoft's - * WIMs, so we have no choice but to fix it if more references to the - * lookup table entry are found than stated here. */ - u32 refcnt; + /* Only used by wimlib_export_image() */ + u32 was_exported : 1; union { /* (On-disk field) SHA1 message digest of the stream referenced - * by this lookup table entry */ + * by this lookup table entry. */ u8 hash[SHA1_HASH_SIZE]; /* First 4 or 8 bytes of the SHA1 message digest, used for * inserting the entry into the hash table. Since the SHA1 * message digest can be considered random, we don't really need * the full 20 byte hash just to insert the entry in a hash - * table. */ + * table. */ size_t hash_short; /* Unhashed entries only (unhashed == 1): these variables make @@ -185,147 +134,155 @@ struct wim_lookup_table_entry { }; }; - /* When a WIM file is written, out_refcnt starts at 0 and is incremented - * whenever the stream pointed to by this lookup table entry needs to be - * written. 
The stream only need to be written when out_refcnt is - * nonzero, since otherwise it is not referenced by any dentries. */ + /* Number of times this lookup table entry is referenced by dentries in + * the WIM. When a WIM's lookup table is read, this field is + * initialized from a corresponding entry. + * + * However, see lte_decrement_refcnt() for information about the + * limitations of this field. */ + u32 refcnt; + + /* When a WIM file is written, this is set to the number of references + * (by dentries) to this stream in the output WIM file. + * + * During extraction, this is the number of slots in stream_owners (or + * inline_stream_owners) that have been filled. + * + * During image export, this is set to the number of references of this + * stream that originated from the source WIM. + * + * When mounting a WIM image read-write, this is set to the number of + * extra references to this stream preemptively taken to allow later + * saving the modified image as a new image and leaving the original + * image alone. */ u32 out_refcnt; - /* Pointers to somewhere where the stream is actually located. See the - * comments for the @resource_location field above. */ +#ifdef WITH_FUSE + /* Number of open file descriptors to this stream during a FUSE mount of + * the containing image. */ + u16 num_opened_fds; +#endif + + /* Specification of where this stream is actually located. Which member + * is valid is determined by the @resource_location field. */ union { - WIMStruct *wim; - tchar *file_on_disk; + struct { + struct wim_resource_spec *rspec; + u64 offset_in_res; + }; + struct { + tchar *file_on_disk; + struct wim_inode *file_inode; + }; void *attached_buffer; #ifdef WITH_FUSE - tchar *staging_file_name; + struct { + char *staging_file_name; + int staging_dir_fd; + }; #endif #ifdef WITH_NTFS_3G struct ntfs_location *ntfs_loc; #endif }; - /* Actual reference count to this stream (only used while - * verifying an image). 
*/ - u32 real_refcnt; - - union { - #ifdef WITH_FUSE - /* Number of times this stream has been opened (used only during - * mounting) */ - u16 num_opened_fds; - #endif - - /* This field is used for the special hardlink or symlink image - * extraction mode. In these mode, all identical files are linked - * together, and @extracted_file will be set to the filename of the - * first extracted file containing this stream. */ - tchar *extracted_file; - }; + /* Links together streams that share the same underlying WIM resource. + * The head is the `stream_list' member of `struct wim_resource_spec'. + */ + struct list_head rspec_node; + /* Temporary fields */ union { - /* When a WIM file is written, @output_resource_entry is filled - * in with the resource entry for the output WIM. This will not - * necessarily be the same as the @resource_entry since: - * - The stream may have a different offset in the new WIM - * - The stream may have a different compressed size in the new - * WIM if the compression type changed - */ - struct resource_entry output_resource_entry; - + /* Fields used temporarily during WIM file writing. */ struct { - struct list_head msg_list; - struct list_head being_compressed_list; - }; - struct list_head lte_dentry_list; + union { + /* List node used for stream size table. */ + struct hlist_node hash_list_2; + + /* Metadata for the underlying solid resource in + * the WIM being written (only valid if + * WIM_RESHDR_FLAG_SOLID set in + * out_reshdr.flags). */ + struct { + u64 out_res_offset_in_wim; + u64 out_res_size_in_wim; + u64 out_res_uncompressed_size; + }; + }; + + /* Links streams being written to the WIM. */ + struct list_head write_streams_list; - struct { - struct hlist_node hash_list_2; + union { + /* Metadata for this stream in the WIM being + * written. */ + struct wim_reshdr out_reshdr; + + struct { + /* Name under which this stream is being + * sorted; used only when sorting + * streams for solid compression. 
*/ + utf16lechar *solid_sort_name; + size_t solid_sort_name_nbytes; + }; + }; + }; - struct list_head write_streams_list; + /* Used temporarily during extraction. This is an array of + * pointers to the inodes being extracted that use this stream. + */ + union { + /* Inodes to extract that reference this stream. + * out_refcnt tracks the number of slots filled. */ + struct stream_owner inline_stream_owners[3]; + struct { + struct stream_owner *stream_owners; + u32 alloc_stream_owners; + }; }; }; - /* Temporary list fields */ + /* Temporary list fields. */ union { - struct list_head unhashed_list; - struct list_head swm_stream_list; + /* Links streams for writing lookup table. */ struct list_head lookup_table_list; - struct list_head extraction_list; - struct list_head export_stream_list; - }; -}; -static inline u64 -wim_resource_size(const struct wim_lookup_table_entry *lte) -{ - return lte->resource_entry.original_size; -} + /* Links streams being extracted. */ + struct list_head extraction_list; -static inline u64 -wim_resource_chunks(const struct wim_lookup_table_entry *lte) -{ - return DIV_ROUND_UP(wim_resource_size(lte), WIM_CHUNK_SIZE); -} + /* Links streams being exported. */ + struct list_head export_stream_list; -static inline u64 -wim_resource_compressed_size(const struct wim_lookup_table_entry *lte) -{ - return lte->resource_entry.size; -} + /* Links original list of streams in the read-write mounted image. */ + struct list_head orig_stream_list; + }; -static inline int -wim_resource_compression_type(const struct wim_lookup_table_entry *lte) -{ - BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0); - return lte->compression_type; -} + /* Links streams that are still unhashed after being been added to a + * WIM. 
*/ + struct list_head unhashed_list; +}; -static inline bool -lte_filename_valid(const struct wim_lookup_table_entry *lte) -{ - return 0 - #ifdef __WIN32__ - || lte->resource_location == RESOURCE_WIN32 - || lte->resource_location == RESOURCE_WIN32_ENCRYPTED - #else - || lte->resource_location == RESOURCE_IN_FILE_ON_DISK - #endif - #ifdef WITH_FUSE - || lte->resource_location == RESOURCE_IN_STAGING_FILE - #endif - ; -} +/* Functions to allocate and free lookup tables */ extern struct wim_lookup_table * new_lookup_table(size_t capacity) _malloc_attribute; -extern int -read_lookup_table(WIMStruct *w); +extern void +free_lookup_table(struct wim_lookup_table *table); -extern int -write_lookup_table(WIMStruct *w, int image, struct resource_entry *out_res_entry); +/* Functions to read or write the lookup table from/to a WIM file */ extern int -write_lookup_table_from_stream_list(struct list_head *stream_list, - int out_fd, - struct resource_entry *out_res_entry); +read_wim_lookup_table(WIMStruct *wim); -extern void -free_lookup_table(struct wim_lookup_table *table); - -extern void -lookup_table_insert(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte); +extern int +write_wim_lookup_table_from_stream_list(struct list_head *stream_list, + struct filedes *out_fd, + u16 part_number, + struct wim_reshdr *out_reshdr, + int write_resource_flags); -/* Unlinks a lookup table entry from the table; does not free it. 
*/ -static inline void -lookup_table_unlink(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte) -{ - wimlib_assert(!lte->unhashed); - hlist_del(&lte->hash_list); - wimlib_assert(table->num_entries != 0); - table->num_entries--; -} +/* Functions to create, clone, print, and free lookup table entries */ extern struct wim_lookup_table_entry * new_lookup_table_entry(void) _malloc_attribute; @@ -335,161 +292,120 @@ clone_lookup_table_entry(const struct wim_lookup_table_entry *lte) _malloc_attribute; extern void -print_lookup_table_entry(const struct wim_lookup_table_entry *entry, - FILE *out); +lte_decrement_refcnt(struct wim_lookup_table_entry *lte, + struct wim_lookup_table *table); +#ifdef WITH_FUSE +extern void +lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte); +#endif extern void free_lookup_table_entry(struct wim_lookup_table_entry *lte); +/* Functions to insert and delete entries from a lookup table */ + +extern void +lookup_table_insert(struct wim_lookup_table *table, + struct wim_lookup_table_entry *lte); + +extern void +lookup_table_unlink(struct wim_lookup_table *table, + struct wim_lookup_table_entry *lte); + +/* Function to lookup a stream by SHA1 message digest */ +extern struct wim_lookup_table_entry * +lookup_stream(const struct wim_lookup_table *table, const u8 hash[]); + +/* Functions to iterate through the entries of a lookup table */ + extern int for_lookup_table_entry(struct wim_lookup_table *table, int (*visitor)(struct wim_lookup_table_entry *, void *), void *arg); -extern int -cmp_streams_by_wim_position(const void *p1, const void *p2); - extern int for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table, int (*visitor)(struct wim_lookup_table_entry *, void *), void *arg); -extern struct wim_lookup_table_entry * -__lookup_resource(const struct wim_lookup_table *table, const u8 hash[]); -extern int -lookup_resource(WIMStruct *w, const tchar *path, - int lookup_flags, struct wim_dentry **dentry_ret, - struct 
wim_lookup_table_entry **lte_ret, u16 *stream_idx_ret); -extern void -lte_decrement_refcnt(struct wim_lookup_table_entry *lte, - struct wim_lookup_table *table); -#ifdef WITH_FUSE -extern void -lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte); -#endif +/* Function to get a resource entry in stable format */ -extern int -lte_zero_out_refcnt(struct wim_lookup_table_entry *entry, void *ignore); +struct wimlib_resource_entry; -extern int -lte_zero_real_refcnt(struct wim_lookup_table_entry *entry, void *ignore); +extern void +lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte, + struct wimlib_resource_entry *wentry); +/* Functions to sort a list of lookup table entries */ extern int -lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *ignore); +sort_stream_list(struct list_head *stream_list, + size_t list_head_offset, + int (*compar)(const void *, const void*)); extern int -inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table); +sort_stream_list_by_sequential_order(struct list_head *stream_list, + size_t list_head_offset); -extern void -inode_unresolve_ltes(struct wim_inode *inode); +extern int +cmp_streams_by_sequential_order(const void *p1, const void *p2); -static inline struct wim_lookup_table_entry * -inode_stream_lte_resolved(const struct wim_inode *inode, unsigned stream_idx) -{ - wimlib_assert(inode->i_resolved); - wimlib_assert(stream_idx <= inode->i_num_ads); - if (stream_idx == 0) - return inode->i_lte; - else - return inode->i_ads_entries[stream_idx - 1].lte; -} +/* Utility functions */ -static inline struct wim_lookup_table_entry * -inode_stream_lte_unresolved(const struct wim_inode *inode, unsigned stream_idx, - const struct wim_lookup_table *table) -{ - wimlib_assert(!inode->i_resolved); - wimlib_assert(stream_idx <= inode->i_num_ads); - if (!table) - return NULL; - if (stream_idx == 0) - return __lookup_resource(table, inode->i_hash); - else - return __lookup_resource(table, - 
inode->i_ads_entries[ - stream_idx - 1].hash); -} - -extern struct wim_lookup_table_entry * -inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx, - const struct wim_lookup_table *table); +extern int +lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore); -static inline const u8 * -inode_stream_hash_unresolved(const struct wim_inode *inode, unsigned stream_idx) +static inline bool +lte_is_partial(const struct wim_lookup_table_entry * lte) { - wimlib_assert(!inode->i_resolved); - wimlib_assert(stream_idx <= inode->i_num_ads); - if (stream_idx == 0) - return inode->i_hash; - else - return inode->i_ads_entries[stream_idx - 1].hash; + return lte->resource_location == RESOURCE_IN_WIM && + lte->size != lte->rspec->uncompressed_size; } - -static inline const u8 * -inode_stream_hash_resolved(const struct wim_inode *inode, unsigned stream_idx) +static inline const struct stream_owner * +stream_owners(struct wim_lookup_table_entry *stream) { - struct wim_lookup_table_entry *lte; - lte = inode_stream_lte_resolved(inode, stream_idx); - if (lte) - return lte->hash; + if (stream->out_refcnt <= ARRAY_LEN(stream->inline_stream_owners)) + return stream->inline_stream_owners; else - return zero_hash; + return stream->stream_owners; } -/* - * Returns the hash for stream @stream_idx of the inode, where stream_idx = 0 - * means the default un-named file stream, and stream_idx >= 1 corresponds to an - * alternate data stream. - * - * This works for both resolved and un-resolved dentries. 
- */ -static inline const u8 * -inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx) +static inline void +lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte, + struct wim_resource_spec *rspec) { - if (inode->i_resolved) - return inode_stream_hash_resolved(inode, stream_idx); - else - return inode_stream_hash_unresolved(inode, stream_idx); + lte->resource_location = RESOURCE_IN_WIM; + lte->rspec = rspec; + list_add_tail(&lte->rspec_node, &rspec->stream_list); } -static inline u16 -inode_stream_name_nbytes(const struct wim_inode *inode, unsigned stream_idx) +static inline void +lte_unbind_wim_resource_spec(struct wim_lookup_table_entry *lte) { - wimlib_assert(stream_idx <= inode->i_num_ads); - if (stream_idx == 0) - return 0; - else - return inode->i_ads_entries[stream_idx - 1].stream_name_nbytes; + list_del(&lte->rspec_node); + lte->resource_location = RESOURCE_NONEXISTENT; } -extern struct wim_lookup_table_entry * -inode_unnamed_lte_resolved(const struct wim_inode *inode); - -extern struct wim_lookup_table_entry * -inode_unnamed_lte_unresolved(const struct wim_inode *inode, - const struct wim_lookup_table *table); +extern void +lte_put_resource(struct wim_lookup_table_entry *lte); extern struct wim_lookup_table_entry * -inode_unnamed_lte(const struct wim_inode *inode, const struct wim_lookup_table *table); - -extern u64 -lookup_table_total_stream_size(struct wim_lookup_table *table); - +new_stream_from_data_buffer(const void *buffer, size_t size, + struct wim_lookup_table *lookup_table); static inline void -lookup_table_insert_unhashed(struct wim_lookup_table *table, - struct wim_lookup_table_entry *lte, - struct wim_inode *back_inode, - u32 back_stream_id) +add_unhashed_stream(struct wim_lookup_table_entry *lte, + struct wim_inode *back_inode, + u32 back_stream_id, + struct list_head *unhashed_streams) { lte->unhashed = 1; lte->back_inode = back_inode; lte->back_stream_id = back_stream_id; - list_add_tail(&lte->unhashed_list, 
table->unhashed_streams); + list_add_tail(&lte->unhashed_list, unhashed_streams); } extern int