#ifndef _WIMLIB_LOOKUP_TABLE_H
#define _WIMLIB_LOOKUP_TABLE_H
-#include "wimlib/assert.h"
-#include "wimlib/dentry.h"
#include "wimlib/list.h"
+#include "wimlib/resource.h"
#include "wimlib/sha1.h"
#include "wimlib/types.h"
-#include "wimlib/wim.h"
-
-#define LOOKUP_FLAG_ADS_OK 0x00000001
-#define LOOKUP_FLAG_DIRECTORY_OK 0x00000002
-
-
-/* The lookup table of a WIM file maps SHA1 message digests to streams of data.
- * Here, the in-memory structure is implemented as a hash table.
- *
- * Given a SHA1 message digest, the mapped-to stream is specified by an offset
- * in the WIM, an uncompressed and compressed size, and resource flags (see
- * 'struct resource_entry'). But, we associate additional information, such as
- * a reference count, with each stream, so the actual mapping is from SHA1
- * message digests to 'struct wim_lookup_table_entry's, each of which contains
- * an embedded 'struct resource_entry'.
- *
- * Note: Everything will break horribly if there is a SHA1 collision.
- */
-struct wim_lookup_table {
- struct hlist_head *array;
- u64 num_entries;
- u64 capacity;
- struct list_head *unhashed_streams;
-};
-
-#ifdef WITH_NTFS_3G
-
-struct _ntfs_volume;
-
-struct ntfs_location {
- tchar *path;
- utf16lechar *stream_name;
- u16 stream_name_nchars;
- struct _ntfs_volume *ntfs_vol;
- bool is_reparse_point;
-};
-#endif
/* An enumerated type that identifies where the stream corresponding to this
* lookup table entry is actually located.
* RESOURCE_IN_WIM since all the streams will initially be located in the WIM.
* However, to handle situations such as image capture and image mount, we allow
* the actual location of the stream to be somewhere else, such as an external
- * file.
- */
+ * file. */
enum resource_location {
/* The lookup table entry does not yet correspond to a stream; this is a
* temporary state only. */
* `struct wim_resource_spec' pointed to by @rspec. @offset_in_res
* identifies the offset at which this particular stream begins in the
* uncompressed data of the resource; this is normally 0, but in general
- * a WIM resource may contain multiple streams. */
+ * a WIM resource may be "packed" and potentially contain multiple
+ * streams. */
RESOURCE_IN_WIM,
/* The stream is located in the external file named by @file_on_disk.
- * On Windows, @file_on_disk may actually specify a named data stream.
*/
RESOURCE_IN_FILE_ON_DISK,
#endif
#ifdef __WIN32__
+ /* Windows only: the stream is located in the external file named by
+ * @file_on_disk, which is in the Windows NT namespace and may specify a
+ * named data stream. */
+ RESOURCE_IN_WINNT_FILE_ON_DISK,
+
/* Windows only: the stream is located in the external file named by
* @file_on_disk, but the file is encrypted and must be read using the
* appropriate Windows API. */
RESOURCE_WIN32_ENCRYPTED,
#endif
+};
+struct stream_owner {
+ struct wim_inode *inode;
+ const utf16lechar *stream_name;
};
-/*
- * An entry in the lookup table in the WIM file.
+/* Specification for a stream, which may be the contents of a file (unnamed data
+ * stream), a named data stream, reparse point data, or a WIM metadata resource.
*
- * It is used to find data streams for files in the WIM.
- *
- * Metadata resources and reparse point data buffers will also have lookup table
- * entries associated with the data.
- *
- * The lookup_table_entry for a given dentry or alternate stream entry in the
- * WIM is found using the SHA1 message digest field.
- */
+ * One instance of this structure is created for each entry in the WIM's lookup
+ * table, hence the name of the struct. Each of these entries contains the SHA1
+ * message digest of a stream and the location of the stream data in the WIM
+ * file (size, location, flags). The in-memory lookup table is a map from SHA1
+ * message digests to stream locations. */
struct wim_lookup_table_entry {
- /* List of lookup table entries in this hash bucket */
+ /* List node for a hash bucket of the lookup table. */
struct hlist_node hash_list;
- /* Uncompressed size of the stream. */
+ /* Uncompressed size of this stream. */
u64 size;
/* Stream flags (WIM_RESHDR_FLAG_*). */
- u16 flags : 8;
-
- /* One of the `enum resource_location' values documented above. */
- u16 resource_location : 5;
+ u32 flags : 8;
- /* 1 if this stream is a unique size (only set while writing streams). */
- u16 unique_size : 1;
+ /* One of the `enum resource_location' values documented above. */
+ u32 resource_location : 4;
/* 1 if this stream has not had a SHA1 message digest calculated for it
- * yet */
- u16 unhashed : 1;
+ * yet. */
+ u32 unhashed : 1;
- u16 deferred : 1;
-
- u16 no_progress : 1;
+ /* Temoorary fields used when writing streams; set as documented for
+ * prepare_stream_list_for_write(). */
+ u32 unique_size : 1;
+ u32 will_be_in_output_wim : 1;
/* Set to 1 when a metadata entry has its checksum changed; in such
- * cases the hash is no longer valid to verify the data if the metadata
- * resource is read again. */
- u16 dont_check_metadata_hash : 1;
-
- /* Only used during WIM write. Normal value is 0 (resource not
- * filtered). */
- u16 filtered : 2;
-#define FILTERED_SAME_WIM 0x1 /* Resource already in same WIM */
-#define FILTERED_EXTERNAL_WIM 0x2 /* Resource already in external WIM */
-
- /* (On-disk field)
- * Number of times this lookup table entry is referenced by dentries.
- * Unfortunately, this field is not always set correctly in Microsoft's
- * WIMs, so we have no choice but to fix it if more references to the
- * lookup table entry are found than stated here. */
- u32 refcnt;
+ * cases the hash cannot be used to verify the data if the metadata
+ * resource is read again. (This could be avoided if we used separate
+ * fields for input/output checksum, but most stream entries wouldn't
+ * need this.) */
+ u32 dont_check_metadata_hash : 1;
+
+ u32 may_send_done_with_file : 1;
union {
/* (On-disk field) SHA1 message digest of the stream referenced
};
};
- /* When a WIM file is written, out_refcnt starts at 0 and is incremented
- * whenever the stream pointed to by this lookup table entry needs to be
- * written. The stream only need to be written when out_refcnt is
- * nonzero, since otherwise it is not referenced by any dentries. */
+ /* Number of times this lookup table entry is referenced by dentries in
+ * the WIM. When a WIM's lookup table is read, this field is
+ * initialized from a corresponding entry; while it should be correct,
+ * in general it may not be. wim_recalculate_refcnts() recalculates the
+ * reference counts for all streams and is run before doing any
+ * deletions. */
+ u32 refcnt;
+
+ /* When a WIM file is written, this is set to the number of references
+ * (by dentries) to this stream in the output WIM file.
+ *
+ * During extraction, this is the number of slots in stream_owners (or
+ * inline_stream_owners) that have been filled.
+ *
+ * During image export, this is set to the number of references of this
+ * stream that originated from the source WIM.
+ *
+ * When mounting a WIM image read-write, this is set to the number of
+ * extra references to this stream preemptively taken to allow later
+ * saving the modified image as a new image and leaving the original
+ * image alone. */
u32 out_refcnt;
- /* Pointers to somewhere where the stream is actually located. See the
- * comments for the @resource_location field above. */
+#ifdef WITH_FUSE
+ /* Number of open file descriptors to this stream during a FUSE mount of
+ * the containing image. */
+ u16 num_opened_fds;
+#endif
+
+ /* Specification of where this stream is actually located. Which member
+ * is valid is determined by the @resource_location field. */
union {
struct {
struct wim_resource_spec *rspec;
u64 offset_in_res;
};
- tchar *file_on_disk;
+ struct {
+ tchar *file_on_disk;
+ struct wim_inode *file_inode;
+ };
void *attached_buffer;
#ifdef WITH_FUSE
- tchar *staging_file_name;
+ struct {
+ char *staging_file_name;
+ int staging_dir_fd;
+ };
#endif
#ifdef WITH_NTFS_3G
struct ntfs_location *ntfs_loc;
#endif
};
- /* Actual reference count to this stream (only used while
- * verifying an image). */
- u32 real_refcnt;
-
- union {
- #ifdef WITH_FUSE
- /* Number of times this stream has been opened (used only during
- * mounting) */
- u16 num_opened_fds;
- #endif
-
- /* This field is used for the special hardlink or symlink image
- * extraction mode. In these mode, all identical files are linked
- * together, and @extracted_file will be set to the filename of the
- * first extracted file containing this stream. */
- tchar *extracted_file;
- };
+ /* Links together streams that share the same underlying WIM resource.
+ * The head is the `stream_list' member of `struct wim_resource_spec'.
+ */
+ struct list_head rspec_node;
/* Temporary fields */
union {
- /* Used temporarily during WIM file writing */
+ /* Fields used temporarily during WIM file writing. */
struct {
- struct hlist_node hash_list_2;
+ union {
+ /* List node used for stream size table. */
+ struct hlist_node hash_list_2;
+
+ /* Metadata for the underlying packed resource
+ * in the WIM being written (only valid if
+ * WIM_RESHDR_FLAG_PACKED_STREAMS set in
+ * out_reshdr.flags). */
+ struct {
+ u64 out_res_offset_in_wim;
+ u64 out_res_size_in_wim;
+ u64 out_res_uncompressed_size;
+ };
+ };
/* Links streams being written to the WIM. */
struct list_head write_streams_list;
- };
- /* Used temporarily during WIM file writing (after above) */
- struct {
- struct list_head msg_list;
- struct list_head being_compressed_list;
+ /* Metadata for this stream in the WIM being written.
+ */
+ struct wim_reshdr out_reshdr;
};
- /* When a WIM file is written, @output_reshdr is filled in with
- * the resource header for the output WIM. */
- struct wim_reshdr out_reshdr;
-
- /* Used temporarily during extraction */
+ /* Used temporarily during extraction. This is an array of
+ * pointers to the inodes being extracted that use this stream.
+ */
union {
- /* out_refcnt tracks number of slots filled */
- struct wim_dentry *inline_lte_dentries[4];
+ /* Inodes to extract that reference this stream.
+ * out_refcnt tracks the number of slots filled. */
+ struct stream_owner inline_stream_owners[3];
struct {
- struct wim_dentry **lte_dentries;
- unsigned long alloc_lte_dentries;
+ struct stream_owner *stream_owners;
+ u32 alloc_stream_owners;
};
};
+
+ /* Actual reference count to this stream (only used while
+ * verifying an image). */
+ u32 real_refcnt;
};
- /* Temporary list fields */
+ /* Temporary list fields. */
union {
- /* Links streams when writing lookup table. */
+ /* Links streams for writing lookup table. */
struct list_head lookup_table_list;
/* Links streams being extracted. */
/* Links streams being exported. */
struct list_head export_stream_list;
+
+ /* Links original list of streams in the read-write mounted image. */
+ struct list_head orig_stream_list;
};
- /* Links streams that are still unhashed after being been added
- * to a WIM. */
+ /* Links streams that are still unhashed after being been added to a
+ * WIM. */
struct list_head unhashed_list;
-
- struct list_head wim_resource_list;
};
-static inline bool
-lte_is_partial(const struct wim_lookup_table_entry * lte)
-{
- return lte->resource_location == RESOURCE_IN_WIM &&
- lte->size != lte->rspec->uncompressed_size;
-}
-
-static inline bool
-lte_filename_valid(const struct wim_lookup_table_entry *lte)
-{
- return lte->resource_location == RESOURCE_IN_FILE_ON_DISK
- #ifdef __WIN32__
- || lte->resource_location == RESOURCE_WIN32_ENCRYPTED
- #endif
- #ifdef WITH_FUSE
- || lte->resource_location == RESOURCE_IN_STAGING_FILE
- #endif
- ;
-}
+/* Functions to allocate and free lookup tables */
extern struct wim_lookup_table *
new_lookup_table(size_t capacity) _malloc_attribute;
+extern void
+free_lookup_table(struct wim_lookup_table *table);
+
+/* Functions to read or write the lookup table from/to a WIM file */
+
extern int
read_wim_lookup_table(WIMStruct *wim);
extern int
-write_wim_lookup_table(WIMStruct *wim, int image, int write_flags,
- struct wim_reshdr *out_reshdr,
- struct list_head *stream_list_override);
-
-extern void
-free_lookup_table(struct wim_lookup_table *table);
+write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
+ struct filedes *out_fd,
+ u16 part_number,
+ struct wim_reshdr *out_reshdr,
+ int write_resource_flags);
-extern void
-lookup_table_insert(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte);
-
-/* Unlinks a lookup table entry from the table; does not free it. */
-static inline void
-lookup_table_unlink(struct wim_lookup_table *table, struct wim_lookup_table_entry *lte)
-{
- wimlib_assert(!lte->unhashed);
- hlist_del(<e->hash_list);
- wimlib_assert(table->num_entries != 0);
- table->num_entries--;
-}
+/* Functions to create, clone, print, and free lookup table entries */
extern struct wim_lookup_table_entry *
new_lookup_table_entry(void) _malloc_attribute;
_malloc_attribute;
extern void
-print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out);
+lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
+ struct wim_lookup_table *table);
+#ifdef WITH_FUSE
+extern void
+lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte);
+#endif
extern void
free_lookup_table_entry(struct wim_lookup_table_entry *lte);
+/* Functions to insert and delete entries from a lookup table */
+
extern void
-lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte,
- struct wimlib_resource_entry *wentry);
+lookup_table_insert(struct wim_lookup_table *table,
+ struct wim_lookup_table_entry *lte);
+
+extern void
+lookup_table_unlink(struct wim_lookup_table *table,
+ struct wim_lookup_table_entry *lte);
+
+/* Function to lookup a stream by SHA1 message digest */
+extern struct wim_lookup_table_entry *
+lookup_stream(const struct wim_lookup_table *table, const u8 hash[]);
+
+/* Functions to iterate through the entries of a lookup table */
extern int
for_lookup_table_entry(struct wim_lookup_table *table,
int (*visitor)(struct wim_lookup_table_entry *, void *),
void *arg);
-extern int
-sort_stream_list_by_sequential_order(struct list_head *stream_list,
- size_t list_head_offset);
-
extern int
for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
int (*visitor)(struct wim_lookup_table_entry *,
void *),
void *arg);
-extern struct wim_lookup_table_entry *
-lookup_resource(const struct wim_lookup_table *table, const u8 hash[]);
-extern int
-wim_pathname_to_stream(WIMStruct *wim, const tchar *path,
- int lookup_flags,
- struct wim_dentry **dentry_ret,
- struct wim_lookup_table_entry **lte_ret,
- u16 *stream_idx_ret);
-extern void
-lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
- struct wim_lookup_table *table);
-#ifdef WITH_FUSE
-extern void
-lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte);
-#endif
+/* Function to get a resource entry in stable format */
-extern int
-lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore);
+struct wimlib_resource_entry;
-extern int
-lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *ignore);
+extern void
+lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte,
+ struct wimlib_resource_entry *wentry);
+/* Functions to sort a list of lookup table entries */
extern int
-lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *ignore);
+sort_stream_list(struct list_head *stream_list,
+ size_t list_head_offset,
+ int (*compar)(const void *, const void*));
-static inline void
-lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte,
- struct wim_resource_spec *rspec)
-{
- lte->resource_location = RESOURCE_IN_WIM;
- lte->rspec = rspec;
- list_add_tail(<e->wim_resource_list, &rspec->stream_list);
-}
+extern int
+sort_stream_list_by_sequential_order(struct list_head *stream_list,
+ size_t list_head_offset);
-static inline void
-lte_unbind_wim_resource_spec(struct wim_lookup_table_entry *lte)
-{
- list_del(<e->wim_resource_list);
- lte->rspec = NULL;
- lte->resource_location = RESOURCE_NONEXISTENT;
-}
+/* Utility functions */
extern int
-inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table,
- bool force);
+lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *ignore);
extern int
-resource_not_found_error(const struct wim_inode *inode, const u8 *hash);
-
-extern void
-inode_unresolve_ltes(struct wim_inode *inode);
-
-static inline struct wim_lookup_table_entry *
-inode_stream_lte_resolved(const struct wim_inode *inode, unsigned stream_idx)
-{
- wimlib_assert(inode->i_resolved);
- wimlib_assert(stream_idx <= inode->i_num_ads);
- if (stream_idx == 0)
- return inode->i_lte;
- else
- return inode->i_ads_entries[stream_idx - 1].lte;
-}
+lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *ignore);
-static inline struct wim_lookup_table_entry *
-inode_stream_lte_unresolved(const struct wim_inode *inode, unsigned stream_idx,
- const struct wim_lookup_table *table)
+static inline bool
+lte_is_partial(const struct wim_lookup_table_entry * lte)
{
- wimlib_assert(!inode->i_resolved);
- wimlib_assert(stream_idx <= inode->i_num_ads);
- if (!table)
- return NULL;
- if (stream_idx == 0)
- return lookup_resource(table, inode->i_hash);
- else
- return lookup_resource(table,
- inode->i_ads_entries[
- stream_idx - 1].hash);
+ return lte->resource_location == RESOURCE_IN_WIM &&
+ lte->size != lte->rspec->uncompressed_size;
}
-extern struct wim_lookup_table_entry *
-inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
- const struct wim_lookup_table *table);
-
-static inline const u8 *
-inode_stream_hash_unresolved(const struct wim_inode *inode, unsigned stream_idx)
+static inline const struct stream_owner *
+stream_owners(struct wim_lookup_table_entry *stream)
{
- wimlib_assert(!inode->i_resolved);
- wimlib_assert(stream_idx <= inode->i_num_ads);
- if (stream_idx == 0)
- return inode->i_hash;
+ if (stream->out_refcnt <= ARRAY_LEN(stream->inline_stream_owners))
+ return stream->inline_stream_owners;
else
- return inode->i_ads_entries[stream_idx - 1].hash;
+ return stream->stream_owners;
}
-
-static inline const u8 *
-inode_stream_hash_resolved(const struct wim_inode *inode, unsigned stream_idx)
-{
- struct wim_lookup_table_entry *lte;
- lte = inode_stream_lte_resolved(inode, stream_idx);
- if (lte)
- return lte->hash;
- else
- return zero_hash;
-}
-
-/*
- * Returns the hash for stream @stream_idx of the inode, where stream_idx = 0
- * means the default un-named file stream, and stream_idx >= 1 corresponds to an
- * alternate data stream.
- *
- * This works for both resolved and un-resolved dentries.
- */
-static inline const u8 *
-inode_stream_hash(const struct wim_inode *inode, unsigned stream_idx)
+static inline void
+lte_bind_wim_resource_spec(struct wim_lookup_table_entry *lte,
+ struct wim_resource_spec *rspec)
{
- if (inode->i_resolved)
- return inode_stream_hash_resolved(inode, stream_idx);
- else
- return inode_stream_hash_unresolved(inode, stream_idx);
+ lte->resource_location = RESOURCE_IN_WIM;
+ lte->rspec = rspec;
+ list_add_tail(<e->rspec_node, &rspec->stream_list);
}
-static inline u16
-inode_stream_name_nbytes(const struct wim_inode *inode, unsigned stream_idx)
+static inline void
+lte_unbind_wim_resource_spec(struct wim_lookup_table_entry *lte)
{
- wimlib_assert(stream_idx <= inode->i_num_ads);
- if (stream_idx == 0)
- return 0;
- else
- return inode->i_ads_entries[stream_idx - 1].stream_name_nbytes;
+ list_del(<e->rspec_node);
+ lte->resource_location = RESOURCE_NONEXISTENT;
}
-extern struct wim_lookup_table_entry *
-inode_unnamed_stream_resolved(const struct wim_inode *inode, u16 *stream_idx_ret);
-
-extern struct wim_lookup_table_entry *
-inode_unnamed_lte_resolved(const struct wim_inode *inode);
-
-extern struct wim_lookup_table_entry *
-inode_unnamed_lte_unresolved(const struct wim_inode *inode,
- const struct wim_lookup_table *table);
+extern void
+lte_put_resource(struct wim_lookup_table_entry *lte);
extern struct wim_lookup_table_entry *
-inode_unnamed_lte(const struct wim_inode *inode, const struct wim_lookup_table *table);
-
-extern const u8 *
-inode_unnamed_stream_hash(const struct wim_inode *inode);
+new_stream_from_data_buffer(const void *buffer, size_t size,
+ struct wim_lookup_table *lookup_table);
static inline void
-lookup_table_insert_unhashed(struct wim_lookup_table *table,
- struct wim_lookup_table_entry *lte,
- struct wim_inode *back_inode,
- u32 back_stream_id)
+add_unhashed_stream(struct wim_lookup_table_entry *lte,
+ struct wim_inode *back_inode,
+ u32 back_stream_id,
+ struct list_head *unhashed_streams)
{
lte->unhashed = 1;
lte->back_inode = back_inode;
lte->back_stream_id = back_stream_id;
- list_add_tail(<e->unhashed_list, table->unhashed_streams);
+ list_add_tail(<e->unhashed_list, unhashed_streams);
}
extern int