1 #ifndef _WIMLIB_BLOB_TABLE_H
2 #define _WIMLIB_BLOB_TABLE_H
4 #include "wimlib/list.h"
5 #include "wimlib/resource.h"
6 #include "wimlib/sha1.h"
7 #include "wimlib/types.h"
9 /* An enumerated type that identifies where a blob's data is located. */
12 /* The blob's data does not exist. This is a temporary state only. */
15 /* The blob's data is located in a WIM resource identified by the
16 * `struct wim_resource_descriptor' pointed to by @rdesc.
17 * @offset_in_res identifies the offset at which this particular blob
18 * begins in the uncompressed data of the resource. */
21 /* The blob's data is available as the contents of the file named by @file_on_disk. */
25 /* The blob's data is available as the contents of the in-memory buffer
26 * pointed to by @attached_buffer. */
27 BLOB_IN_ATTACHED_BUFFER,
30 /* The blob's data is available as the contents of the file with name
31 * @staging_file_name relative to an open directory file descriptor. */
37 /* The blob's data is available as the contents of an NTFS attribute
38 * accessible through libntfs-3g. The attribute is identified by
39 * volume, path to an inode, attribute name, and attribute type.
40 * @ntfs_loc points to a structure containing this information. */
45 /* Windows only: the blob's data is available as the contents of the
46 * data stream named by @file_on_disk. @file_on_disk is an NT namespace
47 * path that may be longer than the Win32-level MAX_PATH. Furthermore,
48 * the stream may require "backup semantics" to access. */
49 BLOB_IN_WINNT_FILE_ON_DISK,
51 /* Windows only: the blob's data is available as the raw encrypted data
52 * of the external file named by @file_on_disk. @file_on_disk is a
53 * Win32 namespace path. */
58 /* A "blob target" is a stream, and the inode to which that stream belongs, to
59 * which a blob needs to be extracted as part of an extraction operation. Since
60 * blobs are single-instanced, a blob may have multiple targets. */
61 struct blob_extraction_target {
/* Inode to which the target stream belongs. */
62 struct wim_inode *inode;
/* Stream of @inode into which the blob's data is to be extracted. */
63 struct wim_inode_stream *stream;
67 /* Descriptor for a blob, which is a known-length sequence of binary data.
69 * Within a WIM file, blobs are single-instanced and identified by SHA-1 message digest. */
72 struct blob_descriptor {
/* Layout overview: hash-table linkage and identity (size, location, flags,
 * hash or back-reference), reference counts, the data-location union, and
 * finally temporary fields used while writing, extracting, exporting, or
 * mounting. */
74 /* List node for a hash bucket of the blob table */
75 struct hlist_node hash_list;
77 /* Uncompressed size of this blob */
80 /* One of the `enum blob_location' values documented above. */
81 u32 blob_location : 4;
83 /* Blob flags (WIM_RESHDR_FLAG_*) */
86 /* 1 iff the SHA-1 message digest of this blob is unknown. */
89 /* Temporary fields used when writing blobs; set as documented for
90 * prepare_blob_list_for_write(). */
92 u32 will_be_in_output_wim : 1;
94 /* Set to 1 if this blob represents a metadata resource that has been
95 * changed. In such cases, the hash cannot be used to verify the data
96 * if the metadata resource is read again. (This could be avoided if we
97 * used separate fields for input/output checksum, but most blobs
98 * wouldn't need this.) */
99 u32 dont_check_metadata_hash : 1;
/* NOTE(review): no description of this flag is given here; confirm its
 * meaning at the sites that set and test it. */
101 u32 may_send_done_with_file : 1;
103 /* Only used by wimlib_export_image() */
104 u32 was_exported : 1;
108 * For unhashed == 0: 'hash' is the SHA-1 message digest of the
109 * blob's data. 'hash_short' allows accessing just a prefix of
110 * the SHA-1 message digest, which is useful for getting a "hash
111 * code" for hash table lookup/insertion.
113 u8 hash[SHA1_HASH_SIZE];
116 /* For unhashed == 1: these variables make it possible to find
117 * the stream that references this blob. There can be at most
118 * one such reference, since duplicate blobs can only be joined
119 * after they have been hashed. */
/* Inode half of the back-reference; assigned together with
 * back_stream_id by prepare_unhashed_blob(). */
121 struct wim_inode *back_inode;
126 /* Number of times this blob is referenced by file streams in WIM
127 * images. See blob_decrement_refcnt() for information about the
128 * limitations of this field. */
132 * When a WIM file is written, this is set to the number of references
133 * (from file streams) to this blob in the output WIM file.
135 * During extraction, this is set to the number of targets to which this
136 * blob is being extracted.
138 * During image export, this is set to the number of references of this
139 * blob that originated from the source WIM.
141 * When mounting a WIM image read-write, this is set to the number of
142 * extra references to this blob preemptively taken to allow later
143 * saving the modified image as a new image and leaving the original
149 /* Number of open file descriptors to this blob during a FUSE mount of
150 * the containing image. */
154 /* Specification of where this blob's data is located. Which member of
155 * this union is valid is determined by the @blob_location field. */
/* BLOB_IN_WIM: the `struct wim_resource_descriptor' identifying the WIM
 * resource that contains this blob's data. */
159 struct wim_resource_descriptor *rdesc;
163 /* BLOB_IN_FILE_ON_DISK
164 * BLOB_IN_WINNT_FILE_ON_DISK
165 * BLOB_WIN32_ENCRYPTED */
/* NOTE(review): presumably the inode of the file that supplies the data;
 * not described here, confirm against the capture code. */
168 struct wim_inode *file_inode;
171 /* BLOB_IN_ATTACHED_BUFFER */
172 void *attached_buffer;
175 /* BLOB_IN_STAGING_FILE */
177 char *staging_file_name;
183 /* BLOB_IN_NTFS_VOLUME */
184 struct ntfs_location *ntfs_loc;
188 /* Links together blobs that share the same underlying WIM resource.
189 * The head is the 'blob_list' member of
190 * 'struct wim_resource_descriptor'. */
191 struct list_head rdesc_node;
193 /* Temporary fields */
195 /* Fields used temporarily during WIM file writing. */
198 /* List node used for blob size table. */
199 struct hlist_node hash_list_2;
201 /* Metadata for the underlying solid resource in
202 * the WIM being written (only valid if
203 * WIM_RESHDR_FLAG_SOLID set in
204 * out_reshdr.flags). */
206 u64 out_res_offset_in_wim;
207 u64 out_res_size_in_wim;
208 u64 out_res_uncompressed_size;
212 /* Links blobs being written to the WIM. */
213 struct list_head write_blobs_list;
216 /* Metadata for this blob in the WIM being
218 struct wim_reshdr out_reshdr;
221 /* Name under which this blob is being
222 * sorted; used only when sorting blobs
223 * for solid compression. */
224 utf16lechar *solid_sort_name;
225 size_t solid_sort_name_nbytes;
230 /* Used temporarily during extraction. This is an array of
231 * references to the streams being extracted that use this blob.
232 * out_refcnt tracks the number of slots filled. */
/* Inline storage; blob_extraction_targets() returns this array while
 * out_refcnt does not exceed its length. */
234 struct blob_extraction_target inline_blob_extraction_targets[3];
/* Array returned by blob_extraction_targets() once out_refcnt exceeds the
 * length of the inline array. */
236 struct blob_extraction_target *blob_extraction_targets;
/* NOTE(review): presumably the number of slots allocated in
 * blob_extraction_targets; TODO confirm. */
237 u32 alloc_blob_extraction_targets;
242 /* Temporary list fields. */
244 /* Links blobs for writing blob table. */
245 struct list_head blob_table_list;
247 /* Links blobs being extracted. */
248 struct list_head extraction_list;
250 /* Links blobs being exported. */
251 struct list_head export_blob_list;
253 /* Links original list of blobs in the read-write mounted image. */
254 struct list_head orig_blob_list;
257 /* Links blobs that are still unhashed after being added to a WIM. */
259 struct list_head unhashed_list;
/* Blob table and blob descriptor API. Only declarations appear here, so the
 * notes below are limited to what the signatures themselves show. */

/* Blob table creation, destruction, reading and writing.
 * NOTE(review): @capacity is presumably a sizing hint; confirm in the
 * implementation. */
262 extern struct blob_table *
263 new_blob_table(size_t capacity) _malloc_attribute;
266 free_blob_table(struct blob_table *table);
269 read_blob_table(WIMStruct *wim);
/* Takes the list of blobs to record, the output file descriptor, and
 * @out_reshdr, a non-const pointer (presumably filled in with the header of
 * the written table; confirm). */
272 write_blob_table_from_blob_list(struct list_head *blob_list,
273 struct filedes *out_fd,
275 struct wim_reshdr *out_reshdr,
276 int write_resource_flags);
/* Blob descriptor allocation and copying. */
278 extern struct blob_descriptor *
279 new_blob_descriptor(void) _malloc_attribute;
281 extern struct blob_descriptor *
282 clone_blob_descriptor(const struct blob_descriptor *blob)
/* Reference and open-descriptor accounting; the comment on the reference
 * count field in struct blob_descriptor points to blob_decrement_refcnt()
 * for that field's limitations. */
286 blob_decrement_refcnt(struct blob_descriptor *blob,
287 struct blob_table *table);
290 blob_decrement_num_opened_fds(struct blob_descriptor *blob);
294 free_blob_descriptor(struct blob_descriptor *blob);
/* Insertion into and removal from a blob table. */
297 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob);
300 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob);
/* Lookup keyed by @hash (cf. the SHA-1 'hash' member of struct
 * blob_descriptor). NOTE(review): presumably returns NULL when no blob
 * matches; confirm. */
302 extern struct blob_descriptor *
303 lookup_blob(const struct blob_table *table, const u8 *hash);
/* Iteration helpers. @visitor is called with a blob and the caller-supplied
 * @arg. NOTE(review): how a nonzero visitor return is handled is not visible
 * here. */
306 for_blob_in_table(struct blob_table *table,
307 int (*visitor)(struct blob_descriptor *, void *), void *arg);
310 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
311 int (*visitor)(struct blob_descriptor *, void *),
/* Forward declaration; only a pointer to this type is used below. */
314 struct wimlib_resource_entry;
317 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
318 struct wimlib_resource_entry *wentry);
/* Sorting helpers. @compar has a qsort()-style signature.
 * NOTE(review): @list_head_offset is presumably the byte offset, within
 * struct blob_descriptor, of the `struct list_head' member that links
 * @blob_list (the descriptor has several such members); confirm. */
321 sort_blob_list(struct list_head *blob_list,
322 size_t list_head_offset,
323 int (*compar)(const void *, const void*));
326 sort_blob_list_by_sequential_order(struct list_head *blob_list,
327 size_t list_head_offset);
330 cmp_blobs_by_sequential_order(const void *p1, const void *p2);
/* Nonzero iff @blob is located in a WIM resource (BLOB_IN_WIM) whose
 * uncompressed size differs from the blob's own size. The location test
 * comes first, so @blob->rdesc is dereferenced only when the blob is in a
 * WIM resource. */
333 blob_is_in_solid_wim_resource(const struct blob_descriptor *blob)
335 return blob->blob_location == BLOB_IN_WIM &&
336 blob->size != blob->rdesc->uncompressed_size;
/* Nonzero iff @blob's location is one of the file-backed kinds:
 * BLOB_IN_FILE_ON_DISK, BLOB_IN_WINNT_FILE_ON_DISK, or BLOB_WIN32_ENCRYPTED
 * (the latter two are documented as Windows only). */
340 blob_is_in_file(const struct blob_descriptor *blob)
342 return blob->blob_location == BLOB_IN_FILE_ON_DISK
344 || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK
345 || blob->blob_location == BLOB_WIN32_ENCRYPTED
/* Return the array of extraction targets for @blob. While out_refcnt (the
 * number of filled slots during extraction) does not exceed the length of
 * the inline array, the inline array is returned; otherwise the
 * blob_extraction_targets pointer is returned. */
350 static inline const struct blob_extraction_target *
351 blob_extraction_targets(struct blob_descriptor *blob)
353 if (blob->out_refcnt <= ARRAY_LEN(blob->inline_blob_extraction_targets))
354 return blob->inline_blob_extraction_targets;
356 return blob->blob_extraction_targets;
/* Mark @blob as located in a WIM resource and append it, through its
 * rdesc_node link, to @rdesc's list of blobs.
 * NOTE(review): no assignment of blob->rdesc is visible in these lines;
 * confirm that it is set to @rdesc. */
360 blob_set_is_located_in_wim_resource(struct blob_descriptor *blob,
361 struct wim_resource_descriptor *rdesc)
363 blob->blob_location = BLOB_IN_WIM;
365 list_add_tail(&blob->rdesc_node, &rdesc->blob_list);
/* Unlink @blob from the blob list of its WIM resource and reset its location
 * to BLOB_NONEXISTENT, which is documented as a temporary state only. */
369 blob_unset_is_located_in_wim_resource(struct blob_descriptor *blob)
371 list_del(&blob->rdesc_node);
372 blob->blob_location = BLOB_NONEXISTENT;
/* NOTE(review): presumably creates a blob whose data is @size bytes at
 * @buffer and registers it in @blob_table; whether the buffer is copied is
 * not visible here. */
375 extern struct blob_descriptor *
376 new_blob_from_data_buffer(const void *buffer, size_t size,
377 struct blob_table *blob_table);
/* @blob_ret is an out-parameter. NOTE(review): presumably it receives the
 * blob to use after hashing, which may differ from @blob if a duplicate
 * already exists in @blob_table; confirm. */
380 hash_unhashed_blob(struct blob_descriptor *blob,
381 struct blob_table *blob_table,
382 struct blob_descriptor **blob_ret);
/* Returns a pointer to a blob pointer. NOTE(review): presumably it is located
 * via the back_inode/back_stream_id back-reference recorded by
 * prepare_unhashed_blob(); confirm. */
384 extern struct blob_descriptor **
385 retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob);
/* Record on @blob the back-reference (@back_inode, @stream_id) to the stream
 * that references it, and append @blob to the @unhashed_blobs list through
 * its unhashed_list link.
 * NOTE(review): the lines between the signature and the first assignment are
 * not visible here, so any NULL check on @blob or setting of the unhashed
 * flag cannot be confirmed from these lines. */
388 prepare_unhashed_blob(struct blob_descriptor *blob,
389 struct wim_inode *back_inode, u32 stream_id,
390 struct list_head *unhashed_blobs)
395 blob->back_inode = back_inode;
396 blob->back_stream_id = stream_id;
397 list_add_tail(&blob->unhashed_list, unhashed_blobs);
400 #endif /* _WIMLIB_BLOB_TABLE_H */