1 #ifndef _WIMLIB_BLOB_TABLE_H
2 #define _WIMLIB_BLOB_TABLE_H
4 #include "wimlib/list.h"
5 #include "wimlib/resource.h"
6 #include "wimlib/sha1.h"
7 #include "wimlib/types.h"
9 /* An enumerated type that identifies where a blob's data is located. */
12 /* The blob's data does not exist. This is a temporary state only. */
15 /* The blob's data is available in the WIM resource identified by the
16 * `struct wim_resource_descriptor' pointed to by @rdesc.
17 * @offset_in_res identifies the offset at which this particular blob
18 * begins in the uncompressed data of the resource. */
/* The blob's data is available as the contents of the file named by
 * @file_on_disk. */
25 /* The blob's data is available as the contents of the in-memory buffer
26 * pointed to by @attached_buffer. */
27 BLOB_IN_ATTACHED_BUFFER,
30 /* The blob's data is available as the contents of the file with name
31 * @staging_file_name relative to the open directory file descriptor
37 /* The blob's data is available as the contents of an NTFS attribute
38 * accessible through libntfs-3g. @ntfs_loc points to a structure which
39 * identifies the attribute. */
44 /* Windows only: the blob's data is available in the file (or named data
45 * stream) specified by @windows_file. The data might be only properly
46 * accessible through the Windows API. */
/*
 * A "blob extraction target" pairs a stream with the inode that owns it and
 * names one place to which a blob must be extracted during an extraction
 * operation.  Because blobs are single-instanced, one blob may have several
 * extraction targets.
 */
struct blob_extraction_target {
	/* Inode to which @stream belongs. */
	struct wim_inode *inode;

	/* Stream of @inode to which the blob needs to be extracted. */
	struct wim_inode_stream *stream;
};
/*
 * Descriptor for a "blob", which is a known-length sequence of binary data.
 *
 * Within a WIM file, blobs are single-instanced and are identified by the
 * SHA-1 message digest of their data.
 */
66 struct blob_descriptor {
68 /* List node for a hash bucket of the blob table */
69 struct hlist_node hash_list;
72 * Uncompressed size of this blob.
74 * In most cases we are now enforcing that this is nonzero; i.e. an
75 * empty stream will have "no blob" rather than "an empty blob". The
78 * - blob descriptors with 'blob_location == BLOB_NONEXISTENT',
79 * e.g. placeholder entries for new metadata resources or for
80 * blobs required for pipable WIM extraction. In these cases the
81 * size is not meaningful information anyway.
82 * - blob descriptors with 'blob_location == BLOB_IN_STAGING_FILE'
83 * can vary their size over time, including to 0.
89 * For unhashed == 0: 'hash' is the SHA-1 message digest of the
90 * blob's data. 'hash_short' allows accessing just a prefix of
91 * the SHA-1 message digest, which is useful for getting a "hash
92 * code" for hash table lookup/insertion.
94 u8 hash[SHA1_HASH_SIZE];
97 /* For unhashed == 1: these variables make it possible to find
98 * the stream that references this blob. There can be at most
99 * one such reference, since duplicate blobs can only be joined
100 * after they have been hashed. */
102 struct wim_inode *back_inode;
107 /* Number of times this blob is referenced by file streams in WIM
108 * images. See blob_decrement_refcnt() for information about the
109 * limitations of this field. */
113 * When a WIM file is written, this is set to the number of references
114 * (from file streams) to this blob in the output WIM file.
116 * During extraction, this is set to the number of targets to which this
117 * blob is being extracted.
119 * During image export, this is set to the number of references of this
120 * blob that originated from the source WIM.
122 * When mounting a WIM image read-write, this is set to the number of
123 * extra references to this blob preemptively taken to allow later
124 * saving the modified image as a new image and leaving the original
130 /* Number of open file descriptors to this blob during a FUSE mount of
135 /* One of the `enum blob_location' values documented above. */
136 u16 blob_location : 4;
138 /* 1 iff this blob contains "metadata" as opposed to data. */
141 /* 1 iff the SHA-1 message digest of this blob is unknown. */
144 /* 1 iff this blob has failed its checksum. */
147 /* Temporary fields used when writing blobs; set as documented for
148 * prepare_blob_list_for_write(). */
150 u16 will_be_in_output_wim : 1;
152 u16 may_send_done_with_file : 1;
154 /* Only used by wimlib_export_image() */
155 u16 was_exported : 1;
157 /* Specification of where this blob's data is located. Which member of
158 * this union is valid is determined by the @blob_location field. */
162 struct wim_resource_descriptor *rdesc;
165 /* Links together blobs that share the same underlying
166 * WIM resource. The head is rdesc->blob_list. */
167 struct list_head rdesc_node;
174 /* BLOB_IN_FILE_ON_DISK
175 * BLOB_IN_WINDOWS_FILE */
179 struct windows_file *windows_file;
181 struct wim_inode *file_inode;
184 /* BLOB_IN_ATTACHED_BUFFER */
185 void *attached_buffer;
188 /* BLOB_IN_STAGING_FILE */
190 char *staging_file_name;
196 /* BLOB_IN_NTFS_VOLUME */
197 struct ntfs_location *ntfs_loc;
201 /* List link for per-WIM-image list of unhashed blobs */
202 struct list_head unhashed_list;
206 /* Temporary fields */
208 /* Fields used temporarily during WIM file writing. */
211 /* List node used for blob size table. */
212 struct hlist_node hash_list_2;
214 /* Metadata for the underlying solid resource in
215 * the WIM being written (only valid if
216 * WIM_RESHDR_FLAG_SOLID set in
217 * out_reshdr.flags). */
219 u64 out_res_offset_in_wim;
220 u64 out_res_size_in_wim;
221 u64 out_res_uncompressed_size;
225 /* Links blobs being written to the WIM. */
226 struct list_head write_blobs_list;
229 /* Metadata for this blob in the WIM being
231 struct wim_reshdr out_reshdr;
234 /* Name under which this blob is being
235 * sorted; used only when sorting blobs
236 * for solid compression. */
237 utf16lechar *solid_sort_name;
238 size_t solid_sort_name_nbytes;
243 /* Used temporarily during extraction. This is an array of
244 * references to the streams being extracted that use this blob.
245 * out_refcnt tracks the number of slots filled. */
247 struct blob_extraction_target inline_blob_extraction_targets[3];
249 struct blob_extraction_target *blob_extraction_targets;
250 u32 alloc_blob_extraction_targets;
255 /* Temporary list fields. */
257 /* Links blobs for writing blob table. */
258 struct list_head blob_table_list;
260 /* Links blobs being extracted. */
261 struct list_head extraction_list;
263 /* Links blobs being exported. */
264 struct list_head export_blob_list;
268 extern struct blob_table *
269 new_blob_table(size_t capacity) _malloc_attribute;
272 free_blob_table(struct blob_table *table);
275 read_blob_table(WIMStruct *wim);
278 write_blob_table_from_blob_list(struct list_head *blob_list,
279 struct filedes *out_fd,
281 struct wim_reshdr *out_reshdr,
282 int write_resource_flags);
284 extern struct blob_descriptor *
285 new_blob_descriptor(void) _malloc_attribute;
287 extern struct blob_descriptor *
288 clone_blob_descriptor(const struct blob_descriptor *blob) _malloc_attribute;
291 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table);
294 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
299 blob_decrement_num_opened_fds(struct blob_descriptor *blob);
303 blob_release_location(struct blob_descriptor *blob);
306 free_blob_descriptor(struct blob_descriptor *blob);
309 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob);
312 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob);
314 extern struct blob_descriptor *
315 lookup_blob(const struct blob_table *table, const u8 *hash);
318 for_blob_in_table(struct blob_table *table,
319 int (*visitor)(struct blob_descriptor *, void *), void *arg);
322 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
323 int (*visitor)(struct blob_descriptor *, void *),
326 struct wimlib_resource_entry;
329 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
330 struct wimlib_resource_entry *wentry);
333 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
334 int (*compar)(const void *, const void*));
337 sort_blob_list_by_sequential_order(struct list_head *blob_list,
338 size_t list_head_offset);
341 cmp_blobs_by_sequential_order(const void *p1, const void *p2);
343 static inline const struct blob_extraction_target *
344 blob_extraction_targets(const struct blob_descriptor *blob)
346 if (blob->out_refcnt <= ARRAY_LEN(blob->inline_blob_extraction_targets))
347 return blob->inline_blob_extraction_targets;
349 return blob->blob_extraction_targets;
/*
 * Declare that the specified blob is located in the specified WIM resource at
 * the specified offset.  The caller is expected to set blob->size if required.
 */
357 blob_set_is_located_in_wim_resource(struct blob_descriptor *blob,
358 struct wim_resource_descriptor *rdesc,
361 blob->blob_location = BLOB_IN_WIM;
363 list_add_tail(&blob->rdesc_node, &rdesc->blob_list);
364 blob->offset_in_res = offset_in_res;
368 blob_unset_is_located_in_wim_resource(struct blob_descriptor *blob)
370 list_del(&blob->rdesc_node);
371 blob->blob_location = BLOB_NONEXISTENT;
375 blob_set_is_located_in_attached_buffer(struct blob_descriptor *blob,
376 void *buffer, size_t size)
378 blob->blob_location = BLOB_IN_ATTACHED_BUFFER;
379 blob->attached_buffer = buffer;
384 blob_is_in_file(const struct blob_descriptor *blob)
386 return blob->blob_location == BLOB_IN_FILE_ON_DISK
388 || blob->blob_location == BLOB_IN_WINDOWS_FILE
394 extern const wchar_t *
395 get_windows_file_path(const struct windows_file *file);
398 static inline const tchar *
399 blob_file_path(const struct blob_descriptor *blob)
402 if (blob->blob_location == BLOB_IN_WINDOWS_FILE)
403 return get_windows_file_path(blob->windows_file);
405 return blob->file_on_disk;
/*
 * Obtain a blob descriptor for the @size bytes of data at @buffer, in the
 * context of @blob_table.
 *
 * NOTE(review): whether the data is copied, whether an already-known blob
 * with the same contents is reused, and the failure return (presumably NULL)
 * are not visible in this header -- confirm against the implementation.
 */
extern struct blob_descriptor *
new_blob_from_data_buffer(const void *buffer, size_t size,
			  struct blob_table *blob_table);
/*
 * Post-hashing step for @blob.
 *
 * NOTE(review): presumably called once the SHA-1 message digest of a
 * previously unhashed blob is known, so that it can be joined with a
 * duplicate in @blob_table and the reference at @back_ptr (belonging to
 * @inode) updated; the returned descriptor is presumably the one to use from
 * then on.  None of this is visible in this header -- confirm against the
 * implementation.
 */
extern struct blob_descriptor *
after_blob_hashed(struct blob_descriptor *blob,
		  struct blob_descriptor **back_ptr,
		  struct blob_table *blob_table, struct wim_inode *inode);
418 hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
419 struct blob_descriptor **blob_ret);
/*
 * Return a pointer to the location that holds the reference to the unhashed
 * blob @blob.
 *
 * NOTE(review): presumably located through the blob's back_inode and
 * back_stream_id, which exist so that the single stream referencing an
 * unhashed blob can be found -- confirm against the implementation.
 */
extern struct blob_descriptor **
retrieve_pointer_to_unhashed_blob(struct blob_descriptor *blob);
425 prepare_unhashed_blob(struct blob_descriptor *blob,
426 struct wim_inode *back_inode, u32 stream_id,
427 struct list_head *unhashed_blobs)
432 blob->back_inode = back_inode;
433 blob->back_stream_id = stream_id;
434 list_add_tail(&blob->unhashed_list, unhashed_blobs);
437 #endif /* _WIMLIB_BLOB_TABLE_H */