]> wimlib.net Git - wimlib/blob - src/blob_table.c
Optimize layout of 'struct blob_descriptor'
[wimlib] / src / blob_table.c
1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h> /* for unlink()  */
35
36 #include "wimlib/assert.h"
37 #include "wimlib/blob_table.h"
38 #include "wimlib/encoding.h"
39 #include "wimlib/endianness.h"
40 #include "wimlib/error.h"
41 #include "wimlib/metadata.h"
42 #include "wimlib/ntfs_3g.h"
43 #include "wimlib/resource.h"
44 #include "wimlib/unaligned.h"
45 #include "wimlib/util.h"
46 #include "wimlib/write.h"
47
48 /* A hash table mapping SHA-1 message digests to blob descriptors  */
49 struct blob_table {
50         struct hlist_head *array; /* bucket heads; a blob is chained into bucket (hash_short % capacity) */
51         size_t num_blobs; /* descriptors currently inserted: bumped on insert, dropped on unlink */
52         size_t capacity; /* number of buckets in 'array'; doubled when num_blobs exceeds it */
53 };
54
55 struct blob_table *
56 new_blob_table(size_t capacity)
57 {
58         struct blob_table *table;
59         struct hlist_head *array;
60
61         table = MALLOC(sizeof(struct blob_table));
62         if (table == NULL)
63                 goto oom;
64
65         array = CALLOC(capacity, sizeof(array[0]));
66         if (array == NULL) {
67                 FREE(table);
68                 goto oom;
69         }
70
71         table->num_blobs = 0;
72         table->capacity = capacity;
73         table->array = array;
74         return table;
75
76 oom:
77         ERROR("Failed to allocate memory for blob table "
78               "with capacity %zu", capacity);
79         return NULL;
80 }
81
/* for_blob_in_table() visitor: frees the blob descriptor it is handed.  The
 * context argument is unused.  Always returns 0 so the iteration continues.  */
static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
        (void)_ignore;

        free_blob_descriptor(blob);
        return 0;
}
88
89 void
90 free_blob_table(struct blob_table *table)
91 {
92         if (table) {
93                 for_blob_in_table(table, do_free_blob_descriptor, NULL);
94                 FREE(table->array);
95                 FREE(table);
96         }
97 }
98
99 struct blob_descriptor *
100 new_blob_descriptor(void)
101 {
102         struct blob_descriptor *blob;
103
104         blob = CALLOC(1, sizeof(struct blob_descriptor));
105         if (blob == NULL)
106                 return NULL;
107
108         /* blob->refcnt = 0  */
109         /* blob->blob_location = BLOB_NONEXISTENT  */
110         BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
111
112         return blob;
113 }
114
115 struct blob_descriptor *
116 clone_blob_descriptor(const struct blob_descriptor *old)
117 {
118         struct blob_descriptor *new;
119
120         new = memdup(old, sizeof(struct blob_descriptor));
121         if (new == NULL)
122                 return NULL;
123
124         switch (new->blob_location) {
125         case BLOB_IN_WIM:
126                 list_add(&new->rdesc_node, &new->rdesc->blob_list);
127                 break;
128
129         case BLOB_IN_FILE_ON_DISK:
130 #ifdef __WIN32__
131         case BLOB_IN_WINNT_FILE_ON_DISK:
132         case BLOB_WIN32_ENCRYPTED:
133 #endif
134 #ifdef WITH_FUSE
135         case BLOB_IN_STAGING_FILE:
136                 BUILD_BUG_ON((void*)&old->file_on_disk !=
137                              (void*)&old->staging_file_name);
138 #endif
139                 new->file_on_disk = TSTRDUP(old->file_on_disk);
140                 if (new->file_on_disk == NULL)
141                         goto out_free;
142                 break;
143         case BLOB_IN_ATTACHED_BUFFER:
144                 new->attached_buffer = memdup(old->attached_buffer, old->size);
145                 if (new->attached_buffer == NULL)
146                         goto out_free;
147                 break;
148 #ifdef WITH_NTFS_3G
149         case BLOB_IN_NTFS_VOLUME:
150                 if (old->ntfs_loc) {
151                         new->ntfs_loc = memdup(old->ntfs_loc,
152                                                sizeof(struct ntfs_location));
153                         if (new->ntfs_loc == NULL)
154                                 goto out_free;
155                         new->ntfs_loc->path = STRDUP(old->ntfs_loc->path);
156                         new->ntfs_loc->attr_name = NULL;
157                         if (new->ntfs_loc->path == NULL)
158                                 goto out_free;
159                         if (new->ntfs_loc->attr_name_nchars != 0) {
160                                 new->ntfs_loc->attr_name =
161                                         utf16le_dup(old->ntfs_loc->attr_name);
162                                 if (new->ntfs_loc->attr_name == NULL)
163                                         goto out_free;
164                         }
165                 }
166                 break;
167 #endif
168         default:
169                 break;
170         }
171         return new;
172
173 out_free:
174         free_blob_descriptor(new);
175         return NULL;
176 }
177
178 static void
179 blob_release_location(struct blob_descriptor *blob)
180 {
181         switch (blob->blob_location) {
182         case BLOB_IN_WIM:
183                 list_del(&blob->rdesc_node);
184                 if (list_empty(&blob->rdesc->blob_list))
185                         FREE(blob->rdesc);
186                 break;
187         case BLOB_IN_FILE_ON_DISK:
188 #ifdef __WIN32__
189         case BLOB_IN_WINNT_FILE_ON_DISK:
190         case BLOB_WIN32_ENCRYPTED:
191 #endif
192 #ifdef WITH_FUSE
193         case BLOB_IN_STAGING_FILE:
194                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
195                              (void*)&blob->staging_file_name);
196 #endif
197         case BLOB_IN_ATTACHED_BUFFER:
198                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
199                              (void*)&blob->attached_buffer);
200                 FREE(blob->file_on_disk);
201                 break;
202 #ifdef WITH_NTFS_3G
203         case BLOB_IN_NTFS_VOLUME:
204                 if (blob->ntfs_loc) {
205                         FREE(blob->ntfs_loc->path);
206                         FREE(blob->ntfs_loc->attr_name);
207                         FREE(blob->ntfs_loc);
208                 }
209                 break;
210 #endif
211         default:
212                 break;
213         }
214 }
215
/* Free a blob descriptor along with the location data it owns.  Passing NULL
 * is a no-op.  */
void
free_blob_descriptor(struct blob_descriptor *blob)
{
        if (blob == NULL)
                return;

        blob_release_location(blob);
        FREE(blob);
}
224
225 /* Should this blob be retained even if it has no references?  */
226 static bool
227 should_retain_blob(const struct blob_descriptor *blob)
228 {
229         return blob->blob_location == BLOB_IN_WIM;
230 }
231
/* Dispose of a blob that is no longer in use: free its descriptor unless it is
 * one of the blobs that must be retained.  */
static void
finalize_blob(struct blob_descriptor *blob)
{
        if (should_retain_blob(blob))
                return;

        free_blob_descriptor(blob);
}
238
239 /*
240  * Decrements the reference count of the specified blob, which must be either
241  * (a) unhashed, or (b) inserted in the specified blob table.
242  *
243  * If the blob's reference count reaches 0, we may unlink it from @table and
244  * free it.  However, we retain blobs with 0 reference count that originated
245  * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
246  *
247  * 1. This prevents information about valid blobs in a WIM file --- blobs which
248  *    will continue to be present after appending to the WIM file --- from being
249  *    lost merely because we dropped all references to them.
250  *
251  * 2. Blob reference counts we read from WIM files can't be trusted.  It's
252  *    possible that a WIM has reference counts that are too low; WIMGAPI
253  *    sometimes creates WIMs where this is the case.  It's also possible that
254  *    blobs have been referenced from an external WIM; those blobs can
255  *    potentially have any reference count at all, either lower or higher than
256  *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
257  *    it were a standalone WIM.
258  *
259  * So we can't take the reference counts too seriously.  But at least, we do
260  * recalculate by default when writing a new WIM file.
261  */
262 void
263 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
264 {
265         blob_subtract_refcnt(blob, table, 1);
266 }
267
268 void
269 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
270                      u32 count)
271 {
272         if (unlikely(blob->refcnt < count)) {
273                 blob->refcnt = 0; /* See comment above  */
274                 return;
275         }
276
277         blob->refcnt -= count;
278
279         if (blob->refcnt != 0)
280                 return;
281
282         if (blob->unhashed) {
283                 list_del(&blob->unhashed_list);
284         #ifdef WITH_FUSE
285                 /* If the blob has been extracted to a staging file for a FUSE
286                  * mount, unlink the staging file.  (Note that there still may
287                  * be open file descriptors to it.)  */
288                 if (blob->blob_location == BLOB_IN_STAGING_FILE)
289                         unlinkat(blob->staging_dir_fd,
290                                  blob->staging_file_name, 0);
291         #endif
292         } else {
293                 if (!should_retain_blob(blob))
294                         blob_table_unlink(table, blob);
295         }
296
297         /* If FUSE mounts are enabled, then don't actually free the blob
298          * descriptor until the last file descriptor to it has been closed.  */
299 #ifdef WITH_FUSE
300         if (blob->num_opened_fds == 0)
301 #endif
302                 finalize_blob(blob);
303 }
304
#ifdef WITH_FUSE
/* Record that one file descriptor opened on @blob has been closed.  When the
 * last one goes away and the blob has no references left, finalize the blob.  */
void
blob_decrement_num_opened_fds(struct blob_descriptor *blob)
{
        wimlib_assert(blob->num_opened_fds != 0);

        blob->num_opened_fds--;
        if (blob->num_opened_fds != 0)
                return;

        if (blob->refcnt == 0)
                finalize_blob(blob);
}
#endif
315
316 static void
317 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
318 {
319         size_t i = blob->hash_short % table->capacity;
320
321         hlist_add_head(&blob->hash_list, &table->array[i]);
322 }
323
324 static void
325 enlarge_blob_table(struct blob_table *table)
326 {
327         size_t old_capacity, new_capacity;
328         struct hlist_head *old_array, *new_array;
329         struct blob_descriptor *blob;
330         struct hlist_node *cur, *tmp;
331         size_t i;
332
333         old_capacity = table->capacity;
334         new_capacity = old_capacity * 2;
335         new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
336         if (new_array == NULL)
337                 return;
338         old_array = table->array;
339         table->array = new_array;
340         table->capacity = new_capacity;
341
342         for (i = 0; i < old_capacity; i++) {
343                 hlist_for_each_entry_safe(blob, cur, tmp, &old_array[i], hash_list) {
344                         hlist_del(&blob->hash_list);
345                         blob_table_insert_raw(table, blob);
346                 }
347         }
348         FREE(old_array);
349 }
350
351 /* Insert a blob descriptor into the blob table.  */
352 void
353 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
354 {
355         blob_table_insert_raw(table, blob);
356         if (++table->num_blobs > table->capacity)
357                 enlarge_blob_table(table);
358 }
359
360 /* Unlinks a blob descriptor from the blob table; does not free it.  */
361 void
362 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
363 {
364         wimlib_assert(!blob->unhashed);
365         wimlib_assert(table->num_blobs != 0);
366
367         hlist_del(&blob->hash_list);
368         table->num_blobs--;
369 }
370
371 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
372  * the specified blob table, or NULL if there is none.  */
373 struct blob_descriptor *
374 lookup_blob(const struct blob_table *table, const u8 *hash)
375 {
376         size_t i;
377         struct blob_descriptor *blob;
378         struct hlist_node *pos;
379
380         i = load_size_t_unaligned(hash) % table->capacity;
381         hlist_for_each_entry(blob, pos, &table->array[i], hash_list)
382                 if (hashes_equal(hash, blob->hash))
383                         return blob;
384         return NULL;
385 }
386
387 /* Call a function on all blob descriptors in the specified blob table.  Stop
388  * early and return nonzero if any call to the function returns nonzero.  */
389 int
390 for_blob_in_table(struct blob_table *table,
391                   int (*visitor)(struct blob_descriptor *, void *), void *arg)
392 {
393         struct blob_descriptor *blob;
394         struct hlist_node *pos, *tmp;
395         int ret;
396
397         for (size_t i = 0; i < table->capacity; i++) {
398                 hlist_for_each_entry_safe(blob, pos, tmp, &table->array[i],
399                                           hash_list)
400                 {
401                         ret = visitor(blob, arg);
402                         if (ret)
403                                 return ret;
404                 }
405         }
406         return 0;
407 }
408
409 /*
410  * This is a qsort() callback that sorts blobs into an order optimized for
411  * reading.  Sorting is done primarily by blob location, then secondarily by a
412  * location-dependent order.  For example, blobs in WIM resources are sorted
413  * such that the underlying WIM files will be read sequentially.  This is
414  * especially important for WIM files containing solid resources.
415  */
416 int
417 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
418 {
419         const struct blob_descriptor *blob1, *blob2;
420         int v;
421         WIMStruct *wim1, *wim2;
422
423         blob1 = *(const struct blob_descriptor**)p1;
424         blob2 = *(const struct blob_descriptor**)p2;
425
426         v = (int)blob1->blob_location - (int)blob2->blob_location;
427
428         /* Different resource locations?  */
429         if (v)
430                 return v;
431
432         switch (blob1->blob_location) {
433         case BLOB_IN_WIM:
434                 wim1 = blob1->rdesc->wim;
435                 wim2 = blob2->rdesc->wim;
436
437                 /* Different (possibly split) WIMs?  */
438                 if (wim1 != wim2) {
439                         v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN);
440                         if (v)
441                                 return v;
442                 }
443
444                 /* Different part numbers in the same WIM?  */
445                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
446                 if (v)
447                         return v;
448
449                 if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
450                         return cmp_u64(blob1->rdesc->offset_in_wim,
451                                        blob2->rdesc->offset_in_wim);
452
453                 return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
454
455         case BLOB_IN_FILE_ON_DISK:
456 #ifdef WITH_FUSE
457         case BLOB_IN_STAGING_FILE:
458 #endif
459 #ifdef __WIN32__
460         case BLOB_IN_WINNT_FILE_ON_DISK:
461         case BLOB_WIN32_ENCRYPTED:
462 #endif
463                 /* Compare files by path: just a heuristic that will place files
464                  * in the same directory next to each other.  */
465                 return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
466 #ifdef WITH_NTFS_3G
467         case BLOB_IN_NTFS_VOLUME:
468                 return tstrcmp(blob1->ntfs_loc->path, blob2->ntfs_loc->path);
469 #endif
470         default:
471                 /* No additional sorting order defined for this resource
472                  * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
473                  * everything equal to each other.  */
474                 return 0;
475         }
476 }
477
478 int
479 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
480                int (*compar)(const void *, const void*))
481 {
482         struct list_head *cur;
483         struct blob_descriptor **array;
484         size_t i;
485         size_t array_size;
486         size_t num_blobs = 0;
487
488         list_for_each(cur, blob_list)
489                 num_blobs++;
490
491         if (num_blobs <= 1)
492                 return 0;
493
494         array_size = num_blobs * sizeof(array[0]);
495         array = MALLOC(array_size);
496         if (array == NULL)
497                 return WIMLIB_ERR_NOMEM;
498
499         cur = blob_list->next;
500         for (i = 0; i < num_blobs; i++) {
501                 array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
502                 cur = cur->next;
503         }
504
505         qsort(array, num_blobs, sizeof(array[0]), compar);
506
507         INIT_LIST_HEAD(blob_list);
508         for (i = 0; i < num_blobs; i++) {
509                 list_add_tail((struct list_head*)
510                                ((u8*)array[i] + list_head_offset), blob_list);
511         }
512         FREE(array);
513         return 0;
514 }
515
516 /* Sort the specified list of blobs in an order optimized for sequential
517  * reading.  */
518 int
519 sort_blob_list_by_sequential_order(struct list_head *blob_list,
520                                    size_t list_head_offset)
521 {
522         return sort_blob_list(blob_list, list_head_offset,
523                               cmp_blobs_by_sequential_order);
524 }
525
/* for_blob_in_table() visitor: @_pp points to a cursor into an array of blob
 * descriptor pointers.  Stores @blob at the cursor and advances the cursor by
 * one slot.  Always returns 0.  */
static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
        struct blob_descriptor ***cursor = _pp;

        **cursor = blob;
        (*cursor)++;
        return 0;
}
533
534 /* Iterate through the blob descriptors in the specified blob table in an order
535  * optimized for sequential reading.  */
536 int
537 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
538                                              int (*visitor)(struct blob_descriptor *, void *),
539                                              void *arg)
540 {
541         struct blob_descriptor **blob_array, **p;
542         size_t num_blobs = table->num_blobs;
543         int ret;
544
545         blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
546         if (!blob_array)
547                 return WIMLIB_ERR_NOMEM;
548         p = blob_array;
549         for_blob_in_table(table, add_blob_to_array, &p);
550
551         wimlib_assert(p == blob_array + num_blobs);
552
553         qsort(blob_array, num_blobs, sizeof(blob_array[0]),
554               cmp_blobs_by_sequential_order);
555         ret = 0;
556         for (size_t i = 0; i < num_blobs; i++) {
557                 ret = visitor(blob_array[i], arg);
558                 if (ret)
559                         break;
560         }
561         FREE(blob_array);
562         return ret;
563 }
564
565 /* On-disk format of a blob descriptor (one entry of the blob table) in a WIM file.
566  *
567  * Note: if the WIM file contains solid resource(s), then this structure is
568  * sometimes overloaded to describe a "resource" rather than a "blob": in a run of
569  * SOLID-flagged entries, uncompressed size SOLID_RESOURCE_MAGIC_NUMBER marks a resource.  */
570 struct blob_descriptor_disk {
571
572         /* Size, offset, and flags of the blob (or of the solid resource; see above).  */
573         struct wim_reshdr_disk reshdr;
574
575         /* Which part of the split WIM this blob is in; indexed from 1. */
576         le16 part_number;
577
578         /* Reference count of this blob over all WIM images.  Not reliable; see the
579          * comment above blob_decrement_refcnt().  */
580         le32 refcnt;
581
582         /* SHA-1 message digest of the uncompressed data of this blob, or all
583          * zeroes if this blob is of zero length.  */
584         u8 hash[SHA1_HASH_SIZE];
585 } _packed_attribute; /* presumably packs the struct to match the on-disk layout -- confirm */
586
587 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
588  * count how many specify resources (as opposed to blobs within those
589  * resources).
590  *
591  * Returns the resulting count.  */
592 static size_t
593 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
594 {
595         size_t count = 0;
596         do {
597                 struct wim_reshdr reshdr;
598
599                 get_wim_reshdr(&(entries++)->reshdr, &reshdr);
600
601                 if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
602                         /* Run was terminated by a stand-alone blob entry.  */
603                         break;
604                 }
605
606                 if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
607                         /* This is a resource entry.  */
608                         count++;
609                 }
610         } while (--max);
611         return count;
612 }
613
614 /*
615  * Given a run of consecutive blob descriptors with the SOLID flag set and
616  * having @num_rdescs resource entries, load resource information from them into
617  * the resource descriptors in the @rdescs array.
618  *
619  * Returns 0 on success, or a nonzero error code on failure.
620  */
621 static int
622 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
623                    size_t num_rdescs,
624                    const struct blob_descriptor_disk *entries)
625 {
626         for (size_t i = 0; i < num_rdescs; i++) {
627                 struct wim_reshdr reshdr;
628                 struct alt_chunk_table_header_disk hdr;
629                 struct wim_resource_descriptor *rdesc;
630                 int ret;
631
632                 /* Advance to next resource entry.  */
633
634                 do {
635                         get_wim_reshdr(&(entries++)->reshdr, &reshdr);
636                 } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
637
638                 rdesc = rdescs[i];
639
640                 wim_res_hdr_to_desc(&reshdr, wim, rdesc);
641
642                 /* For solid resources, the uncompressed size, compression type,
643                  * and chunk size are stored in the resource itself, not in the
644                  * blob table.  */
645
646                 ret = full_pread(&wim->in_fd, &hdr,
647                                  sizeof(hdr), reshdr.offset_in_wim);
648                 if (ret) {
649                         ERROR("Failed to read header of solid resource "
650                               "(offset_in_wim=%"PRIu64")",
651                               reshdr.offset_in_wim);
652                         return ret;
653                 }
654
655                 rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
656
657                 /* Compression format numbers must be the same as in
658                  * WIMGAPI to be compatible here.  */
659                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
660                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
661                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
662                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
663                 rdesc->compression_type = le32_to_cpu(hdr.compression_format);
664
665                 rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
666
667                 DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
668                       "(%"TS"/%"PRIu32") @ +%"PRIu64"",
669                       i + 1, num_rdescs,
670                       rdesc->uncompressed_size,
671                       rdesc->size_in_wim,
672                       wimlib_get_compression_type_string(rdesc->compression_type),
673                       rdesc->chunk_size,
674                       rdesc->offset_in_wim);
675         }
676         return 0;
677 }
678
679 /*
680  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
681  * allocate a 'struct wim_resource_descriptor' for each resource within that
682  * run.
683  *
684  * Returns 0 on success, or a nonzero error code on failure.
685  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
686  */
687 static int
688 load_solid_info(WIMStruct *wim,
689                 const struct blob_descriptor_disk *entries,
690                 size_t num_remaining_entries,
691                 struct wim_resource_descriptor ***rdescs_ret,
692                 size_t *num_rdescs_ret)
693 {
694         size_t num_rdescs;
695         struct wim_resource_descriptor **rdescs;
696         size_t i;
697         int ret;
698
699         num_rdescs = count_solid_resources(entries, num_remaining_entries);
700         rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
701         if (!rdescs)
702                 return WIMLIB_ERR_NOMEM;
703
704         for (i = 0; i < num_rdescs; i++) {
705                 rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
706                 if (!rdescs[i]) {
707                         ret = WIMLIB_ERR_NOMEM;
708                         goto out_free_rdescs;
709                 }
710         }
711
712         ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
713         if (ret)
714                 goto out_free_rdescs;
715
716         *rdescs_ret = rdescs;
717         *num_rdescs_ret = num_rdescs;
718         return 0;
719
720 out_free_rdescs:
721         for (i = 0; i < num_rdescs; i++)
722                 FREE(rdescs[i]);
723         FREE(rdescs);
724         return ret;
725 }
726
727 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
728  * with the SOLID flag set, try to assign it to resource in the current solid
729  * run.  */
730 static int
731 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
732                               struct blob_descriptor *blob,
733                               struct wim_resource_descriptor **rdescs,
734                               size_t num_rdescs)
735 {
736         u64 offset = reshdr->offset_in_wim;
737
738         /* XXX: This linear search will be slow in the degenerate case where the
739          * number of solid resources in the run is huge.  */
740         blob->size = reshdr->size_in_wim;
741         for (size_t i = 0; i < num_rdescs; i++) {
742                 if (offset + blob->size <= rdescs[i]->uncompressed_size) {
743                         blob->offset_in_res = offset;
744                         blob_set_is_located_in_wim_resource(blob, rdescs[i]);
745                         return 0;
746                 }
747                 offset -= rdescs[i]->uncompressed_size;
748         }
749         ERROR("blob could not be assigned to a solid resource");
750         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
751 }
752
753 static void
754 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
755 {
756         if (rdescs) {
757                 for (size_t i = 0; i < num_rdescs; i++)
758                         if (list_empty(&rdescs[i]->blob_list))
759                                 FREE(rdescs[i]);
760                 FREE(rdescs);
761         }
762 }
763
764 static int
765 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
766 {
767         const struct blob_descriptor *blob1, *blob2;
768
769         blob1 = *(const struct blob_descriptor**)p1;
770         blob2 = *(const struct blob_descriptor**)p2;
771
772         return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
773 }
774
775 /* Validate the size and location of a WIM resource.  */
776 static int
777 validate_resource(struct wim_resource_descriptor *rdesc)
778 {
779         struct blob_descriptor *blob;
780         bool out_of_order;
781         u64 expected_next_offset;
782         int ret;
783
784         /* Verify that the resource itself has a valid offset and size.  */
785         if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
786                 goto invalid_due_to_overflow;
787
788         /* Verify that each blob in the resource has a valid offset and size.
789          */
790         expected_next_offset = 0;
791         out_of_order = false;
792         list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
793                 if (blob->offset_in_res + blob->size < blob->size ||
794                     blob->offset_in_res + blob->size > rdesc->uncompressed_size)
795                         goto invalid_due_to_overflow;
796
797                 if (blob->offset_in_res >= expected_next_offset)
798                         expected_next_offset = blob->offset_in_res + blob->size;
799                 else
800                         out_of_order = true;
801         }
802
803         /* If the blobs were not located at strictly increasing positions (not
804          * allowing for overlap), sort them.  Then make sure that none overlap.
805          */
806         if (out_of_order) {
807                 ret = sort_blob_list(&rdesc->blob_list,
808                                      offsetof(struct blob_descriptor,
809                                               rdesc_node),
810                                      cmp_blobs_by_offset_in_res);
811                 if (ret)
812                         return ret;
813
814                 expected_next_offset = 0;
815                 list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
816                         if (blob->offset_in_res >= expected_next_offset)
817                                 expected_next_offset = blob->offset_in_res + blob->size;
818                         else
819                                 goto invalid_due_to_overlap;
820                 }
821         }
822
823         return 0;
824
825 invalid_due_to_overflow:
826         ERROR("Invalid blob table (offset overflow)");
827         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
828
829 invalid_due_to_overlap:
830         ERROR("Invalid blob table (blobs in solid resource overlap)");
831         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
832 }
833
/* Validate each resource of a finished solid run, stopping at the first
 * invalid one, then release the run's descriptor array (see
 * free_solid_rdescs()).  Returns 0 if all resources are valid, otherwise the
 * error code of the first failed validation.  */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
        int ret = 0;

        for (size_t i = 0; ret == 0 && i < num_rdescs; i++)
                ret = validate_resource(rdescs[i]);

        free_solid_rdescs(rdescs, num_rdescs);
        return ret;
}
846
847 /*
848  * read_blob_table() -
849  *
850  * Read the blob table from a WIM file.  Usually, each entry in this table
851  * describes a "blob", or equivalently a "resource", that the WIM file contains,
852  * along with its location and SHA-1 message digest.  Descriptors for
853  * non-metadata blobs will be saved in the in-memory blob table
854  * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
855  * special location per-image (the wim->image_metadata array).
856  *
857  * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
858  * blobs that are compressed together.  Such a resource is called a "solid
859  * resource".  Solid resources are still described in the on-disk "blob table",
860  * although the format is not the most logical.  A consecutive sequence of
861  * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
862  * A solid run describes a set of solid resources, each of which contains a set
863  * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
864  * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
865  * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
866  * resource.  There are some oddities in how we need to determine which solid
867  * resource a blob is actually in; see the code for details.
868  *
869  * Possible return values:
870  *      WIMLIB_ERR_SUCCESS (0)
871  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
872  *      WIMLIB_ERR_NOMEM
873  *
874  *      Or an error code caused by failure to read the blob table from the WIM
875  *      file.
876  */
877 int
878 read_blob_table(WIMStruct *wim)
879 {
880         int ret;
881         size_t num_entries;
882         void *buf = NULL;
883         struct blob_table *table = NULL;
884         struct blob_descriptor *cur_blob = NULL;
885         size_t num_duplicate_blobs = 0;
886         size_t num_wrong_part_blobs = 0;
887         u32 image_index = 0;
888         struct wim_resource_descriptor **cur_solid_rdescs = NULL;
889         size_t cur_num_solid_rdescs = 0;
890
891         DEBUG("Reading blob table.");
892
893         /* Calculate the number of entries in the blob table.  */
894         num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
895                       sizeof(struct blob_descriptor_disk);
896
897         /* Read the blob table into a buffer.  */
898         ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
899         if (ret)
900                 goto out;
901
902         /* Allocate a hash table to map SHA-1 message digests into blob
903          * descriptors.  This is the in-memory "blob table".  */
904         table = new_blob_table(num_entries * 2 + 1);
905         if (!table)
906                 goto oom;
907
908         /* Allocate and initalize blob descriptors from the raw blob table
909          * buffer.  */
910         for (size_t i = 0; i < num_entries; i++) {
911                 const struct blob_descriptor_disk *disk_entry =
912                         &((const struct blob_descriptor_disk*)buf)[i];
913                 struct wim_reshdr reshdr;
914                 u16 part_number;
915
916                 /* Get the resource header  */
917                 get_wim_reshdr(&disk_entry->reshdr, &reshdr);
918
919                 DEBUG("reshdr: size_in_wim=%"PRIu64", "
920                       "uncompressed_size=%"PRIu64", "
921                       "offset_in_wim=%"PRIu64", "
922                       "flags=0x%02x",
923                       reshdr.size_in_wim, reshdr.uncompressed_size,
924                       reshdr.offset_in_wim, reshdr.flags);
925
926                 /* Ignore SOLID flag if it isn't supposed to be used in this WIM
927                  * version.  */
928                 if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
929                         reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;
930
931                 /* Allocate a new 'struct blob_descriptor'.  */
932                 cur_blob = new_blob_descriptor();
933                 if (!cur_blob)
934                         goto oom;
935
936                 /* Get the part number, reference count, and hash.  */
937                 part_number = le16_to_cpu(disk_entry->part_number);
938                 cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
939                 copy_hash(cur_blob->hash, disk_entry->hash);
940
941                 if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
942
943                         /* SOLID entry  */
944
945                         if (!cur_solid_rdescs) {
946                                 /* Starting new run  */
947                                 ret = load_solid_info(wim, disk_entry,
948                                                       num_entries - i,
949                                                       &cur_solid_rdescs,
950                                                       &cur_num_solid_rdescs);
951                                 if (ret)
952                                         goto out;
953                         }
954
955                         if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
956                                 /* Resource entry, not blob entry  */
957                                 goto free_cur_blob_and_continue;
958                         }
959
960                         /* Blob entry  */
961
962                         ret = assign_blob_to_solid_resource(&reshdr,
963                                                             cur_blob,
964                                                             cur_solid_rdescs,
965                                                             cur_num_solid_rdescs);
966                         if (ret)
967                                 goto out;
968
969                 } else {
970                         /* Normal blob/resource entry; SOLID not set.  */
971
972                         struct wim_resource_descriptor *rdesc;
973
974                         if (unlikely(cur_solid_rdescs)) {
975                                 /* This entry terminated a solid run.  */
976                                 ret = finish_solid_rdescs(cur_solid_rdescs,
977                                                           cur_num_solid_rdescs);
978                                 cur_solid_rdescs = NULL;
979                                 if (ret)
980                                         goto out;
981                         }
982
983                         /* How to handle an uncompressed resource with its
984                          * uncompressed size different from its compressed size?
985                          *
986                          * Based on a simple test, WIMGAPI seems to handle this
987                          * as follows:
988                          *
989                          * if (size_in_wim > uncompressed_size) {
990                          *      Ignore uncompressed_size; use size_in_wim
991                          *      instead.
992                          * } else {
993                          *      Honor uncompressed_size, but treat the part of
994                          *      the file data above size_in_wim as all zeros.
995                          * }
996                          *
997                          * So we will do the same.  */
998                         if (unlikely(!(reshdr.flags &
999                                        WIM_RESHDR_FLAG_COMPRESSED) &&
1000                                      (reshdr.size_in_wim >
1001                                       reshdr.uncompressed_size)))
1002                         {
1003                                 reshdr.uncompressed_size = reshdr.size_in_wim;
1004                         }
1005
1006                         /* Set up a resource descriptor for this blob.  */
1007
1008                         rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
1009                         if (!rdesc)
1010                                 goto oom;
1011
1012                         wim_res_hdr_to_desc(&reshdr, wim, rdesc);
1013
1014                         cur_blob->offset_in_res = 0;
1015                         cur_blob->size = reshdr.uncompressed_size;
1016
1017                         blob_set_is_located_in_wim_resource(cur_blob, rdesc);
1018                 }
1019
1020                 /* cur_blob is now a blob bound to a resource.  */
1021
1022                 /* Ignore entries with all zeroes in the hash field.  */
1023                 if (is_zero_hash(cur_blob->hash))
1024                         goto free_cur_blob_and_continue;
1025
1026                 /* Verify that the part number matches that of the underlying
1027                  * WIM file.  */
1028                 if (part_number != wim->hdr.part_number) {
1029                         num_wrong_part_blobs++;
1030                         goto free_cur_blob_and_continue;
1031                 }
1032
1033                 if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
1034
1035                         cur_blob->is_metadata = 1;
1036
1037                         /* Blob table entry for a metadata resource.  */
1038
1039                         /* Metadata entries with no references must be ignored.
1040                          * See, for example, the WinPE WIMs from the WAIK v2.1.
1041                          */
1042                         if (cur_blob->refcnt == 0)
1043                                 goto free_cur_blob_and_continue;
1044
1045                         if (cur_blob->refcnt != 1) {
1046                                 /* We don't currently support this case due to
1047                                  * the complications of multiple images sharing
1048                                  * the same metadata resource or a metadata
1049                                  * resource also being referenced by files.  */
1050                                 ERROR("Found metadata resource with refcnt != 1");
1051                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1052                                 goto out;
1053                         }
1054
1055                         if (wim->hdr.part_number != 1) {
1056                                 WARNING("Ignoring metadata resource found in a "
1057                                         "non-first part of the split WIM");
1058                                 goto free_cur_blob_and_continue;
1059                         }
1060
1061                         /* The number of entries in the blob table with
1062                          * WIM_RESHDR_FLAG_METADATA set should be the same as
1063                          * the image_count field in the WIM header.  */
1064                         if (image_index == wim->hdr.image_count) {
1065                                 WARNING("Found more metadata resources than images");
1066                                 goto free_cur_blob_and_continue;
1067                         }
1068
1069                         /* Notice very carefully:  We are assigning the metadata
1070                          * resources to images in the same order in which their
1071                          * blob table entries occur on disk.  (This is also the
1072                          * behavior of Microsoft's software.)  In particular,
1073                          * this overrides the actual locations of the metadata
1074                          * resources themselves in the WIM file as well as any
1075                          * information written in the XML data.  */
1076                         DEBUG("Found metadata resource for image %"PRIu32" at "
1077                               "offset %"PRIu64".",
1078                               image_index + 1,
1079                               reshdr.offset_in_wim);
1080
1081                         wim->image_metadata[image_index++]->metadata_blob = cur_blob;
1082                 } else {
1083                         /* Blob table entry for a non-metadata blob.  */
1084
1085                         /* Ignore this blob if it's a duplicate.  */
1086                         if (lookup_blob(table, cur_blob->hash)) {
1087                                 num_duplicate_blobs++;
1088                                 goto free_cur_blob_and_continue;
1089                         }
1090
1091                         /* Insert the blob into the in-memory blob table, keyed
1092                          * by its SHA-1 message digest.  */
1093                         blob_table_insert(table, cur_blob);
1094                 }
1095
1096                 continue;
1097
1098         free_cur_blob_and_continue:
1099                 if (cur_solid_rdescs &&
1100                     cur_blob->blob_location == BLOB_IN_WIM)
1101                         blob_unset_is_located_in_wim_resource(cur_blob);
1102                 free_blob_descriptor(cur_blob);
1103         }
1104         cur_blob = NULL;
1105
1106         if (cur_solid_rdescs) {
1107                 /* End of blob table terminated a solid run.  */
1108                 ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1109                 cur_solid_rdescs = NULL;
1110                 if (ret)
1111                         goto out;
1112         }
1113
1114         if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
1115                 WARNING("Could not find metadata resources for all images");
1116                 for (u32 i = image_index; i < wim->hdr.image_count; i++)
1117                         put_image_metadata(wim->image_metadata[i], NULL);
1118                 wim->hdr.image_count = image_index;
1119         }
1120
1121         if (num_duplicate_blobs > 0)
1122                 WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);
1123
1124         if (num_wrong_part_blobs > 0) {
1125                 WARNING("Ignoring %zu blobs with wrong part number",
1126                         num_wrong_part_blobs);
1127         }
1128
1129         DEBUG("Done reading blob table.");
1130         wim->blob_table = table;
1131         ret = 0;
1132         goto out_free_buf;
1133
1134 oom:
1135         ERROR("Not enough memory to read blob table!");
1136         ret = WIMLIB_ERR_NOMEM;
1137 out:
1138         free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1139         free_blob_descriptor(cur_blob);
1140         free_blob_table(table);
1141 out_free_buf:
1142         FREE(buf);
1143         return ret;
1144 }
1145
1146 static void
1147 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1148                       const struct wim_reshdr *out_reshdr,
1149                       u16 part_number, u32 refcnt, const u8 *hash)
1150 {
1151         put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1152         disk_entry->part_number = cpu_to_le16(part_number);
1153         disk_entry->refcnt = cpu_to_le32(refcnt);
1154         copy_hash(disk_entry->hash, hash);
1155 }
1156
1157 /* Note: the list of blob descriptors must be sorted so that all entries for the
1158  * same solid resource are consecutive.  In addition, blob descriptors for
1159  * metadata resources must be in the same order as the indices of the underlying
1160  * images.  */
1161 int
1162 write_blob_table_from_blob_list(struct list_head *blob_list,
1163                                 struct filedes *out_fd,
1164                                 u16 part_number,
1165                                 struct wim_reshdr *out_reshdr,
1166                                 int write_resource_flags)
1167 {
1168         size_t table_size;
1169         struct blob_descriptor *blob;
1170         struct blob_descriptor_disk *table_buf;
1171         struct blob_descriptor_disk *table_buf_ptr;
1172         int ret;
1173         u64 prev_res_offset_in_wim = ~0ULL;
1174         u64 prev_uncompressed_size;
1175         u64 logical_offset;
1176
1177         table_size = 0;
1178         list_for_each_entry(blob, blob_list, blob_table_list) {
1179                 table_size += sizeof(struct blob_descriptor_disk);
1180
1181                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
1182                     blob->out_res_offset_in_wim != prev_res_offset_in_wim)
1183                 {
1184                         table_size += sizeof(struct blob_descriptor_disk);
1185                         prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1186                 }
1187         }
1188
1189         DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
1190               table_size, out_fd->offset);
1191
1192         table_buf = MALLOC(table_size);
1193         if (table_buf == NULL) {
1194                 ERROR("Failed to allocate %zu bytes for temporary blob table",
1195                       table_size);
1196                 return WIMLIB_ERR_NOMEM;
1197         }
1198         table_buf_ptr = table_buf;
1199
1200         prev_res_offset_in_wim = ~0ULL;
1201         prev_uncompressed_size = 0;
1202         logical_offset = 0;
1203         list_for_each_entry(blob, blob_list, blob_table_list) {
1204                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1205                         struct wim_reshdr tmp_reshdr;
1206
1207                         /* Eww.  When WIMGAPI sees multiple solid resources, it
1208                          * expects the offsets to be adjusted as if there were
1209                          * really only one solid resource.  */
1210
1211                         if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
1212                                 /* Put the resource entry for solid resource  */
1213                                 tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
1214                                 tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
1215                                 tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
1216                                 tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
1217
1218                                 write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1219                                                       part_number, 1, zero_hash);
1220
1221                                 logical_offset += prev_uncompressed_size;
1222
1223                                 prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1224                                 prev_uncompressed_size = blob->out_res_uncompressed_size;
1225                         }
1226                         tmp_reshdr = blob->out_reshdr;
1227                         tmp_reshdr.offset_in_wim += logical_offset;
1228                         write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1229                                               part_number, blob->out_refcnt, blob->hash);
1230                 } else {
1231                         write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
1232                                               part_number, blob->out_refcnt, blob->hash);
1233                 }
1234
1235         }
1236         wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
1237
1238         /* Write the blob table uncompressed.  Although wimlib can handle a
1239          * compressed blob table, MS software cannot.  */
1240         ret = write_wim_resource_from_buffer(table_buf,
1241                                              table_size,
1242                                              true,
1243                                              out_fd,
1244                                              WIMLIB_COMPRESSION_TYPE_NONE,
1245                                              0,
1246                                              out_reshdr,
1247                                              NULL,
1248                                              write_resource_flags);
1249         FREE(table_buf);
1250         DEBUG("ret=%d", ret);
1251         return ret;
1252 }
1253
1254 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1255  * existing descriptor in @blob_table for an identical blob.  */
1256 struct blob_descriptor *
1257 new_blob_from_data_buffer(const void *buffer, size_t size,
1258                           struct blob_table *blob_table)
1259 {
1260         u8 hash[SHA1_HASH_SIZE];
1261         struct blob_descriptor *blob, *existing_blob;
1262
1263         sha1_buffer(buffer, size, hash);
1264         existing_blob = lookup_blob(blob_table, hash);
1265         if (existing_blob) {
1266                 wimlib_assert(existing_blob->size == size);
1267                 blob = existing_blob;
1268         } else {
1269                 void *buffer_copy;
1270                 blob = new_blob_descriptor();
1271                 if (blob == NULL)
1272                         return NULL;
1273                 buffer_copy = memdup(buffer, size);
1274                 if (buffer_copy == NULL) {
1275                         free_blob_descriptor(blob);
1276                         return NULL;
1277                 }
1278                 blob->blob_location = BLOB_IN_ATTACHED_BUFFER;
1279                 blob->attached_buffer = buffer_copy;
1280                 blob->size = size;
1281                 copy_hash(blob->hash, hash);
1282                 blob_table_insert(blob_table, blob);
1283         }
1284         return blob;
1285 }
1286
1287 struct blob_descriptor *
1288 after_blob_hashed(struct blob_descriptor *blob,
1289                   struct blob_descriptor **back_ptr,
1290                   struct blob_table *blob_table)
1291 {
1292         struct blob_descriptor *duplicate_blob;
1293
1294         list_del(&blob->unhashed_list);
1295         blob->unhashed = 0;
1296
1297         /* Look for a duplicate blob  */
1298         duplicate_blob = lookup_blob(blob_table, blob->hash);
1299         if (duplicate_blob) {
1300                 /* We have a duplicate blob.  Transfer the reference counts from
1301                  * this blob to the duplicate and update the reference to this
1302                  * blob (from a stream) to point to the duplicate.  The caller
1303                  * is responsible for freeing @blob if needed.  */
1304                 wimlib_assert(duplicate_blob->size == blob->size);
1305                 duplicate_blob->refcnt += blob->refcnt;
1306                 blob->refcnt = 0;
1307                 *back_ptr = duplicate_blob;
1308                 return duplicate_blob;
1309         } else {
1310                 /* No duplicate blob, so we need to insert this blob into the
1311                  * blob table and treat it as a hashed blob.  */
1312                 blob_table_insert(blob_table, blob);
1313                 return blob;
1314         }
1315 }
1316
/*
 * Compute the SHA-1 message digest of an unhashed blob, then transfer its
 * descriptor from the list of unhashed blobs into the blob table, merging it
 * with an identical blob if one is already there.
 *
 * @blob:
 *      The blob to hash
 * @blob_table:
 *      The blob table in which the blob needs to be indexed
 * @blob_ret:
 *      Written only on success.  Receives @blob itself if it was inserted
 *      into the blob table, or the pre-existing duplicate otherwise.
 *
 * Returns 0 on success; nonzero if there is an error reading the blob data.
 */
int
hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
                   struct blob_descriptor **blob_ret)
{
        /* The back pointer is looked up before hashing, as in the original
         * statement order.  */
        struct blob_descriptor **back_ptr = retrieve_pointer_to_unhashed_blob(blob);
        int ret = sha1_blob(blob);

        if (ret == 0)
                *blob_ret = after_blob_hashed(blob, back_ptr, blob_table);
        return ret;
}
1349
1350 void
1351 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1352                               struct wimlib_resource_entry *wentry)
1353 {
1354         memset(wentry, 0, sizeof(*wentry));
1355
1356         wentry->uncompressed_size = blob->size;
1357         if (blob->blob_location == BLOB_IN_WIM) {
1358                 unsigned res_flags = blob->rdesc->flags;
1359
1360                 wentry->part_number = blob->rdesc->wim->hdr.part_number;
1361                 if (res_flags & WIM_RESHDR_FLAG_SOLID) {
1362                         wentry->offset = blob->offset_in_res;
1363                 } else {
1364                         wentry->compressed_size = blob->rdesc->size_in_wim;
1365                         wentry->offset = blob->rdesc->offset_in_wim;
1366                 }
1367                 wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1368                 wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1369                 wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1370
1371                 wentry->is_compressed = (res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1372                 wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
1373                 wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1374                 wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
1375         }
1376         copy_hash(wentry->sha1_hash, blob->hash);
1377         wentry->reference_count = blob->refcnt;
1378         wentry->is_metadata = blob->is_metadata;
1379 }
1380
/* Context handed to do_iterate_blob() for each blob visited by
 * wimlib_iterate_lookup_table().  */
struct iterate_blob_context {
        /* Caller-supplied callback invoked once per resource entry.  */
        wimlib_iterate_lookup_table_callback_t cb;
        /* Opaque pointer passed through to @cb unchanged.  */
        void *user_ctx;
};
1385
1386 static int
1387 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1388 {
1389         struct iterate_blob_context *ctx = _ctx;
1390         struct wimlib_resource_entry entry;
1391
1392         blob_to_wimlib_resource_entry(blob, &entry);
1393         return (*ctx->cb)(&entry, ctx->user_ctx);
1394 }
1395
1396 /* API function documented in wimlib.h  */
1397 WIMLIBAPI int
1398 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1399                             wimlib_iterate_lookup_table_callback_t cb,
1400                             void *user_ctx)
1401 {
1402         if (flags != 0)
1403                 return WIMLIB_ERR_INVALID_PARAM;
1404
1405         struct iterate_blob_context ctx = {
1406                 .cb = cb,
1407                 .user_ctx = user_ctx,
1408         };
1409         if (wim_has_metadata(wim)) {
1410                 int ret;
1411                 for (int i = 0; i < wim->hdr.image_count; i++) {
1412                         ret = do_iterate_blob(wim->image_metadata[i]->metadata_blob,
1413                                               &ctx);
1414                         if (ret)
1415                                 return ret;
1416                 }
1417         }
1418         return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1419 }