New helper: wim_reshdr_to_desc_and_blob()
[wimlib] / src / blob_table.c
1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h> /* for unlink()  */
35
36 #include "wimlib/assert.h"
37 #include "wimlib/blob_table.h"
38 #include "wimlib/encoding.h"
39 #include "wimlib/endianness.h"
40 #include "wimlib/error.h"
41 #include "wimlib/metadata.h"
42 #include "wimlib/ntfs_3g.h"
43 #include "wimlib/resource.h"
44 #include "wimlib/unaligned.h"
45 #include "wimlib/util.h"
46 #include "wimlib/write.h"
47
/*
 * In-memory blob table: a chained hash table that maps SHA-1 message digests
 * to blob descriptors.
 */
struct blob_table {
        /* Bucket array.  Each bucket heads a chain of blob descriptors that
         * are linked through their 'hash_list' member.  */
        struct hlist_head *array;

        /* Number of blob descriptors currently inserted in the table.  */
        size_t num_blobs;

        /* Number of buckets in @array.  */
        size_t capacity;
};
54
55 struct blob_table *
56 new_blob_table(size_t capacity)
57 {
58         struct blob_table *table;
59         struct hlist_head *array;
60
61         table = MALLOC(sizeof(struct blob_table));
62         if (table == NULL)
63                 goto oom;
64
65         array = CALLOC(capacity, sizeof(array[0]));
66         if (array == NULL) {
67                 FREE(table);
68                 goto oom;
69         }
70
71         table->num_blobs = 0;
72         table->capacity = capacity;
73         table->array = array;
74         return table;
75
76 oom:
77         ERROR("Failed to allocate memory for blob table "
78               "with capacity %zu", capacity);
79         return NULL;
80 }
81
/* for_blob_in_table() visitor that frees the blob descriptor it is handed.
 * The second argument is unused.  Always returns 0 so that the iteration
 * continues.  */
static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
        (void)_ignore;

        free_blob_descriptor(blob);
        return 0;
}
88
89 void
90 free_blob_table(struct blob_table *table)
91 {
92         if (table) {
93                 for_blob_in_table(table, do_free_blob_descriptor, NULL);
94                 FREE(table->array);
95                 FREE(table);
96         }
97 }
98
99 struct blob_descriptor *
100 new_blob_descriptor(void)
101 {
102         BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
103         return CALLOC(1, sizeof(struct blob_descriptor));
104 }
105
106 struct blob_descriptor *
107 clone_blob_descriptor(const struct blob_descriptor *old)
108 {
109         struct blob_descriptor *new;
110
111         new = memdup(old, sizeof(struct blob_descriptor));
112         if (new == NULL)
113                 return NULL;
114
115         switch (new->blob_location) {
116         case BLOB_IN_WIM:
117                 list_add(&new->rdesc_node, &new->rdesc->blob_list);
118                 break;
119
120         case BLOB_IN_FILE_ON_DISK:
121 #ifdef __WIN32__
122         case BLOB_IN_WINNT_FILE_ON_DISK:
123         case BLOB_WIN32_ENCRYPTED:
124 #endif
125 #ifdef WITH_FUSE
126         case BLOB_IN_STAGING_FILE:
127                 BUILD_BUG_ON((void*)&old->file_on_disk !=
128                              (void*)&old->staging_file_name);
129 #endif
130                 new->file_on_disk = TSTRDUP(old->file_on_disk);
131                 if (new->file_on_disk == NULL)
132                         goto out_free;
133                 break;
134         case BLOB_IN_ATTACHED_BUFFER:
135                 new->attached_buffer = memdup(old->attached_buffer, old->size);
136                 if (new->attached_buffer == NULL)
137                         goto out_free;
138                 break;
139 #ifdef WITH_NTFS_3G
140         case BLOB_IN_NTFS_VOLUME:
141                 new->ntfs_loc = clone_ntfs_location(old->ntfs_loc);
142                 if (!new->ntfs_loc)
143                         goto out_free;
144                 break;
145 #endif
146         }
147         return new;
148
149 out_free:
150         free_blob_descriptor(new);
151         return NULL;
152 }
153
154 static void
155 blob_release_location(struct blob_descriptor *blob)
156 {
157         switch (blob->blob_location) {
158         case BLOB_IN_WIM:
159                 list_del(&blob->rdesc_node);
160                 if (list_empty(&blob->rdesc->blob_list))
161                         FREE(blob->rdesc);
162                 break;
163         case BLOB_IN_FILE_ON_DISK:
164 #ifdef __WIN32__
165         case BLOB_IN_WINNT_FILE_ON_DISK:
166         case BLOB_WIN32_ENCRYPTED:
167 #endif
168 #ifdef WITH_FUSE
169         case BLOB_IN_STAGING_FILE:
170                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
171                              (void*)&blob->staging_file_name);
172 #endif
173         case BLOB_IN_ATTACHED_BUFFER:
174                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
175                              (void*)&blob->attached_buffer);
176                 FREE(blob->file_on_disk);
177                 break;
178 #ifdef WITH_NTFS_3G
179         case BLOB_IN_NTFS_VOLUME:
180                 if (blob->ntfs_loc)
181                         free_ntfs_location(blob->ntfs_loc);
182                 break;
183 #endif
184         }
185 }
186
/* Free a blob descriptor together with its location-specific data.  Passing
 * NULL is allowed and does nothing.  */
void
free_blob_descriptor(struct blob_descriptor *blob)
{
        if (blob == NULL)
                return;

        blob_release_location(blob);
        FREE(blob);
}
195
196 /* Should this blob be retained even if it has no references?  */
197 static bool
198 should_retain_blob(const struct blob_descriptor *blob)
199 {
200         return blob->blob_location == BLOB_IN_WIM;
201 }
202
/* Dispose of a blob descriptor that is no longer in use: free it, unless it is
 * one of the blobs that should_retain_blob() says to keep.  */
static void
finalize_blob(struct blob_descriptor *blob)
{
        if (should_retain_blob(blob))
                return;

        free_blob_descriptor(blob);
}
209
210 /*
211  * Decrements the reference count of the specified blob, which must be either
212  * (a) unhashed, or (b) inserted in the specified blob table.
213  *
214  * If the blob's reference count reaches 0, we may unlink it from @table and
215  * free it.  However, we retain blobs with 0 reference count that originated
216  * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
217  *
218  * 1. This prevents information about valid blobs in a WIM file --- blobs which
219  *    will continue to be present after appending to the WIM file --- from being
220  *    lost merely because we dropped all references to them.
221  *
222  * 2. Blob reference counts we read from WIM files can't be trusted.  It's
223  *    possible that a WIM has reference counts that are too low; WIMGAPI
224  *    sometimes creates WIMs where this is the case.  It's also possible that
225  *    blobs have been referenced from an external WIM; those blobs can
226  *    potentially have any reference count at all, either lower or higher than
227  *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
228  *    it were a standalone WIM.
229  *
230  * So we can't take the reference counts too seriously.  But at least, we do
231  * recalculate by default when writing a new WIM file.
232  */
233 void
234 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
235 {
236         blob_subtract_refcnt(blob, table, 1);
237 }
238
239 void
240 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
241                      u32 count)
242 {
243         if (unlikely(blob->refcnt < count)) {
244                 blob->refcnt = 0; /* See comment above  */
245                 return;
246         }
247
248         blob->refcnt -= count;
249
250         if (blob->refcnt != 0)
251                 return;
252
253         if (blob->unhashed) {
254                 list_del(&blob->unhashed_list);
255         #ifdef WITH_FUSE
256                 /* If the blob has been extracted to a staging file for a FUSE
257                  * mount, unlink the staging file.  (Note that there still may
258                  * be open file descriptors to it.)  */
259                 if (blob->blob_location == BLOB_IN_STAGING_FILE)
260                         unlinkat(blob->staging_dir_fd,
261                                  blob->staging_file_name, 0);
262         #endif
263         } else {
264                 if (!should_retain_blob(blob))
265                         blob_table_unlink(table, blob);
266         }
267
268         /* If FUSE mounts are enabled, then don't actually free the blob
269          * descriptor until the last file descriptor to it has been closed.  */
270 #ifdef WITH_FUSE
271         if (blob->num_opened_fds == 0)
272 #endif
273                 finalize_blob(blob);
274 }
275
#ifdef WITH_FUSE
/* Record that one open file descriptor to @blob has been closed.  When that
 * was the last one and the blob has no references left either, the blob is
 * finalized.  */
void
blob_decrement_num_opened_fds(struct blob_descriptor *blob)
{
        wimlib_assert(blob->num_opened_fds != 0);

        blob->num_opened_fds--;

        if (blob->num_opened_fds != 0)
                return;
        if (blob->refcnt != 0)
                return;

        finalize_blob(blob);
}
#endif
286
287 static void
288 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
289 {
290         size_t i = blob->hash_short % table->capacity;
291
292         hlist_add_head(&blob->hash_list, &table->array[i]);
293 }
294
295 static void
296 enlarge_blob_table(struct blob_table *table)
297 {
298         size_t old_capacity, new_capacity;
299         struct hlist_head *old_array, *new_array;
300         struct blob_descriptor *blob;
301         struct hlist_node *tmp;
302         size_t i;
303
304         old_capacity = table->capacity;
305         new_capacity = old_capacity * 2;
306         new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
307         if (new_array == NULL)
308                 return;
309         old_array = table->array;
310         table->array = new_array;
311         table->capacity = new_capacity;
312
313         for (i = 0; i < old_capacity; i++) {
314                 hlist_for_each_entry_safe(blob, tmp, &old_array[i], hash_list) {
315                         hlist_del(&blob->hash_list);
316                         blob_table_insert_raw(table, blob);
317                 }
318         }
319         FREE(old_array);
320 }
321
322 /* Insert a blob descriptor into the blob table.  */
323 void
324 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
325 {
326         blob_table_insert_raw(table, blob);
327         if (++table->num_blobs > table->capacity)
328                 enlarge_blob_table(table);
329 }
330
331 /* Unlinks a blob descriptor from the blob table; does not free it.  */
332 void
333 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
334 {
335         wimlib_assert(!blob->unhashed);
336         wimlib_assert(table->num_blobs != 0);
337
338         hlist_del(&blob->hash_list);
339         table->num_blobs--;
340 }
341
342 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
343  * the specified blob table, or NULL if there is none.  */
344 struct blob_descriptor *
345 lookup_blob(const struct blob_table *table, const u8 *hash)
346 {
347         size_t i;
348         struct blob_descriptor *blob;
349
350         i = load_size_t_unaligned(hash) % table->capacity;
351         hlist_for_each_entry(blob, &table->array[i], hash_list)
352                 if (hashes_equal(hash, blob->hash))
353                         return blob;
354         return NULL;
355 }
356
357 /* Call a function on all blob descriptors in the specified blob table.  Stop
358  * early and return nonzero if any call to the function returns nonzero.  */
359 int
360 for_blob_in_table(struct blob_table *table,
361                   int (*visitor)(struct blob_descriptor *, void *), void *arg)
362 {
363         struct blob_descriptor *blob;
364         struct hlist_node *tmp;
365         int ret;
366
367         for (size_t i = 0; i < table->capacity; i++) {
368                 hlist_for_each_entry_safe(blob, tmp, &table->array[i],
369                                           hash_list)
370                 {
371                         ret = visitor(blob, arg);
372                         if (ret)
373                                 return ret;
374                 }
375         }
376         return 0;
377 }
378
379 /*
380  * This is a qsort() callback that sorts blobs into an order optimized for
381  * reading.  Sorting is done primarily by blob location, then secondarily by a
382  * location-dependent order.  For example, blobs in WIM resources are sorted
383  * such that the underlying WIM files will be read sequentially.  This is
384  * especially important for WIM files containing solid resources.
385  */
386 int
387 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
388 {
389         const struct blob_descriptor *blob1, *blob2;
390         int v;
391         WIMStruct *wim1, *wim2;
392
393         blob1 = *(const struct blob_descriptor**)p1;
394         blob2 = *(const struct blob_descriptor**)p2;
395
396         v = (int)blob1->blob_location - (int)blob2->blob_location;
397
398         /* Different locations?  */
399         if (v)
400                 return v;
401
402         switch (blob1->blob_location) {
403         case BLOB_IN_WIM:
404                 wim1 = blob1->rdesc->wim;
405                 wim2 = blob2->rdesc->wim;
406
407                 /* Different (possibly split) WIMs?  */
408                 if (wim1 != wim2) {
409                         v = cmp_guids(wim1->hdr.guid, wim2->hdr.guid);
410                         if (v)
411                                 return v;
412                 }
413
414                 /* Different part numbers in the same WIM?  */
415                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
416                 if (v)
417                         return v;
418
419                 if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
420                         return cmp_u64(blob1->rdesc->offset_in_wim,
421                                        blob2->rdesc->offset_in_wim);
422
423                 return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
424
425         case BLOB_IN_FILE_ON_DISK:
426 #ifdef WITH_FUSE
427         case BLOB_IN_STAGING_FILE:
428 #endif
429 #ifdef __WIN32__
430         case BLOB_IN_WINNT_FILE_ON_DISK:
431         case BLOB_WIN32_ENCRYPTED:
432                 /* Windows: compare by starting LCN (logical cluster number)  */
433                 v = cmp_u64(blob1->sort_key, blob2->sort_key);
434                 if (v)
435                         return v;
436 #endif
437                 /* Compare files by path: just a heuristic that will place files
438                  * in the same directory next to each other.  */
439                 return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
440 #ifdef WITH_NTFS_3G
441         case BLOB_IN_NTFS_VOLUME:
442                 return cmp_ntfs_locations(blob1->ntfs_loc, blob2->ntfs_loc);
443 #endif
444         default:
445                 /* No additional sorting order defined for this resource
446                  * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
447                  * everything equal to each other.  */
448                 return 0;
449         }
450 }
451
452 int
453 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
454                int (*compar)(const void *, const void*))
455 {
456         struct list_head *cur;
457         struct blob_descriptor **array;
458         size_t i;
459         size_t array_size;
460         size_t num_blobs = 0;
461
462         list_for_each(cur, blob_list)
463                 num_blobs++;
464
465         if (num_blobs <= 1)
466                 return 0;
467
468         array_size = num_blobs * sizeof(array[0]);
469         array = MALLOC(array_size);
470         if (array == NULL)
471                 return WIMLIB_ERR_NOMEM;
472
473         cur = blob_list->next;
474         for (i = 0; i < num_blobs; i++) {
475                 array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
476                 cur = cur->next;
477         }
478
479         qsort(array, num_blobs, sizeof(array[0]), compar);
480
481         INIT_LIST_HEAD(blob_list);
482         for (i = 0; i < num_blobs; i++) {
483                 list_add_tail((struct list_head*)
484                                ((u8*)array[i] + list_head_offset), blob_list);
485         }
486         FREE(array);
487         return 0;
488 }
489
490 /* Sort the specified list of blobs in an order optimized for sequential
491  * reading.  */
492 int
493 sort_blob_list_by_sequential_order(struct list_head *blob_list,
494                                    size_t list_head_offset)
495 {
496         return sort_blob_list(blob_list, list_head_offset,
497                               cmp_blobs_by_sequential_order);
498 }
499
/* for_blob_in_table() visitor that appends @blob to an array.  @_pp points to
 * the write cursor (a 'struct blob_descriptor **'), which is advanced by one
 * slot.  Always returns 0.  */
static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
        struct blob_descriptor ***cursor = _pp;

        **cursor = blob;
        (*cursor)++;
        return 0;
}
507
508 /* Iterate through the blob descriptors in the specified blob table in an order
509  * optimized for sequential reading.  */
510 int
511 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
512                                              int (*visitor)(struct blob_descriptor *, void *),
513                                              void *arg)
514 {
515         struct blob_descriptor **blob_array, **p;
516         size_t num_blobs = table->num_blobs;
517         int ret;
518
519         blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
520         if (!blob_array)
521                 return WIMLIB_ERR_NOMEM;
522         p = blob_array;
523         for_blob_in_table(table, add_blob_to_array, &p);
524
525         wimlib_assert(p == blob_array + num_blobs);
526
527         qsort(blob_array, num_blobs, sizeof(blob_array[0]),
528               cmp_blobs_by_sequential_order);
529         ret = 0;
530         for (size_t i = 0; i < num_blobs; i++) {
531                 ret = visitor(blob_array[i], arg);
532                 if (ret)
533                         break;
534         }
535         FREE(blob_array);
536         return ret;
537 }
538
539 /* On-disk format of a blob descriptor in a WIM file.
540  *
541  * Note: if the WIM file contains solid resource(s), then this structure is
542  * sometimes overloaded to describe a "resource" rather than a "blob".  See the
543  * code for details.  */
544 struct blob_descriptor_disk {
545
546         /* Size, offset, and flags of the blob.  */
547         struct wim_reshdr_disk reshdr;
548
549         /* Which part of the split WIM this blob is in; indexed from 1. */
550         le16 part_number;
551
552         /* Reference count of this blob over all WIM images.  (But see comment
553          * above blob_decrement_refcnt().)  */
554         le32 refcnt;
555
556         /* SHA-1 message digest of the uncompressed data of this blob, or all
557          * zeroes if this blob is of zero length.  */
558         u8 hash[SHA1_HASH_SIZE];
559 } _packed_attribute;
560
561 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
562  * count how many specify resources (as opposed to blobs within those
563  * resources).
564  *
565  * Returns the resulting count.  */
566 static size_t
567 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
568 {
569         size_t count = 0;
570         do {
571                 struct wim_reshdr reshdr;
572
573                 get_wim_reshdr(&(entries++)->reshdr, &reshdr);
574
575                 if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
576                         /* Run was terminated by a stand-alone blob entry.  */
577                         break;
578                 }
579
580                 if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
581                         /* This is a resource entry.  */
582                         count++;
583                 }
584         } while (--max);
585         return count;
586 }
587
588 /*
589  * Given a run of consecutive blob descriptors with the SOLID flag set and
590  * having @num_rdescs resource entries, load resource information from them into
591  * the resource descriptors in the @rdescs array.
592  *
593  * Returns 0 on success, or a nonzero error code on failure.
594  */
595 static int
596 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
597                    size_t num_rdescs,
598                    const struct blob_descriptor_disk *entries)
599 {
600         for (size_t i = 0; i < num_rdescs; i++) {
601                 struct wim_reshdr reshdr;
602                 struct alt_chunk_table_header_disk hdr;
603                 struct wim_resource_descriptor *rdesc;
604                 int ret;
605
606                 /* Advance to next resource entry.  */
607
608                 do {
609                         get_wim_reshdr(&(entries++)->reshdr, &reshdr);
610                 } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
611
612                 rdesc = rdescs[i];
613
614                 wim_reshdr_to_desc(&reshdr, wim, rdesc);
615
616                 /* For solid resources, the uncompressed size, compression type,
617                  * and chunk size are stored in the resource itself, not in the
618                  * blob table.  */
619
620                 ret = full_pread(&wim->in_fd, &hdr,
621                                  sizeof(hdr), reshdr.offset_in_wim);
622                 if (ret) {
623                         ERROR("Failed to read header of solid resource "
624                               "(offset_in_wim=%"PRIu64")",
625                               reshdr.offset_in_wim);
626                         return ret;
627                 }
628
629                 rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
630
631                 /* Compression format numbers must be the same as in
632                  * WIMGAPI to be compatible here.  */
633                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
634                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
635                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
636                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
637                 rdesc->compression_type = le32_to_cpu(hdr.compression_format);
638
639                 rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
640
641                 DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
642                       "(%"TS"/%"PRIu32") @ +%"PRIu64"",
643                       i + 1, num_rdescs,
644                       rdesc->uncompressed_size,
645                       rdesc->size_in_wim,
646                       wimlib_get_compression_type_string(rdesc->compression_type),
647                       rdesc->chunk_size,
648                       rdesc->offset_in_wim);
649         }
650         return 0;
651 }
652
653 /*
654  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
655  * allocate a 'struct wim_resource_descriptor' for each resource within that
656  * run.
657  *
658  * Returns 0 on success, or a nonzero error code on failure.
659  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
660  */
661 static int
662 load_solid_info(WIMStruct *wim,
663                 const struct blob_descriptor_disk *entries,
664                 size_t num_remaining_entries,
665                 struct wim_resource_descriptor ***rdescs_ret,
666                 size_t *num_rdescs_ret)
667 {
668         size_t num_rdescs;
669         struct wim_resource_descriptor **rdescs;
670         size_t i;
671         int ret;
672
673         num_rdescs = count_solid_resources(entries, num_remaining_entries);
674         rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
675         if (!rdescs)
676                 return WIMLIB_ERR_NOMEM;
677
678         for (i = 0; i < num_rdescs; i++) {
679                 rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
680                 if (!rdescs[i]) {
681                         ret = WIMLIB_ERR_NOMEM;
682                         goto out_free_rdescs;
683                 }
684         }
685
686         ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
687         if (ret)
688                 goto out_free_rdescs;
689
690         *rdescs_ret = rdescs;
691         *num_rdescs_ret = num_rdescs;
692         return 0;
693
694 out_free_rdescs:
695         for (i = 0; i < num_rdescs; i++)
696                 FREE(rdescs[i]);
697         FREE(rdescs);
698         return ret;
699 }
700
701 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
702  * with the SOLID flag set, try to assign it to resource in the current solid
703  * run.  */
704 static int
705 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
706                               struct blob_descriptor *blob,
707                               struct wim_resource_descriptor **rdescs,
708                               size_t num_rdescs)
709 {
710         u64 offset = reshdr->offset_in_wim;
711
712         /* XXX: This linear search will be slow in the degenerate case where the
713          * number of solid resources in the run is huge.  */
714         blob->size = reshdr->size_in_wim;
715         for (size_t i = 0; i < num_rdescs; i++) {
716                 if (offset + blob->size <= rdescs[i]->uncompressed_size) {
717                         blob_set_is_located_in_wim_resource(blob, rdescs[i], offset);
718                         return 0;
719                 }
720                 offset -= rdescs[i]->uncompressed_size;
721         }
722         ERROR("blob could not be assigned to a solid resource");
723         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
724 }
725
726 static void
727 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
728 {
729         if (rdescs) {
730                 for (size_t i = 0; i < num_rdescs; i++)
731                         if (list_empty(&rdescs[i]->blob_list))
732                                 FREE(rdescs[i]);
733                 FREE(rdescs);
734         }
735 }
736
737 static int
738 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
739 {
740         const struct blob_descriptor *blob1, *blob2;
741
742         blob1 = *(const struct blob_descriptor**)p1;
743         blob2 = *(const struct blob_descriptor**)p2;
744
745         return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
746 }
747
748 /* Validate the size and location of a WIM resource.  */
749 static int
750 validate_resource(struct wim_resource_descriptor *rdesc)
751 {
752         struct blob_descriptor *blob;
753         bool out_of_order;
754         u64 expected_next_offset;
755         int ret;
756
757         /* Verify that the resource itself has a valid offset and size.  */
758         if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
759                 goto invalid_due_to_overflow;
760
761         /* Verify that each blob in the resource has a valid offset and size.
762          */
763         expected_next_offset = 0;
764         out_of_order = false;
765         list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
766                 if (blob->offset_in_res + blob->size < blob->size ||
767                     blob->offset_in_res + blob->size > rdesc->uncompressed_size)
768                         goto invalid_due_to_overflow;
769
770                 if (blob->offset_in_res >= expected_next_offset)
771                         expected_next_offset = blob->offset_in_res + blob->size;
772                 else
773                         out_of_order = true;
774         }
775
776         /* If the blobs were not located at strictly increasing positions (not
777          * allowing for overlap), sort them.  Then make sure that none overlap.
778          */
779         if (out_of_order) {
780                 ret = sort_blob_list(&rdesc->blob_list,
781                                      offsetof(struct blob_descriptor,
782                                               rdesc_node),
783                                      cmp_blobs_by_offset_in_res);
784                 if (ret)
785                         return ret;
786
787                 expected_next_offset = 0;
788                 list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
789                         if (blob->offset_in_res >= expected_next_offset)
790                                 expected_next_offset = blob->offset_in_res + blob->size;
791                         else
792                                 goto invalid_due_to_overlap;
793                 }
794         }
795
796         return 0;
797
798 invalid_due_to_overflow:
799         ERROR("Invalid blob table (offset overflow)");
800         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
801
802 invalid_due_to_overlap:
803         ERROR("Invalid blob table (blobs in solid resource overlap)");
804         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
805 }
806
/* Finish a solid run: validate its resources in order, stopping at the first
 * invalid one, then release the array (and any resource descriptors without
 * blobs) with free_solid_rdescs().  Returns 0 if all resources validated, or
 * the error code of the first failed validation.  */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
        int status = 0;
        size_t i = 0;

        while (i < num_rdescs && status == 0)
                status = validate_resource(rdescs[i++]);

        free_solid_rdescs(rdescs, num_rdescs);
        return status;
}
819
820 /*
821  * read_blob_table() -
822  *
823  * Read the blob table from a WIM file.  Usually, each entry in this table
824  * describes a "blob", or equivalently a "resource", that the WIM file contains,
825  * along with its location and SHA-1 message digest.  Descriptors for
826  * non-metadata blobs will be saved in the in-memory blob table
827  * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
828  * special location per-image (the wim->image_metadata array).
829  *
830  * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
831  * blobs that are compressed together.  Such a resource is called a "solid
832  * resource".  Solid resources are still described in the on-disk "blob table",
833  * although the format is not the most logical.  A consecutive sequence of
834  * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
835  * A solid run describes a set of solid resources, each of which contains a set
836  * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
837  * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
838  * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
839  * resource.  There are some oddities in how we need to determine which solid
840  * resource a blob is actually in; see the code for details.
841  *
842  * Possible return values:
843  *      WIMLIB_ERR_SUCCESS (0)
844  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
845  *      WIMLIB_ERR_NOMEM
846  *
847  *      Or an error code caused by failure to read the blob table from the WIM
848  *      file.
849  */
850 int
851 read_blob_table(WIMStruct *wim)
852 {
853         int ret;
854         size_t num_entries;
855         void *buf = NULL;
856         struct blob_table *table = NULL;
857         struct blob_descriptor *cur_blob = NULL;
858         size_t num_duplicate_blobs = 0;
859         size_t num_empty_blobs = 0;
860         size_t num_wrong_part_blobs = 0;
861         u32 image_index = 0;
862         struct wim_resource_descriptor **cur_solid_rdescs = NULL;
863         size_t cur_num_solid_rdescs = 0;
864
865         DEBUG("Reading blob table.");
866
867         /* Calculate the number of entries in the blob table.  */
868         num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
869                       sizeof(struct blob_descriptor_disk);
870
871         /* Read the blob table into a buffer.  */
872         ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
873         if (ret)
874                 goto out;
875
876         /* Allocate a hash table to map SHA-1 message digests into blob
877          * descriptors.  This is the in-memory "blob table".  */
878         table = new_blob_table(num_entries * 2 + 1);
879         if (!table)
880                 goto oom;
881
882         /* Allocate and initalize blob descriptors from the raw blob table
883          * buffer.  */
884         for (size_t i = 0; i < num_entries; i++) {
885                 const struct blob_descriptor_disk *disk_entry =
886                         &((const struct blob_descriptor_disk*)buf)[i];
887                 struct wim_reshdr reshdr;
888                 u16 part_number;
889
890                 /* Get the resource header  */
891                 get_wim_reshdr(&disk_entry->reshdr, &reshdr);
892
893                 DEBUG("reshdr: size_in_wim=%"PRIu64", "
894                       "uncompressed_size=%"PRIu64", "
895                       "offset_in_wim=%"PRIu64", "
896                       "flags=0x%02x",
897                       reshdr.size_in_wim, reshdr.uncompressed_size,
898                       reshdr.offset_in_wim, reshdr.flags);
899
900                 /* Ignore SOLID flag if it isn't supposed to be used in this WIM
901                  * version.  */
902                 if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
903                         reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;
904
905                 /* Allocate a new 'struct blob_descriptor'.  */
906                 cur_blob = new_blob_descriptor();
907                 if (!cur_blob)
908                         goto oom;
909
910                 /* Get the part number, reference count, and hash.  */
911                 part_number = le16_to_cpu(disk_entry->part_number);
912                 cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
913                 copy_hash(cur_blob->hash, disk_entry->hash);
914
915                 if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
916
917                         /* SOLID entry  */
918
919                         if (!cur_solid_rdescs) {
920                                 /* Starting new run  */
921                                 ret = load_solid_info(wim, disk_entry,
922                                                       num_entries - i,
923                                                       &cur_solid_rdescs,
924                                                       &cur_num_solid_rdescs);
925                                 if (ret)
926                                         goto out;
927                         }
928
929                         if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
930                                 /* Resource entry, not blob entry  */
931                                 goto free_cur_blob_and_continue;
932                         }
933
934                         /* Blob entry  */
935
936                         ret = assign_blob_to_solid_resource(&reshdr,
937                                                             cur_blob,
938                                                             cur_solid_rdescs,
939                                                             cur_num_solid_rdescs);
940                         if (ret)
941                                 goto out;
942
943                 } else {
944                         /* Normal blob/resource entry; SOLID not set.  */
945
946                         struct wim_resource_descriptor *rdesc;
947
948                         if (unlikely(cur_solid_rdescs)) {
949                                 /* This entry terminated a solid run.  */
950                                 ret = finish_solid_rdescs(cur_solid_rdescs,
951                                                           cur_num_solid_rdescs);
952                                 cur_solid_rdescs = NULL;
953                                 if (ret)
954                                         goto out;
955                         }
956
957                         if (unlikely(!(reshdr.flags & WIM_RESHDR_FLAG_COMPRESSED) &&
958                                      (reshdr.size_in_wim != reshdr.uncompressed_size)))
959                         {
960                                 ERROR("Uncompressed resource has "
961                                       "size_in_wim != uncompressed_size");
962                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
963                                 goto out;
964                         }
965
966                         /* Set up a resource descriptor for this blob.  */
967
968                         rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
969                         if (!rdesc)
970                                 goto oom;
971
972                         wim_reshdr_to_desc_and_blob(&reshdr, wim, rdesc, cur_blob);
973                 }
974
975                 /* cur_blob is now a blob bound to a resource.  */
976
977                 /* Ignore entries with all zeroes in the hash field.  */
978                 if (unlikely(is_zero_hash(cur_blob->hash)))
979                         goto free_cur_blob_and_continue;
980
981                 /* Verify that the blob has nonzero size.  */
982                 if (unlikely(cur_blob->size == 0)) {
983                         num_empty_blobs++;
984                         goto free_cur_blob_and_continue;
985                 }
986
987                 /* Verify that the part number matches that of the underlying
988                  * WIM file.  */
989                 if (unlikely(part_number != wim->hdr.part_number)) {
990                         num_wrong_part_blobs++;
991                         goto free_cur_blob_and_continue;
992                 }
993
994                 if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
995
996                         cur_blob->is_metadata = 1;
997
998                         /* Blob table entry for a metadata resource.  */
999
1000                         /* Metadata entries with no references must be ignored.
1001                          * See, for example, the WinPE WIMs from the WAIK v2.1.
1002                          */
1003                         if (cur_blob->refcnt == 0)
1004                                 goto free_cur_blob_and_continue;
1005
1006                         if (cur_blob->refcnt != 1) {
1007                                 /* We don't currently support this case due to
1008                                  * the complications of multiple images sharing
1009                                  * the same metadata resource or a metadata
1010                                  * resource also being referenced by files.  */
1011                                 ERROR("Found metadata resource with refcnt != 1");
1012                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1013                                 goto out;
1014                         }
1015
1016                         if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1017                                 ERROR("Image metadata in solid resources "
1018                                       "is unsupported.");
1019                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1020                                 goto out;
1021                         }
1022
1023                         if (wim->hdr.part_number != 1) {
1024                                 WARNING("Ignoring metadata resource found in a "
1025                                         "non-first part of the split WIM");
1026                                 goto free_cur_blob_and_continue;
1027                         }
1028
1029                         /* The number of entries in the blob table with
1030                          * WIM_RESHDR_FLAG_METADATA set should be the same as
1031                          * the image_count field in the WIM header.  */
1032                         if (image_index == wim->hdr.image_count) {
1033                                 WARNING("Found more metadata resources than images");
1034                                 goto free_cur_blob_and_continue;
1035                         }
1036
1037                         /* Notice very carefully:  We are assigning the metadata
1038                          * resources to images in the same order in which their
1039                          * blob table entries occur on disk.  (This is also the
1040                          * behavior of Microsoft's software.)  In particular,
1041                          * this overrides the actual locations of the metadata
1042                          * resources themselves in the WIM file as well as any
1043                          * information written in the XML data.  */
1044                         DEBUG("Found metadata resource for image %"PRIu32" at "
1045                               "offset %"PRIu64".",
1046                               image_index + 1,
1047                               reshdr.offset_in_wim);
1048
1049                         wim->image_metadata[image_index++]->metadata_blob = cur_blob;
1050                 } else {
1051                         /* Blob table entry for a non-metadata blob.  */
1052
1053                         /* Ignore this blob if it's a duplicate.  */
1054                         if (lookup_blob(table, cur_blob->hash)) {
1055                                 num_duplicate_blobs++;
1056                                 goto free_cur_blob_and_continue;
1057                         }
1058
1059                         /* Insert the blob into the in-memory blob table, keyed
1060                          * by its SHA-1 message digest.  */
1061                         blob_table_insert(table, cur_blob);
1062                 }
1063
1064                 continue;
1065
1066         free_cur_blob_and_continue:
1067                 if (cur_solid_rdescs &&
1068                     cur_blob->blob_location == BLOB_IN_WIM)
1069                         blob_unset_is_located_in_wim_resource(cur_blob);
1070                 free_blob_descriptor(cur_blob);
1071         }
1072         cur_blob = NULL;
1073
1074         if (cur_solid_rdescs) {
1075                 /* End of blob table terminated a solid run.  */
1076                 ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1077                 cur_solid_rdescs = NULL;
1078                 if (ret)
1079                         goto out;
1080         }
1081
1082         if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
1083                 WARNING("Could not find metadata resources for all images");
1084                 for (u32 i = image_index; i < wim->hdr.image_count; i++)
1085                         put_image_metadata(wim->image_metadata[i], NULL);
1086                 wim->hdr.image_count = image_index;
1087         }
1088
1089         if (num_duplicate_blobs > 0)
1090                 WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);
1091
1092         if (num_empty_blobs > 0)
1093                 WARNING("Ignoring %zu empty blobs", num_empty_blobs);
1094
1095         if (num_wrong_part_blobs > 0) {
1096                 WARNING("Ignoring %zu blobs with wrong part number",
1097                         num_wrong_part_blobs);
1098         }
1099
1100         DEBUG("Done reading blob table.");
1101         wim->blob_table = table;
1102         ret = 0;
1103         goto out_free_buf;
1104
1105 oom:
1106         ERROR("Not enough memory to read blob table!");
1107         ret = WIMLIB_ERR_NOMEM;
1108 out:
1109         free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1110         free_blob_descriptor(cur_blob);
1111         free_blob_table(table);
1112 out_free_buf:
1113         FREE(buf);
1114         return ret;
1115 }
1116
1117 static void
1118 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1119                       const struct wim_reshdr *out_reshdr,
1120                       u16 part_number, u32 refcnt, const u8 *hash)
1121 {
1122         put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1123         disk_entry->part_number = cpu_to_le16(part_number);
1124         disk_entry->refcnt = cpu_to_le32(refcnt);
1125         copy_hash(disk_entry->hash, hash);
1126 }
1127
1128 /* Note: the list of blob descriptors must be sorted so that all entries for the
1129  * same solid resource are consecutive.  In addition, blob descriptors for
1130  * metadata resources must be in the same order as the indices of the underlying
1131  * images.  */
1132 int
1133 write_blob_table_from_blob_list(struct list_head *blob_list,
1134                                 struct filedes *out_fd,
1135                                 u16 part_number,
1136                                 struct wim_reshdr *out_reshdr,
1137                                 int write_resource_flags)
1138 {
1139         size_t table_size;
1140         struct blob_descriptor *blob;
1141         struct blob_descriptor_disk *table_buf;
1142         struct blob_descriptor_disk *table_buf_ptr;
1143         int ret;
1144         u64 prev_res_offset_in_wim = ~0ULL;
1145         u64 prev_uncompressed_size;
1146         u64 logical_offset;
1147
1148         table_size = 0;
1149         list_for_each_entry(blob, blob_list, blob_table_list) {
1150                 table_size += sizeof(struct blob_descriptor_disk);
1151
1152                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
1153                     blob->out_res_offset_in_wim != prev_res_offset_in_wim)
1154                 {
1155                         table_size += sizeof(struct blob_descriptor_disk);
1156                         prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1157                 }
1158         }
1159
1160         DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
1161               table_size, out_fd->offset);
1162
1163         table_buf = MALLOC(table_size);
1164         if (table_buf == NULL) {
1165                 ERROR("Failed to allocate %zu bytes for temporary blob table",
1166                       table_size);
1167                 return WIMLIB_ERR_NOMEM;
1168         }
1169         table_buf_ptr = table_buf;
1170
1171         prev_res_offset_in_wim = ~0ULL;
1172         prev_uncompressed_size = 0;
1173         logical_offset = 0;
1174         list_for_each_entry(blob, blob_list, blob_table_list) {
1175                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1176                         struct wim_reshdr tmp_reshdr;
1177
1178                         /* Eww.  When WIMGAPI sees multiple solid resources, it
1179                          * expects the offsets to be adjusted as if there were
1180                          * really only one solid resource.  */
1181
1182                         if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
1183                                 /* Put the resource entry for solid resource  */
1184                                 tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
1185                                 tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
1186                                 tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
1187                                 tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
1188
1189                                 write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1190                                                       part_number, 1, zero_hash);
1191
1192                                 logical_offset += prev_uncompressed_size;
1193
1194                                 prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1195                                 prev_uncompressed_size = blob->out_res_uncompressed_size;
1196                         }
1197                         tmp_reshdr = blob->out_reshdr;
1198                         tmp_reshdr.offset_in_wim += logical_offset;
1199                         write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1200                                               part_number, blob->out_refcnt, blob->hash);
1201                 } else {
1202                         write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
1203                                               part_number, blob->out_refcnt, blob->hash);
1204                 }
1205
1206         }
1207         wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
1208
1209         /* Write the blob table uncompressed.  Although wimlib can handle a
1210          * compressed blob table, MS software cannot.  */
1211         ret = write_wim_resource_from_buffer(table_buf,
1212                                              table_size,
1213                                              true,
1214                                              out_fd,
1215                                              WIMLIB_COMPRESSION_TYPE_NONE,
1216                                              0,
1217                                              out_reshdr,
1218                                              NULL,
1219                                              write_resource_flags);
1220         FREE(table_buf);
1221         DEBUG("ret=%d", ret);
1222         return ret;
1223 }
1224
1225 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1226  * existing descriptor in @blob_table for an identical blob.  */
1227 struct blob_descriptor *
1228 new_blob_from_data_buffer(const void *buffer, size_t size,
1229                           struct blob_table *blob_table)
1230 {
1231         u8 hash[SHA1_HASH_SIZE];
1232         struct blob_descriptor *blob;
1233         void *buffer_copy;
1234
1235         sha1_buffer(buffer, size, hash);
1236
1237         blob = lookup_blob(blob_table, hash);
1238         if (blob)
1239                 return blob;
1240
1241         blob = new_blob_descriptor();
1242         if (!blob)
1243                 return NULL;
1244
1245         buffer_copy = memdup(buffer, size);
1246         if (!buffer_copy) {
1247                 free_blob_descriptor(blob);
1248                 return NULL;
1249         }
1250         blob_set_is_located_in_attached_buffer(blob, buffer_copy, size);
1251         copy_hash(blob->hash, hash);
1252         blob_table_insert(blob_table, blob);
1253         return blob;
1254 }
1255
1256 struct blob_descriptor *
1257 after_blob_hashed(struct blob_descriptor *blob,
1258                   struct blob_descriptor **back_ptr,
1259                   struct blob_table *blob_table)
1260 {
1261         struct blob_descriptor *duplicate_blob;
1262
1263         list_del(&blob->unhashed_list);
1264         blob->unhashed = 0;
1265
1266         /* Look for a duplicate blob  */
1267         duplicate_blob = lookup_blob(blob_table, blob->hash);
1268         if (duplicate_blob) {
1269                 /* We have a duplicate blob.  Transfer the reference counts from
1270                  * this blob to the duplicate and update the reference to this
1271                  * blob (from a stream) to point to the duplicate.  The caller
1272                  * is responsible for freeing @blob if needed.  */
1273                 wimlib_assert(duplicate_blob->size == blob->size);
1274                 duplicate_blob->refcnt += blob->refcnt;
1275                 blob->refcnt = 0;
1276                 *back_ptr = duplicate_blob;
1277                 return duplicate_blob;
1278         } else {
1279                 /* No duplicate blob, so we need to insert this blob into the
1280                  * blob table and treat it as a hashed blob.  */
1281                 blob_table_insert(blob_table, blob);
1282                 return blob;
1283         }
1284 }
1285
/*
 * Compute the SHA-1 message digest of an unhashed blob, then move its
 * descriptor from the list of unhashed blobs into the blob table, merging it
 * with an identical blob if one is already present.
 *
 * @blob:
 *	The blob to hash
 * @blob_table:
 *	The blob table in which the blob needs to be indexed
 * @blob_ret:
 *	On success, receives the resulting blob descriptor: @blob itself if it
 *	was inserted into the blob table, or the pre-existing descriptor if a
 *	duplicate blob was found.  Not written on failure.
 *
 * Returns 0 on success; nonzero if there is an error reading the blob data.
 */
int
hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
		   struct blob_descriptor **blob_ret)
{
	/* The back pointer is fetched before hashing, as it is needed to
	 * redirect the reference should the blob turn out to be a duplicate.  */
	struct blob_descriptor **back_ptr =
		retrieve_pointer_to_unhashed_blob(blob);
	int ret = sha1_blob(blob);

	if (ret == 0)
		*blob_ret = after_blob_hashed(blob, back_ptr, blob_table);
	return ret;
}
1318
1319 void
1320 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1321                               struct wimlib_resource_entry *wentry)
1322 {
1323         memset(wentry, 0, sizeof(*wentry));
1324
1325         wentry->uncompressed_size = blob->size;
1326         if (blob->blob_location == BLOB_IN_WIM) {
1327                 unsigned res_flags = blob->rdesc->flags;
1328
1329                 wentry->part_number = blob->rdesc->wim->hdr.part_number;
1330                 if (res_flags & WIM_RESHDR_FLAG_SOLID) {
1331                         wentry->offset = blob->offset_in_res;
1332                 } else {
1333                         wentry->compressed_size = blob->rdesc->size_in_wim;
1334                         wentry->offset = blob->rdesc->offset_in_wim;
1335                 }
1336                 wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1337                 wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1338                 wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1339
1340                 wentry->is_compressed = (res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1341                 wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
1342                 wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1343                 wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
1344         }
1345         if (!blob->unhashed)
1346                 copy_hash(wentry->sha1_hash, blob->hash);
1347         wentry->reference_count = blob->refcnt;
1348         wentry->is_metadata = blob->is_metadata;
1349 }
1350
1351 struct iterate_blob_context {
1352         wimlib_iterate_lookup_table_callback_t cb;
1353         void *user_ctx;
1354 };
1355
1356 static int
1357 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1358 {
1359         struct iterate_blob_context *ctx = _ctx;
1360         struct wimlib_resource_entry entry;
1361
1362         blob_to_wimlib_resource_entry(blob, &entry);
1363         return (*ctx->cb)(&entry, ctx->user_ctx);
1364 }
1365
1366 /* API function documented in wimlib.h  */
1367 WIMLIBAPI int
1368 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1369                             wimlib_iterate_lookup_table_callback_t cb,
1370                             void *user_ctx)
1371 {
1372         if (flags != 0)
1373                 return WIMLIB_ERR_INVALID_PARAM;
1374
1375         struct iterate_blob_context ctx = {
1376                 .cb = cb,
1377                 .user_ctx = user_ctx,
1378         };
1379         if (wim_has_metadata(wim)) {
1380                 int ret;
1381                 for (int i = 0; i < wim->hdr.image_count; i++) {
1382                         struct blob_descriptor *blob;
1383                         struct wim_image_metadata *imd = wim->image_metadata[i];
1384
1385                         ret = do_iterate_blob(imd->metadata_blob, &ctx);
1386                         if (ret)
1387                                 return ret;
1388                         image_for_each_unhashed_blob(blob, imd) {
1389                                 ret = do_iterate_blob(blob, &ctx);
1390                                 if (ret)
1391                                         return ret;
1392                         }
1393                 }
1394         }
1395         return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1396 }