]> wimlib.net Git - wimlib/blob - src/blob_table.c
0ddd13ad09901413fc23e21893b4ba7f13cebac7
[wimlib] / src / blob_table.c
1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h> /* for unlink()  */
35
36 #include "wimlib/assert.h"
37 #include "wimlib/blob_table.h"
38 #include "wimlib/encoding.h"
39 #include "wimlib/endianness.h"
40 #include "wimlib/error.h"
41 #include "wimlib/metadata.h"
42 #include "wimlib/ntfs_3g.h"
43 #include "wimlib/resource.h"
44 #include "wimlib/unaligned.h"
45 #include "wimlib/util.h"
46 #include "wimlib/write.h"
47
/*
 * In-memory blob table: a chained hash table that maps SHA-1 message digests
 * to blob descriptors.
 */
struct blob_table {
        /* Bucket array; holds 'capacity' list heads.  */
        struct hlist_head *array;

        /* Number of blob descriptors currently inserted in the table.  */
        size_t num_blobs;

        /* Number of buckets in 'array'.  */
        size_t capacity;
};
54
55 struct blob_table *
56 new_blob_table(size_t capacity)
57 {
58         struct blob_table *table;
59         struct hlist_head *array;
60
61         table = MALLOC(sizeof(struct blob_table));
62         if (table == NULL)
63                 goto oom;
64
65         array = CALLOC(capacity, sizeof(array[0]));
66         if (array == NULL) {
67                 FREE(table);
68                 goto oom;
69         }
70
71         table->num_blobs = 0;
72         table->capacity = capacity;
73         table->array = array;
74         return table;
75
76 oom:
77         ERROR("Failed to allocate memory for blob table "
78               "with capacity %zu", capacity);
79         return NULL;
80 }
81
/* for_blob_in_table() visitor that frees the blob descriptor it is handed.
 * The second argument is unused.  Always returns 0 so that iteration
 * continues over the whole table.  */
static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
        (void)_ignore;

        free_blob_descriptor(blob);
        return 0;
}
88
89 void
90 free_blob_table(struct blob_table *table)
91 {
92         if (table) {
93                 for_blob_in_table(table, do_free_blob_descriptor, NULL);
94                 FREE(table->array);
95                 FREE(table);
96         }
97 }
98
99 struct blob_descriptor *
100 new_blob_descriptor(void)
101 {
102         BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
103         return CALLOC(1, sizeof(struct blob_descriptor));
104 }
105
106 struct blob_descriptor *
107 clone_blob_descriptor(const struct blob_descriptor *old)
108 {
109         struct blob_descriptor *new;
110
111         new = memdup(old, sizeof(struct blob_descriptor));
112         if (new == NULL)
113                 return NULL;
114
115         switch (new->blob_location) {
116         case BLOB_IN_WIM:
117                 list_add(&new->rdesc_node, &new->rdesc->blob_list);
118                 break;
119
120         case BLOB_IN_FILE_ON_DISK:
121 #ifdef __WIN32__
122         case BLOB_IN_WINNT_FILE_ON_DISK:
123         case BLOB_WIN32_ENCRYPTED:
124 #endif
125 #ifdef WITH_FUSE
126         case BLOB_IN_STAGING_FILE:
127                 BUILD_BUG_ON((void*)&old->file_on_disk !=
128                              (void*)&old->staging_file_name);
129 #endif
130                 new->file_on_disk = TSTRDUP(old->file_on_disk);
131                 if (new->file_on_disk == NULL)
132                         goto out_free;
133                 break;
134         case BLOB_IN_ATTACHED_BUFFER:
135                 new->attached_buffer = memdup(old->attached_buffer, old->size);
136                 if (new->attached_buffer == NULL)
137                         goto out_free;
138                 break;
139 #ifdef WITH_NTFS_3G
140         case BLOB_IN_NTFS_VOLUME:
141                 if (old->ntfs_loc) {
142                         new->ntfs_loc = memdup(old->ntfs_loc,
143                                                sizeof(struct ntfs_location));
144                         if (new->ntfs_loc == NULL)
145                                 goto out_free;
146                         if (new->ntfs_loc->attr_name != NULL) {
147                                 new->ntfs_loc->attr_name =
148                                         utf16le_dup(new->ntfs_loc->attr_name);
149                                 if (new->ntfs_loc->attr_name == NULL)
150                                         goto out_free;
151                         }
152                 }
153                 break;
154 #endif
155         }
156         return new;
157
158 out_free:
159         free_blob_descriptor(new);
160         return NULL;
161 }
162
163 static void
164 blob_release_location(struct blob_descriptor *blob)
165 {
166         switch (blob->blob_location) {
167         case BLOB_IN_WIM:
168                 list_del(&blob->rdesc_node);
169                 if (list_empty(&blob->rdesc->blob_list))
170                         FREE(blob->rdesc);
171                 break;
172         case BLOB_IN_FILE_ON_DISK:
173 #ifdef __WIN32__
174         case BLOB_IN_WINNT_FILE_ON_DISK:
175         case BLOB_WIN32_ENCRYPTED:
176 #endif
177 #ifdef WITH_FUSE
178         case BLOB_IN_STAGING_FILE:
179                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
180                              (void*)&blob->staging_file_name);
181 #endif
182         case BLOB_IN_ATTACHED_BUFFER:
183                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
184                              (void*)&blob->attached_buffer);
185                 FREE(blob->file_on_disk);
186                 break;
187 #ifdef WITH_NTFS_3G
188         case BLOB_IN_NTFS_VOLUME:
189                 if (blob->ntfs_loc) {
190                         FREE(blob->ntfs_loc->attr_name);
191                         FREE(blob->ntfs_loc);
192                 }
193                 break;
194 #endif
195         default:
196                 break;
197         }
198 }
199
/* Free a blob descriptor along with its location data.  Passing NULL is
 * allowed and does nothing.  */
void
free_blob_descriptor(struct blob_descriptor *blob)
{
        if (blob == NULL)
                return;

        blob_release_location(blob);
        FREE(blob);
}
208
209 /* Should this blob be retained even if it has no references?  */
210 static bool
211 should_retain_blob(const struct blob_descriptor *blob)
212 {
213         return blob->blob_location == BLOB_IN_WIM;
214 }
215
/* Free @blob unless it is a blob that must be retained (see
 * should_retain_blob()).  */
static void
finalize_blob(struct blob_descriptor *blob)
{
        if (should_retain_blob(blob))
                return;

        free_blob_descriptor(blob);
}
222
/*
 * Drop one reference to @blob.  The blob must be either unhashed or inserted
 * in @table.
 *
 * When the reference count reaches 0 the blob may be unlinked from @table and
 * freed.  Blobs that originated from a WIM file (BLOB_IN_WIM) are the
 * exception: they are retained even with a reference count of 0, because:
 *
 * 1. Otherwise, information about valid blobs in a WIM file --- blobs which
 *    will still be present after appending to the WIM file --- would be lost
 *    merely because all references to them were dropped.
 *
 * 2. Reference counts read from WIM files cannot be trusted.  A WIM may have
 *    reference counts that are too low; WIMGAPI sometimes creates such WIMs.
 *    Blobs may also have been referenced from an external WIM, in which case
 *    their reference count can be anything at all, lower or higher than would
 *    be expected if the WIM owning @table were a standalone WIM.
 *
 * Reference counts therefore cannot be taken too seriously.  They are,
 * however, recalculated by default when a new WIM file is written.
 */
void
blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
{
        /* Dropping one reference is subtracting a count of 1.  */
        blob_subtract_refcnt(blob, table, 1);
}
251
252 void
253 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
254                      u32 count)
255 {
256         if (unlikely(blob->refcnt < count)) {
257                 blob->refcnt = 0; /* See comment above  */
258                 return;
259         }
260
261         blob->refcnt -= count;
262
263         if (blob->refcnt != 0)
264                 return;
265
266         if (blob->unhashed) {
267                 list_del(&blob->unhashed_list);
268         #ifdef WITH_FUSE
269                 /* If the blob has been extracted to a staging file for a FUSE
270                  * mount, unlink the staging file.  (Note that there still may
271                  * be open file descriptors to it.)  */
272                 if (blob->blob_location == BLOB_IN_STAGING_FILE)
273                         unlinkat(blob->staging_dir_fd,
274                                  blob->staging_file_name, 0);
275         #endif
276         } else {
277                 if (!should_retain_blob(blob))
278                         blob_table_unlink(table, blob);
279         }
280
281         /* If FUSE mounts are enabled, then don't actually free the blob
282          * descriptor until the last file descriptor to it has been closed.  */
283 #ifdef WITH_FUSE
284         if (blob->num_opened_fds == 0)
285 #endif
286                 finalize_blob(blob);
287 }
288
#ifdef WITH_FUSE
/* Record that one open file descriptor to @blob has been closed.  If that was
 * the last one and the blob has no references left, finalize the blob.  */
void
blob_decrement_num_opened_fds(struct blob_descriptor *blob)
{
        wimlib_assert(blob->num_opened_fds != 0);

        blob->num_opened_fds--;
        if (blob->num_opened_fds != 0)
                return;
        if (blob->refcnt != 0)
                return;

        finalize_blob(blob);
}
#endif
299
300 static void
301 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
302 {
303         size_t i = blob->hash_short % table->capacity;
304
305         hlist_add_head(&blob->hash_list, &table->array[i]);
306 }
307
308 static void
309 enlarge_blob_table(struct blob_table *table)
310 {
311         size_t old_capacity, new_capacity;
312         struct hlist_head *old_array, *new_array;
313         struct blob_descriptor *blob;
314         struct hlist_node *tmp;
315         size_t i;
316
317         old_capacity = table->capacity;
318         new_capacity = old_capacity * 2;
319         new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
320         if (new_array == NULL)
321                 return;
322         old_array = table->array;
323         table->array = new_array;
324         table->capacity = new_capacity;
325
326         for (i = 0; i < old_capacity; i++) {
327                 hlist_for_each_entry_safe(blob, tmp, &old_array[i], hash_list) {
328                         hlist_del(&blob->hash_list);
329                         blob_table_insert_raw(table, blob);
330                 }
331         }
332         FREE(old_array);
333 }
334
335 /* Insert a blob descriptor into the blob table.  */
336 void
337 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
338 {
339         blob_table_insert_raw(table, blob);
340         if (++table->num_blobs > table->capacity)
341                 enlarge_blob_table(table);
342 }
343
344 /* Unlinks a blob descriptor from the blob table; does not free it.  */
345 void
346 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
347 {
348         wimlib_assert(!blob->unhashed);
349         wimlib_assert(table->num_blobs != 0);
350
351         hlist_del(&blob->hash_list);
352         table->num_blobs--;
353 }
354
355 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
356  * the specified blob table, or NULL if there is none.  */
357 struct blob_descriptor *
358 lookup_blob(const struct blob_table *table, const u8 *hash)
359 {
360         size_t i;
361         struct blob_descriptor *blob;
362
363         i = load_size_t_unaligned(hash) % table->capacity;
364         hlist_for_each_entry(blob, &table->array[i], hash_list)
365                 if (hashes_equal(hash, blob->hash))
366                         return blob;
367         return NULL;
368 }
369
370 /* Call a function on all blob descriptors in the specified blob table.  Stop
371  * early and return nonzero if any call to the function returns nonzero.  */
372 int
373 for_blob_in_table(struct blob_table *table,
374                   int (*visitor)(struct blob_descriptor *, void *), void *arg)
375 {
376         struct blob_descriptor *blob;
377         struct hlist_node *tmp;
378         int ret;
379
380         for (size_t i = 0; i < table->capacity; i++) {
381                 hlist_for_each_entry_safe(blob, tmp, &table->array[i],
382                                           hash_list)
383                 {
384                         ret = visitor(blob, arg);
385                         if (ret)
386                                 return ret;
387                 }
388         }
389         return 0;
390 }
391
392 /*
393  * This is a qsort() callback that sorts blobs into an order optimized for
394  * reading.  Sorting is done primarily by blob location, then secondarily by a
395  * location-dependent order.  For example, blobs in WIM resources are sorted
396  * such that the underlying WIM files will be read sequentially.  This is
397  * especially important for WIM files containing solid resources.
398  */
399 int
400 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
401 {
402         const struct blob_descriptor *blob1, *blob2;
403         int v;
404         WIMStruct *wim1, *wim2;
405
406         blob1 = *(const struct blob_descriptor**)p1;
407         blob2 = *(const struct blob_descriptor**)p2;
408
409         v = (int)blob1->blob_location - (int)blob2->blob_location;
410
411         /* Different resource locations?  */
412         if (v)
413                 return v;
414
415         switch (blob1->blob_location) {
416         case BLOB_IN_WIM:
417                 wim1 = blob1->rdesc->wim;
418                 wim2 = blob2->rdesc->wim;
419
420                 /* Different (possibly split) WIMs?  */
421                 if (wim1 != wim2) {
422                         v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN);
423                         if (v)
424                                 return v;
425                 }
426
427                 /* Different part numbers in the same WIM?  */
428                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
429                 if (v)
430                         return v;
431
432                 if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
433                         return cmp_u64(blob1->rdesc->offset_in_wim,
434                                        blob2->rdesc->offset_in_wim);
435
436                 return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
437
438         case BLOB_IN_FILE_ON_DISK:
439 #ifdef WITH_FUSE
440         case BLOB_IN_STAGING_FILE:
441 #endif
442 #ifdef __WIN32__
443         case BLOB_IN_WINNT_FILE_ON_DISK:
444         case BLOB_WIN32_ENCRYPTED:
445 #endif
446                 /* Compare files by path: just a heuristic that will place files
447                  * in the same directory next to each other.  */
448                 return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
449 #ifdef WITH_NTFS_3G
450         case BLOB_IN_NTFS_VOLUME:
451                 return cmp_u64(blob1->ntfs_loc->sort_key, blob2->ntfs_loc->sort_key);
452 #endif
453         default:
454                 /* No additional sorting order defined for this resource
455                  * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
456                  * everything equal to each other.  */
457                 return 0;
458         }
459 }
460
461 int
462 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
463                int (*compar)(const void *, const void*))
464 {
465         struct list_head *cur;
466         struct blob_descriptor **array;
467         size_t i;
468         size_t array_size;
469         size_t num_blobs = 0;
470
471         list_for_each(cur, blob_list)
472                 num_blobs++;
473
474         if (num_blobs <= 1)
475                 return 0;
476
477         array_size = num_blobs * sizeof(array[0]);
478         array = MALLOC(array_size);
479         if (array == NULL)
480                 return WIMLIB_ERR_NOMEM;
481
482         cur = blob_list->next;
483         for (i = 0; i < num_blobs; i++) {
484                 array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
485                 cur = cur->next;
486         }
487
488         qsort(array, num_blobs, sizeof(array[0]), compar);
489
490         INIT_LIST_HEAD(blob_list);
491         for (i = 0; i < num_blobs; i++) {
492                 list_add_tail((struct list_head*)
493                                ((u8*)array[i] + list_head_offset), blob_list);
494         }
495         FREE(array);
496         return 0;
497 }
498
499 /* Sort the specified list of blobs in an order optimized for sequential
500  * reading.  */
501 int
502 sort_blob_list_by_sequential_order(struct list_head *blob_list,
503                                    size_t list_head_offset)
504 {
505         return sort_blob_list(blob_list, list_head_offset,
506                               cmp_blobs_by_sequential_order);
507 }
508
/* for_blob_in_table() visitor that stores @blob at the array position
 * addressed through @_pp and then advances that position by one element.
 * Always returns 0.  */
static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
        struct blob_descriptor ***cursor = _pp;

        **cursor = blob;
        (*cursor)++;
        return 0;
}
516
517 /* Iterate through the blob descriptors in the specified blob table in an order
518  * optimized for sequential reading.  */
519 int
520 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
521                                              int (*visitor)(struct blob_descriptor *, void *),
522                                              void *arg)
523 {
524         struct blob_descriptor **blob_array, **p;
525         size_t num_blobs = table->num_blobs;
526         int ret;
527
528         blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
529         if (!blob_array)
530                 return WIMLIB_ERR_NOMEM;
531         p = blob_array;
532         for_blob_in_table(table, add_blob_to_array, &p);
533
534         wimlib_assert(p == blob_array + num_blobs);
535
536         qsort(blob_array, num_blobs, sizeof(blob_array[0]),
537               cmp_blobs_by_sequential_order);
538         ret = 0;
539         for (size_t i = 0; i < num_blobs; i++) {
540                 ret = visitor(blob_array[i], arg);
541                 if (ret)
542                         break;
543         }
544         FREE(blob_array);
545         return ret;
546 }
547
548 /* On-disk format of a blob descriptor in a WIM file.
549  *
550  * Note: if the WIM file contains solid resource(s), then this structure is
551  * sometimes overloaded to describe a "resource" rather than a "blob".  See the
552  * code for details.  */
553 struct blob_descriptor_disk {
554
555         /* Size, offset, and flags of the blob.  */
556         struct wim_reshdr_disk reshdr;
557
558         /* Which part of the split WIM this blob is in; indexed from 1. */
559         le16 part_number;
560
561         /* Reference count of this blob over all WIM images.  (But see comment
562          * above blob_decrement_refcnt().)  */
563         le32 refcnt;
564
565         /* SHA-1 message digest of the uncompressed data of this blob, or all
566          * zeroes if this blob is of zero length.  */
567         u8 hash[SHA1_HASH_SIZE];
568 } _packed_attribute;
569
570 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
571  * count how many specify resources (as opposed to blobs within those
572  * resources).
573  *
574  * Returns the resulting count.  */
575 static size_t
576 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
577 {
578         size_t count = 0;
579         do {
580                 struct wim_reshdr reshdr;
581
582                 get_wim_reshdr(&(entries++)->reshdr, &reshdr);
583
584                 if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
585                         /* Run was terminated by a stand-alone blob entry.  */
586                         break;
587                 }
588
589                 if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
590                         /* This is a resource entry.  */
591                         count++;
592                 }
593         } while (--max);
594         return count;
595 }
596
597 /*
598  * Given a run of consecutive blob descriptors with the SOLID flag set and
599  * having @num_rdescs resource entries, load resource information from them into
600  * the resource descriptors in the @rdescs array.
601  *
602  * Returns 0 on success, or a nonzero error code on failure.
603  */
604 static int
605 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
606                    size_t num_rdescs,
607                    const struct blob_descriptor_disk *entries)
608 {
609         for (size_t i = 0; i < num_rdescs; i++) {
610                 struct wim_reshdr reshdr;
611                 struct alt_chunk_table_header_disk hdr;
612                 struct wim_resource_descriptor *rdesc;
613                 int ret;
614
615                 /* Advance to next resource entry.  */
616
617                 do {
618                         get_wim_reshdr(&(entries++)->reshdr, &reshdr);
619                 } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
620
621                 rdesc = rdescs[i];
622
623                 wim_res_hdr_to_desc(&reshdr, wim, rdesc);
624
625                 /* For solid resources, the uncompressed size, compression type,
626                  * and chunk size are stored in the resource itself, not in the
627                  * blob table.  */
628
629                 ret = full_pread(&wim->in_fd, &hdr,
630                                  sizeof(hdr), reshdr.offset_in_wim);
631                 if (ret) {
632                         ERROR("Failed to read header of solid resource "
633                               "(offset_in_wim=%"PRIu64")",
634                               reshdr.offset_in_wim);
635                         return ret;
636                 }
637
638                 rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
639
640                 /* Compression format numbers must be the same as in
641                  * WIMGAPI to be compatible here.  */
642                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
643                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
644                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
645                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
646                 rdesc->compression_type = le32_to_cpu(hdr.compression_format);
647
648                 rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
649
650                 DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
651                       "(%"TS"/%"PRIu32") @ +%"PRIu64"",
652                       i + 1, num_rdescs,
653                       rdesc->uncompressed_size,
654                       rdesc->size_in_wim,
655                       wimlib_get_compression_type_string(rdesc->compression_type),
656                       rdesc->chunk_size,
657                       rdesc->offset_in_wim);
658         }
659         return 0;
660 }
661
662 /*
663  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
664  * allocate a 'struct wim_resource_descriptor' for each resource within that
665  * run.
666  *
667  * Returns 0 on success, or a nonzero error code on failure.
668  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
669  */
670 static int
671 load_solid_info(WIMStruct *wim,
672                 const struct blob_descriptor_disk *entries,
673                 size_t num_remaining_entries,
674                 struct wim_resource_descriptor ***rdescs_ret,
675                 size_t *num_rdescs_ret)
676 {
677         size_t num_rdescs;
678         struct wim_resource_descriptor **rdescs;
679         size_t i;
680         int ret;
681
682         num_rdescs = count_solid_resources(entries, num_remaining_entries);
683         rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
684         if (!rdescs)
685                 return WIMLIB_ERR_NOMEM;
686
687         for (i = 0; i < num_rdescs; i++) {
688                 rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
689                 if (!rdescs[i]) {
690                         ret = WIMLIB_ERR_NOMEM;
691                         goto out_free_rdescs;
692                 }
693         }
694
695         ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
696         if (ret)
697                 goto out_free_rdescs;
698
699         *rdescs_ret = rdescs;
700         *num_rdescs_ret = num_rdescs;
701         return 0;
702
703 out_free_rdescs:
704         for (i = 0; i < num_rdescs; i++)
705                 FREE(rdescs[i]);
706         FREE(rdescs);
707         return ret;
708 }
709
710 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
711  * with the SOLID flag set, try to assign it to resource in the current solid
712  * run.  */
713 static int
714 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
715                               struct blob_descriptor *blob,
716                               struct wim_resource_descriptor **rdescs,
717                               size_t num_rdescs)
718 {
719         u64 offset = reshdr->offset_in_wim;
720
721         /* XXX: This linear search will be slow in the degenerate case where the
722          * number of solid resources in the run is huge.  */
723         blob->size = reshdr->size_in_wim;
724         for (size_t i = 0; i < num_rdescs; i++) {
725                 if (offset + blob->size <= rdescs[i]->uncompressed_size) {
726                         blob_set_is_located_in_wim_resource(blob, rdescs[i], offset);
727                         return 0;
728                 }
729                 offset -= rdescs[i]->uncompressed_size;
730         }
731         ERROR("blob could not be assigned to a solid resource");
732         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
733 }
734
735 static void
736 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
737 {
738         if (rdescs) {
739                 for (size_t i = 0; i < num_rdescs; i++)
740                         if (list_empty(&rdescs[i]->blob_list))
741                                 FREE(rdescs[i]);
742                 FREE(rdescs);
743         }
744 }
745
746 static int
747 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
748 {
749         const struct blob_descriptor *blob1, *blob2;
750
751         blob1 = *(const struct blob_descriptor**)p1;
752         blob2 = *(const struct blob_descriptor**)p2;
753
754         return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
755 }
756
757 /* Validate the size and location of a WIM resource.  */
758 static int
759 validate_resource(struct wim_resource_descriptor *rdesc)
760 {
761         struct blob_descriptor *blob;
762         bool out_of_order;
763         u64 expected_next_offset;
764         int ret;
765
766         /* Verify that the resource itself has a valid offset and size.  */
767         if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
768                 goto invalid_due_to_overflow;
769
770         /* Verify that each blob in the resource has a valid offset and size.
771          */
772         expected_next_offset = 0;
773         out_of_order = false;
774         list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
775                 if (blob->offset_in_res + blob->size < blob->size ||
776                     blob->offset_in_res + blob->size > rdesc->uncompressed_size)
777                         goto invalid_due_to_overflow;
778
779                 if (blob->offset_in_res >= expected_next_offset)
780                         expected_next_offset = blob->offset_in_res + blob->size;
781                 else
782                         out_of_order = true;
783         }
784
785         /* If the blobs were not located at strictly increasing positions (not
786          * allowing for overlap), sort them.  Then make sure that none overlap.
787          */
788         if (out_of_order) {
789                 ret = sort_blob_list(&rdesc->blob_list,
790                                      offsetof(struct blob_descriptor,
791                                               rdesc_node),
792                                      cmp_blobs_by_offset_in_res);
793                 if (ret)
794                         return ret;
795
796                 expected_next_offset = 0;
797                 list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
798                         if (blob->offset_in_res >= expected_next_offset)
799                                 expected_next_offset = blob->offset_in_res + blob->size;
800                         else
801                                 goto invalid_due_to_overlap;
802                 }
803         }
804
805         return 0;
806
807 invalid_due_to_overflow:
808         ERROR("Invalid blob table (offset overflow)");
809         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
810
811 invalid_due_to_overlap:
812         ERROR("Invalid blob table (blobs in solid resource overlap)");
813         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
814 }
815
/* Validate each resource of a finished solid run, stopping at the first
 * invalid one, then release the @rdescs array with free_solid_rdescs().
 * Returns 0 if all resources are valid, otherwise the error code of the first
 * failed validation.  */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
        int ret = 0;
        size_t i = 0;

        while (i < num_rdescs && ret == 0) {
                ret = validate_resource(rdescs[i]);
                i++;
        }

        /* The array is released whether or not validation succeeded.  */
        free_solid_rdescs(rdescs, num_rdescs);
        return ret;
}
828
829 /*
830  * read_blob_table() -
831  *
832  * Read the blob table from a WIM file.  Usually, each entry in this table
833  * describes a "blob", or equivalently a "resource", that the WIM file contains,
834  * along with its location and SHA-1 message digest.  Descriptors for
835  * non-metadata blobs will be saved in the in-memory blob table
836  * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
837  * special location per-image (the wim->image_metadata array).
838  *
839  * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
840  * blobs that are compressed together.  Such a resource is called a "solid
841  * resource".  Solid resources are still described in the on-disk "blob table",
842  * although the format is not the most logical.  A consecutive sequence of
843  * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
844  * A solid run describes a set of solid resources, each of which contains a set
845  * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
846  * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
847  * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
848  * resource.  There are some oddities in how we need to determine which solid
849  * resource a blob is actually in; see the code for details.
850  *
     * The following entries are skipped rather than treated as errors: entries
     * whose hash field is all zeroes, entries whose part number differs from
     * wim->hdr.part_number, non-metadata entries whose hash duplicates that of
     * an earlier entry, and metadata entries with a reference count of zero.
     * Duplicate and wrong-part entries are counted and reported with a single
     * warning each.
     *
     * On success, wim->blob_table is set to the newly built table.  In addition,
     * if this is part 1 and fewer usable metadata entries were found than
     * wim->hdr.image_count, the surplus image metadata is released with
     * put_image_metadata() and wim->hdr.image_count is lowered to the number
     * actually found.  On failure, the partially built table is freed and
     * wim->blob_table is not assigned.
     *
     * NOTE(review): metadata blobs that were already attached to
     * wim->image_metadata[] before a failure are not detached by the error path
     * here; presumably they are released together with the image metadata.
     *
851  * Possible return values:
852  *      WIMLIB_ERR_SUCCESS (0)
853  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
854  *      WIMLIB_ERR_NOMEM
855  *
856  *      Or an error code caused by failure to read the blob table from the WIM
857  *      file.
858  */
859 int
860 read_blob_table(WIMStruct *wim)
861 {
862         int ret;
863         size_t num_entries;
864         void *buf = NULL;
865         struct blob_table *table = NULL;
866         struct blob_descriptor *cur_blob = NULL;
867         size_t num_duplicate_blobs = 0;
868         size_t num_wrong_part_blobs = 0;
869         u32 image_index = 0;
870         struct wim_resource_descriptor **cur_solid_rdescs = NULL;
871         size_t cur_num_solid_rdescs = 0;
872
873         DEBUG("Reading blob table.");
874
875         /* Calculate the number of entries in the blob table.  Any trailing
             * bytes that do not make up a whole entry are dropped by the integer
             * division.  */
876         num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
877                       sizeof(struct blob_descriptor_disk);
878
879         /* Read the blob table into a buffer.  */
880         ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
881         if (ret)
882                 goto out;
883
884         /* Allocate a hash table to map SHA-1 message digests into blob
885          * descriptors.  This is the in-memory "blob table".  */
886         table = new_blob_table(num_entries * 2 + 1);
887         if (!table)
888                 goto oom;
889
890         /* Allocate and initialize blob descriptors from the raw blob table
891          * buffer.  */
892         for (size_t i = 0; i < num_entries; i++) {
893                 const struct blob_descriptor_disk *disk_entry =
894                         &((const struct blob_descriptor_disk*)buf)[i];
895                 struct wim_reshdr reshdr;
896                 u16 part_number;
897
898                 /* Get the resource header  */
899                 get_wim_reshdr(&disk_entry->reshdr, &reshdr);
900
901                 DEBUG("reshdr: size_in_wim=%"PRIu64", "
902                       "uncompressed_size=%"PRIu64", "
903                       "offset_in_wim=%"PRIu64", "
904                       "flags=0x%02x",
905                       reshdr.size_in_wim, reshdr.uncompressed_size,
906                       reshdr.offset_in_wim, reshdr.flags);
907
908                 /* Ignore SOLID flag if it isn't supposed to be used in this WIM
909                  * version.  */
910                 if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
911                         reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;
912
913                 /* Allocate a new 'struct blob_descriptor'.  */
914                 cur_blob = new_blob_descriptor();
915                 if (!cur_blob)
916                         goto oom;
917
918                 /* Get the part number, reference count, and hash.  */
919                 part_number = le16_to_cpu(disk_entry->part_number);
920                 cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
921                 copy_hash(cur_blob->hash, disk_entry->hash);
922
923                 if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
924
925                         /* SOLID entry  */
926
927                         if (!cur_solid_rdescs) {
928                                 /* Starting new run  */
929                                 ret = load_solid_info(wim, disk_entry,
930                                                       num_entries - i,
931                                                       &cur_solid_rdescs,
932                                                       &cur_num_solid_rdescs);
933                                 if (ret)
934                                         goto out;
935                         }
936
937                         if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
938                                 /* Resource entry, not blob entry  */
939                                 goto free_cur_blob_and_continue;
940                         }
941
942                         /* Blob entry  */
943
944                         ret = assign_blob_to_solid_resource(&reshdr,
945                                                             cur_blob,
946                                                             cur_solid_rdescs,
947                                                             cur_num_solid_rdescs);
948                         if (ret)
949                                 goto out;
950
951                 } else {
952                         /* Normal blob/resource entry; SOLID not set.  */
953
954                         struct wim_resource_descriptor *rdesc;
955
956                         if (unlikely(cur_solid_rdescs)) {
957                                 /* This entry terminated a solid run.  */
958                                 ret = finish_solid_rdescs(cur_solid_rdescs,
959                                                           cur_num_solid_rdescs);
                                    /* finish_solid_rdescs() has already passed
                                     * the array to free_solid_rdescs(); clear the
                                     * pointer so that the 'out' path does not
                                     * release it a second time.  */
960                                 cur_solid_rdescs = NULL;
961                                 if (ret)
962                                         goto out;
963                         }
964
965                         /* How to handle an uncompressed resource with its
966                          * uncompressed size different from its compressed size?
967                          *
968                          * Based on a simple test, WIMGAPI seems to handle this
969                          * as follows:
970                          *
971                          * if (size_in_wim > uncompressed_size) {
972                          *      Ignore uncompressed_size; use size_in_wim
973                          *      instead.
974                          * } else {
975                          *      Honor uncompressed_size, but treat the part of
976                          *      the file data above size_in_wim as all zeros.
977                          * }
978                          *
979                          * So we will do the same.  */
980                         if (unlikely(!(reshdr.flags &
981                                        WIM_RESHDR_FLAG_COMPRESSED) &&
982                                      (reshdr.size_in_wim >
983                                       reshdr.uncompressed_size)))
984                         {
985                                 reshdr.uncompressed_size = reshdr.size_in_wim;
986                         }
987
988                         /* Set up a resource descriptor for this blob.  */
989
990                         rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
991                         if (!rdesc)
992                                 goto oom;
993
994                         wim_res_hdr_to_desc(&reshdr, wim, rdesc);
995
996                         blob_set_is_located_in_nonsolid_wim_resource(cur_blob, rdesc);
997                 }
998
999                 /* cur_blob is now a blob bound to a resource.  */
1000
1001                 /* Ignore entries with all zeroes in the hash field.  */
1002                 if (is_zero_hash(cur_blob->hash))
1003                         goto free_cur_blob_and_continue;
1004
1005                 /* Verify that the part number matches that of the underlying
1006                  * WIM file.  */
1007                 if (part_number != wim->hdr.part_number) {
1008                         num_wrong_part_blobs++;
1009                         goto free_cur_blob_and_continue;
1010                 }
1011
1012                 if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
1013
1014                         cur_blob->is_metadata = 1;
1015
1016                         /* Blob table entry for a metadata resource.  */
1017
1018                         /* Metadata entries with no references must be ignored.
1019                          * See, for example, the WinPE WIMs from the WAIK v2.1.
1020                          */
1021                         if (cur_blob->refcnt == 0)
1022                                 goto free_cur_blob_and_continue;
1023
1024                         if (cur_blob->refcnt != 1) {
1025                                 /* We don't currently support this case due to
1026                                  * the complications of multiple images sharing
1027                                  * the same metadata resource or a metadata
1028                                  * resource also being referenced by files.  */
1029                                 ERROR("Found metadata resource with refcnt != 1");
1030                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1031                                 goto out;
1032                         }
1033
1034                         if (wim->hdr.part_number != 1) {
1035                                 WARNING("Ignoring metadata resource found in a "
1036                                         "non-first part of the split WIM");
1037                                 goto free_cur_blob_and_continue;
1038                         }
1039
1040                         /* The number of entries in the blob table with
1041                          * WIM_RESHDR_FLAG_METADATA set should be the same as
1042                          * the image_count field in the WIM header.  */
1043                         if (image_index == wim->hdr.image_count) {
1044                                 WARNING("Found more metadata resources than images");
1045                                 goto free_cur_blob_and_continue;
1046                         }
1047
1048                         /* Notice very carefully:  We are assigning the metadata
1049                          * resources to images in the same order in which their
1050                          * blob table entries occur on disk.  (This is also the
1051                          * behavior of Microsoft's software.)  In particular,
1052                          * this overrides the actual locations of the metadata
1053                          * resources themselves in the WIM file as well as any
1054                          * information written in the XML data.  */
1055                         DEBUG("Found metadata resource for image %"PRIu32" at "
1056                               "offset %"PRIu64".",
1057                               image_index + 1,
1058                               reshdr.offset_in_wim);
1059
1060                         wim->image_metadata[image_index++]->metadata_blob = cur_blob;
1061                 } else {
1062                         /* Blob table entry for a non-metadata blob.  */
1063
1064                         /* Ignore this blob if it's a duplicate.  */
1065                         if (lookup_blob(table, cur_blob->hash)) {
1066                                 num_duplicate_blobs++;
1067                                 goto free_cur_blob_and_continue;
1068                         }
1069
1070                         /* Insert the blob into the in-memory blob table, keyed
1071                          * by its SHA-1 message digest.  */
1072                         blob_table_insert(table, cur_blob);
1073                 }
1074
1075                 continue;
1076
1077         free_cur_blob_and_continue:
                     /* Discard an entry that is being skipped.  While a solid
                      * run is in progress, a blob that was bound to a WIM
                      * resource is detached from it before being freed.
                      * NOTE(review): presumably this keeps the solid resource
                      * descriptor, which the rest of the run still needs, from
                      * being affected by free_blob_descriptor() -- confirm.  */
1078                 if (cur_solid_rdescs &&
1079                     cur_blob->blob_location == BLOB_IN_WIM)
1080                         blob_unset_is_located_in_wim_resource(cur_blob);
1081                 free_blob_descriptor(cur_blob);
1082         }
             /* Every descriptor created by the loop has by now either been
              * stored (in the table or in the image metadata) or freed, so
              * clear the pointer to keep the 'out' path from freeing it.  */
1083         cur_blob = NULL;
1084
1085         if (cur_solid_rdescs) {
1086                 /* End of blob table terminated a solid run.  */
1087                 ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
                     /* As above: the array has already been released.  */
1088                 cur_solid_rdescs = NULL;
1089                 if (ret)
1090                         goto out;
1091         }
1092
             /* Only the first part of a WIM is expected to supply the metadata
              * resources.  If some are missing, drop the images that have none
              * and shrink the image count to match.  */
1093         if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
1094                 WARNING("Could not find metadata resources for all images");
1095                 for (u32 i = image_index; i < wim->hdr.image_count; i++)
1096                         put_image_metadata(wim->image_metadata[i], NULL);
1097                 wim->hdr.image_count = image_index;
1098         }
1099
1100         if (num_duplicate_blobs > 0)
1101                 WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);
1102
1103         if (num_wrong_part_blobs > 0) {
1104                 WARNING("Ignoring %zu blobs with wrong part number",
1105                         num_wrong_part_blobs);
1106         }
1107
1108         DEBUG("Done reading blob table.");
1109         wim->blob_table = table;
1110         ret = 0;
             /* Success: keep the table, release only the raw buffer.  */
1111         goto out_free_buf;
1112
         /* Error exits.  'oom' sets the error code and falls through into 'out',
          * which releases everything built so far and then falls through into
          * 'out_free_buf'.  */
1113 oom:
1114         ERROR("Not enough memory to read blob table!");
1115         ret = WIMLIB_ERR_NOMEM;
1116 out:
1117         free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1118         free_blob_descriptor(cur_blob);
1119         free_blob_table(table);
1120 out_free_buf:
1121         FREE(buf);
1122         return ret;
1123 }
1124
1125 static void
1126 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1127                       const struct wim_reshdr *out_reshdr,
1128                       u16 part_number, u32 refcnt, const u8 *hash)
1129 {
1130         put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1131         disk_entry->part_number = cpu_to_le16(part_number);
1132         disk_entry->refcnt = cpu_to_le32(refcnt);
1133         copy_hash(disk_entry->hash, hash);
1134 }
1135
1136 /* Note: the list of blob descriptors must be sorted so that all entries for the
1137  * same solid resource are consecutive.  In addition, blob descriptors for
1138  * metadata resources must be in the same order as the indices of the underlying
1139  * images.  */
1140 int
1141 write_blob_table_from_blob_list(struct list_head *blob_list,
1142                                 struct filedes *out_fd,
1143                                 u16 part_number,
1144                                 struct wim_reshdr *out_reshdr,
1145                                 int write_resource_flags)
1146 {
1147         size_t table_size;
1148         struct blob_descriptor *blob;
1149         struct blob_descriptor_disk *table_buf;
1150         struct blob_descriptor_disk *table_buf_ptr;
1151         int ret;
1152         u64 prev_res_offset_in_wim = ~0ULL;
1153         u64 prev_uncompressed_size;
1154         u64 logical_offset;
1155
1156         table_size = 0;
1157         list_for_each_entry(blob, blob_list, blob_table_list) {
1158                 table_size += sizeof(struct blob_descriptor_disk);
1159
1160                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
1161                     blob->out_res_offset_in_wim != prev_res_offset_in_wim)
1162                 {
1163                         table_size += sizeof(struct blob_descriptor_disk);
1164                         prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1165                 }
1166         }
1167
1168         DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
1169               table_size, out_fd->offset);
1170
1171         table_buf = MALLOC(table_size);
1172         if (table_buf == NULL) {
1173                 ERROR("Failed to allocate %zu bytes for temporary blob table",
1174                       table_size);
1175                 return WIMLIB_ERR_NOMEM;
1176         }
1177         table_buf_ptr = table_buf;
1178
1179         prev_res_offset_in_wim = ~0ULL;
1180         prev_uncompressed_size = 0;
1181         logical_offset = 0;
1182         list_for_each_entry(blob, blob_list, blob_table_list) {
1183                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1184                         struct wim_reshdr tmp_reshdr;
1185
1186                         /* Eww.  When WIMGAPI sees multiple solid resources, it
1187                          * expects the offsets to be adjusted as if there were
1188                          * really only one solid resource.  */
1189
1190                         if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
1191                                 /* Put the resource entry for solid resource  */
1192                                 tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
1193                                 tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
1194                                 tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
1195                                 tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
1196
1197                                 write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1198                                                       part_number, 1, zero_hash);
1199
1200                                 logical_offset += prev_uncompressed_size;
1201
1202                                 prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1203                                 prev_uncompressed_size = blob->out_res_uncompressed_size;
1204                         }
1205                         tmp_reshdr = blob->out_reshdr;
1206                         tmp_reshdr.offset_in_wim += logical_offset;
1207                         write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1208                                               part_number, blob->out_refcnt, blob->hash);
1209                 } else {
1210                         write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
1211                                               part_number, blob->out_refcnt, blob->hash);
1212                 }
1213
1214         }
1215         wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
1216
1217         /* Write the blob table uncompressed.  Although wimlib can handle a
1218          * compressed blob table, MS software cannot.  */
1219         ret = write_wim_resource_from_buffer(table_buf,
1220                                              table_size,
1221                                              true,
1222                                              out_fd,
1223                                              WIMLIB_COMPRESSION_TYPE_NONE,
1224                                              0,
1225                                              out_reshdr,
1226                                              NULL,
1227                                              write_resource_flags);
1228         FREE(table_buf);
1229         DEBUG("ret=%d", ret);
1230         return ret;
1231 }
1232
1233 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1234  * existing descriptor in @blob_table for an identical blob.  */
1235 struct blob_descriptor *
1236 new_blob_from_data_buffer(const void *buffer, size_t size,
1237                           struct blob_table *blob_table)
1238 {
1239         u8 hash[SHA1_HASH_SIZE];
1240         struct blob_descriptor *blob;
1241         void *buffer_copy;
1242
1243         sha1_buffer(buffer, size, hash);
1244
1245         blob = lookup_blob(blob_table, hash);
1246         if (blob)
1247                 return blob;
1248
1249         blob = new_blob_descriptor();
1250         if (!blob)
1251                 return NULL;
1252
1253         buffer_copy = memdup(buffer, size);
1254         if (!buffer_copy) {
1255                 free_blob_descriptor(blob);
1256                 return NULL;
1257         }
1258         blob_set_is_located_in_attached_buffer(blob, buffer_copy, size);
1259         copy_hash(blob->hash, hash);
1260         blob_table_insert(blob_table, blob);
1261         return blob;
1262 }
1263
1264 struct blob_descriptor *
1265 after_blob_hashed(struct blob_descriptor *blob,
1266                   struct blob_descriptor **back_ptr,
1267                   struct blob_table *blob_table)
1268 {
1269         struct blob_descriptor *duplicate_blob;
1270
1271         list_del(&blob->unhashed_list);
1272         blob->unhashed = 0;
1273
1274         /* Look for a duplicate blob  */
1275         duplicate_blob = lookup_blob(blob_table, blob->hash);
1276         if (duplicate_blob) {
1277                 /* We have a duplicate blob.  Transfer the reference counts from
1278                  * this blob to the duplicate and update the reference to this
1279                  * blob (from a stream) to point to the duplicate.  The caller
1280                  * is responsible for freeing @blob if needed.  */
1281                 wimlib_assert(duplicate_blob->size == blob->size);
1282                 duplicate_blob->refcnt += blob->refcnt;
1283                 blob->refcnt = 0;
1284                 *back_ptr = duplicate_blob;
1285                 return duplicate_blob;
1286         } else {
1287                 /* No duplicate blob, so we need to insert this blob into the
1288                  * blob table and treat it as a hashed blob.  */
1289                 blob_table_insert(blob_table, blob);
1290                 return blob;
1291         }
1292 }
1293
/*
 * Compute the SHA-1 message digest of an unhashed blob, then index it in the
 * blob table via after_blob_hashed(), which may merge it with an identical
 * blob that is already present.
 *
 * @blob:
 *      The blob to hash
 * @blob_table:
 *      The blob table in which the blob needs to be indexed
 * @blob_ret:
 *      On success, receives the resulting blob descriptor: @blob itself if it
 *      was inserted into the blob table, or the pre-existing descriptor if a
 *      duplicate blob was found.  Not written on failure.
 *
 * Returns 0 on success; nonzero if there is an error reading the blob data.
 */
int
hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
                   struct blob_descriptor **blob_ret)
{
        /* Fetch the back-pointer before hashing, as the original code does.  */
        struct blob_descriptor **owner_ptr =
                retrieve_pointer_to_unhashed_blob(blob);
        int status = sha1_blob(blob);

        if (status == 0)
                *blob_ret = after_blob_hashed(blob, owner_ptr, blob_table);
        return status;
}
1326
1327 void
1328 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1329                               struct wimlib_resource_entry *wentry)
1330 {
1331         memset(wentry, 0, sizeof(*wentry));
1332
1333         wentry->uncompressed_size = blob->size;
1334         if (blob->blob_location == BLOB_IN_WIM) {
1335                 unsigned res_flags = blob->rdesc->flags;
1336
1337                 wentry->part_number = blob->rdesc->wim->hdr.part_number;
1338                 if (res_flags & WIM_RESHDR_FLAG_SOLID) {
1339                         wentry->offset = blob->offset_in_res;
1340                 } else {
1341                         wentry->compressed_size = blob->rdesc->size_in_wim;
1342                         wentry->offset = blob->rdesc->offset_in_wim;
1343                 }
1344                 wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1345                 wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1346                 wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1347
1348                 wentry->is_compressed = (res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1349                 wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
1350                 wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1351                 wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
1352         }
1353         if (!blob->unhashed)
1354                 copy_hash(wentry->sha1_hash, blob->hash);
1355         wentry->reference_count = blob->refcnt;
1356         wentry->is_metadata = blob->is_metadata;
1357 }
1358
/* Bundles the caller's callback and its argument so that both can be passed
 * to do_iterate_blob() through a single untyped context pointer.  */
1359 struct iterate_blob_context {
             /* Callback invoked once for each blob visited.  */
1360         wimlib_iterate_lookup_table_callback_t cb;
             /* Caller-supplied pointer handed to 'cb' unchanged.  */
1361         void *user_ctx;
1362 };
1363
1364 static int
1365 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1366 {
1367         struct iterate_blob_context *ctx = _ctx;
1368         struct wimlib_resource_entry entry;
1369
1370         blob_to_wimlib_resource_entry(blob, &entry);
1371         return (*ctx->cb)(&entry, ctx->user_ctx);
1372 }
1373
1374 /* API function documented in wimlib.h  */
1375 WIMLIBAPI int
1376 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1377                             wimlib_iterate_lookup_table_callback_t cb,
1378                             void *user_ctx)
1379 {
1380         if (flags != 0)
1381                 return WIMLIB_ERR_INVALID_PARAM;
1382
1383         struct iterate_blob_context ctx = {
1384                 .cb = cb,
1385                 .user_ctx = user_ctx,
1386         };
1387         if (wim_has_metadata(wim)) {
1388                 int ret;
1389                 for (int i = 0; i < wim->hdr.image_count; i++) {
1390                         struct blob_descriptor *blob;
1391                         struct wim_image_metadata *imd = wim->image_metadata[i];
1392
1393                         ret = do_iterate_blob(imd->metadata_blob, &ctx);
1394                         if (ret)
1395                                 return ret;
1396                         image_for_each_unhashed_blob(blob, imd) {
1397                                 ret = do_iterate_blob(blob, &ctx);
1398                                 if (ret)
1399                                         return ret;
1400                         }
1401                 }
1402         }
1403         return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1404 }