]> wimlib.net Git - wimlib/blob - src/blob_table.c
blob_table.c: fix NTFS location clone error path
[wimlib] / src / blob_table.c
1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h> /* for unlink()  */
35
36 #include "wimlib/assert.h"
37 #include "wimlib/blob_table.h"
38 #include "wimlib/encoding.h"
39 #include "wimlib/endianness.h"
40 #include "wimlib/error.h"
41 #include "wimlib/metadata.h"
42 #include "wimlib/ntfs_3g.h"
43 #include "wimlib/resource.h"
44 #include "wimlib/unaligned.h"
45 #include "wimlib/util.h"
46 #include "wimlib/write.h"
47
/* A hash table mapping SHA-1 message digests to blob descriptors.  Each bucket
 * is the head of an hlist of descriptors whose hashes map to that bucket.  */
struct blob_table {
        /* Bucket array; allocated with 'capacity' entries.  */
        struct hlist_head *array;

        /* Number of blob descriptors currently inserted in the table.  */
        size_t num_blobs;

        /* Number of buckets in 'array'.  Bucket indices are computed modulo
         * this value.  */
        size_t capacity;
};
54
55 struct blob_table *
56 new_blob_table(size_t capacity)
57 {
58         struct blob_table *table;
59         struct hlist_head *array;
60
61         table = MALLOC(sizeof(struct blob_table));
62         if (table == NULL)
63                 goto oom;
64
65         array = CALLOC(capacity, sizeof(array[0]));
66         if (array == NULL) {
67                 FREE(table);
68                 goto oom;
69         }
70
71         table->num_blobs = 0;
72         table->capacity = capacity;
73         table->array = array;
74         return table;
75
76 oom:
77         ERROR("Failed to allocate memory for blob table "
78               "with capacity %zu", capacity);
79         return NULL;
80 }
81
/* for_blob_in_table() visitor that frees the blob descriptor it is handed.
 * The context argument is unused.  Always returns 0, so the iteration is never
 * cut short.  */
static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
        (void)_ignore;

        free_blob_descriptor(blob);
        return 0;
}
88
89 void
90 free_blob_table(struct blob_table *table)
91 {
92         if (table) {
93                 for_blob_in_table(table, do_free_blob_descriptor, NULL);
94                 FREE(table->array);
95                 FREE(table);
96         }
97 }
98
99 struct blob_descriptor *
100 new_blob_descriptor(void)
101 {
102         struct blob_descriptor *blob;
103
104         blob = CALLOC(1, sizeof(struct blob_descriptor));
105         if (blob == NULL)
106                 return NULL;
107
108         blob->refcnt = 1;
109
110         /* blob->blob_location = BLOB_NONEXISTENT  */
111         BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
112
113         return blob;
114 }
115
116 struct blob_descriptor *
117 clone_blob_descriptor(const struct blob_descriptor *old)
118 {
119         struct blob_descriptor *new;
120
121         new = memdup(old, sizeof(struct blob_descriptor));
122         if (new == NULL)
123                 return NULL;
124
125         switch (new->blob_location) {
126         case BLOB_IN_WIM:
127                 list_add(&new->rdesc_node, &new->rdesc->blob_list);
128                 break;
129
130         case BLOB_IN_FILE_ON_DISK:
131 #ifdef __WIN32__
132         case BLOB_IN_WINNT_FILE_ON_DISK:
133         case BLOB_WIN32_ENCRYPTED:
134 #endif
135 #ifdef WITH_FUSE
136         case BLOB_IN_STAGING_FILE:
137                 BUILD_BUG_ON((void*)&old->file_on_disk !=
138                              (void*)&old->staging_file_name);
139 #endif
140                 new->file_on_disk = TSTRDUP(old->file_on_disk);
141                 if (new->file_on_disk == NULL)
142                         goto out_free;
143                 break;
144         case BLOB_IN_ATTACHED_BUFFER:
145                 new->attached_buffer = memdup(old->attached_buffer, old->size);
146                 if (new->attached_buffer == NULL)
147                         goto out_free;
148                 break;
149 #ifdef WITH_NTFS_3G
150         case BLOB_IN_NTFS_VOLUME:
151                 if (old->ntfs_loc) {
152                         new->ntfs_loc = memdup(old->ntfs_loc,
153                                                sizeof(struct ntfs_location));
154                         if (new->ntfs_loc == NULL)
155                                 goto out_free;
156                         new->ntfs_loc->path = STRDUP(old->ntfs_loc->path);
157                         new->ntfs_loc->attr_name = NULL;
158                         if (new->ntfs_loc->path == NULL)
159                                 goto out_free;
160                         if (new->ntfs_loc->attr_name_nchars != 0) {
161                                 new->ntfs_loc->attr_name =
162                                         utf16le_dup(old->ntfs_loc->attr_name);
163                                 if (new->ntfs_loc->attr_name == NULL)
164                                         goto out_free;
165                         }
166                 }
167                 break;
168 #endif
169         default:
170                 break;
171         }
172         return new;
173
174 out_free:
175         free_blob_descriptor(new);
176         return NULL;
177 }
178
179 static void
180 blob_release_location(struct blob_descriptor *blob)
181 {
182         switch (blob->blob_location) {
183         case BLOB_IN_WIM:
184                 list_del(&blob->rdesc_node);
185                 if (list_empty(&blob->rdesc->blob_list))
186                         FREE(blob->rdesc);
187                 break;
188         case BLOB_IN_FILE_ON_DISK:
189 #ifdef __WIN32__
190         case BLOB_IN_WINNT_FILE_ON_DISK:
191         case BLOB_WIN32_ENCRYPTED:
192 #endif
193 #ifdef WITH_FUSE
194         case BLOB_IN_STAGING_FILE:
195                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
196                              (void*)&blob->staging_file_name);
197 #endif
198         case BLOB_IN_ATTACHED_BUFFER:
199                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
200                              (void*)&blob->attached_buffer);
201                 FREE(blob->file_on_disk);
202                 break;
203 #ifdef WITH_NTFS_3G
204         case BLOB_IN_NTFS_VOLUME:
205                 if (blob->ntfs_loc) {
206                         FREE(blob->ntfs_loc->path);
207                         FREE(blob->ntfs_loc->attr_name);
208                         FREE(blob->ntfs_loc);
209                 }
210                 break;
211 #endif
212         default:
213                 break;
214         }
215 }
216
/* Free a blob descriptor along with the location data it owns.  Passing NULL
 * is a no-op.  */
void
free_blob_descriptor(struct blob_descriptor *blob)
{
        if (blob == NULL)
                return;

        blob_release_location(blob);
        FREE(blob);
}
225
226 /* Should this blob be retained even if it has no references?  */
227 static bool
228 should_retain_blob(const struct blob_descriptor *blob)
229 {
230         return blob->blob_location == BLOB_IN_WIM;
231 }
232
/* Dispose of a blob that is no longer in use: free it unless it is one of the
 * blobs that must be retained (see should_retain_blob()).  */
static void
finalize_blob(struct blob_descriptor *blob)
{
        if (should_retain_blob(blob))
                return;

        free_blob_descriptor(blob);
}
239
240 /*
241  * Decrements the reference count of the specified blob, which must be either
242  * (a) unhashed, or (b) inserted in the specified blob table.
243  *
244  * If the blob's reference count reaches 0, we may unlink it from @table and
245  * free it.  However, we retain blobs with 0 reference count that originated
246  * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
247  *
248  * 1. This prevents information about valid blobs in a WIM file --- blobs which
249  *    will continue to be present after appending to the WIM file --- from being
250  *    lost merely because we dropped all references to them.
251  *
252  * 2. Blob reference counts we read from WIM files can't be trusted.  It's
253  *    possible that a WIM has reference counts that are too low; WIMGAPI
254  *    sometimes creates WIMs where this is the case.  It's also possible that
255  *    blobs have been referenced from an external WIM; those blobs can
256  *    potentially have any reference count at all, either lower or higher than
257  *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
258  *    it were a standalone WIM.
259  *
260  * So we can't take the reference counts too seriously.  But at least, we do
261  * recalculate by default when writing a new WIM file.
262  */
263 void
264 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
265 {
266         if (unlikely(blob->refcnt == 0))  /* See comment above  */
267                 return;
268
269         if (--blob->refcnt != 0)
270                 return;
271
272         if (blob->unhashed) {
273                 list_del(&blob->unhashed_list);
274         #ifdef WITH_FUSE
275                 /* If the blob has been extracted to a staging file for a FUSE
276                  * mount, unlink the staging file.  (Note that there still may
277                  * be open file descriptors to it.)  */
278                 if (blob->blob_location == BLOB_IN_STAGING_FILE)
279                         unlinkat(blob->staging_dir_fd,
280                                  blob->staging_file_name, 0);
281         #endif
282         } else {
283                 if (!should_retain_blob(blob))
284                         blob_table_unlink(table, blob);
285         }
286
287         /* If FUSE mounts are enabled, then don't actually free the blob
288          * descriptor until the last file descriptor to it has been closed.  */
289 #ifdef WITH_FUSE
290         if (blob->num_opened_fds == 0)
291 #endif
292                 finalize_blob(blob);
293 }
294
#ifdef WITH_FUSE
/* Record that one file descriptor to @blob has been closed.  When both the
 * open-descriptor count and the reference count have reached 0, the blob is
 * finalized.  The open-descriptor count must be nonzero on entry.  */
void
blob_decrement_num_opened_fds(struct blob_descriptor *blob)
{
        wimlib_assert(blob->num_opened_fds != 0);

        blob->num_opened_fds--;
        if (blob->num_opened_fds != 0)
                return;
        if (blob->refcnt != 0)
                return;

        finalize_blob(blob);
}
#endif
305
306 static void
307 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
308 {
309         size_t i = blob->hash_short % table->capacity;
310
311         hlist_add_head(&blob->hash_list, &table->array[i]);
312 }
313
314 static void
315 enlarge_blob_table(struct blob_table *table)
316 {
317         size_t old_capacity, new_capacity;
318         struct hlist_head *old_array, *new_array;
319         struct blob_descriptor *blob;
320         struct hlist_node *cur, *tmp;
321         size_t i;
322
323         old_capacity = table->capacity;
324         new_capacity = old_capacity * 2;
325         new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
326         if (new_array == NULL)
327                 return;
328         old_array = table->array;
329         table->array = new_array;
330         table->capacity = new_capacity;
331
332         for (i = 0; i < old_capacity; i++) {
333                 hlist_for_each_entry_safe(blob, cur, tmp, &old_array[i], hash_list) {
334                         hlist_del(&blob->hash_list);
335                         blob_table_insert_raw(table, blob);
336                 }
337         }
338         FREE(old_array);
339 }
340
341 /* Insert a blob descriptor into the blob table.  */
342 void
343 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
344 {
345         blob_table_insert_raw(table, blob);
346         if (++table->num_blobs > table->capacity)
347                 enlarge_blob_table(table);
348 }
349
350 /* Unlinks a blob descriptor from the blob table; does not free it.  */
351 void
352 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
353 {
354         wimlib_assert(!blob->unhashed);
355         wimlib_assert(table->num_blobs != 0);
356
357         hlist_del(&blob->hash_list);
358         table->num_blobs--;
359 }
360
361 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
362  * the specified blob table, or NULL if there is none.  */
363 struct blob_descriptor *
364 lookup_blob(const struct blob_table *table, const u8 *hash)
365 {
366         size_t i;
367         struct blob_descriptor *blob;
368         struct hlist_node *pos;
369
370         i = load_size_t_unaligned(hash) % table->capacity;
371         hlist_for_each_entry(blob, pos, &table->array[i], hash_list)
372                 if (hashes_equal(hash, blob->hash))
373                         return blob;
374         return NULL;
375 }
376
377 /* Call a function on all blob descriptors in the specified blob table.  Stop
378  * early and return nonzero if any call to the function returns nonzero.  */
379 int
380 for_blob_in_table(struct blob_table *table,
381                   int (*visitor)(struct blob_descriptor *, void *), void *arg)
382 {
383         struct blob_descriptor *blob;
384         struct hlist_node *pos, *tmp;
385         int ret;
386
387         for (size_t i = 0; i < table->capacity; i++) {
388                 hlist_for_each_entry_safe(blob, pos, tmp, &table->array[i],
389                                           hash_list)
390                 {
391                         ret = visitor(blob, arg);
392                         if (ret)
393                                 return ret;
394                 }
395         }
396         return 0;
397 }
398
399 /*
400  * This is a qsort() callback that sorts blobs into an order optimized for
401  * reading.  Sorting is done primarily by blob location, then secondarily by a
402  * location-dependent order.  For example, blobs in WIM resources are sorted
403  * such that the underlying WIM files will be read sequentially.  This is
404  * especially important for WIM files containing solid resources.
405  */
406 int
407 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
408 {
409         const struct blob_descriptor *blob1, *blob2;
410         int v;
411         WIMStruct *wim1, *wim2;
412
413         blob1 = *(const struct blob_descriptor**)p1;
414         blob2 = *(const struct blob_descriptor**)p2;
415
416         v = (int)blob1->blob_location - (int)blob2->blob_location;
417
418         /* Different resource locations?  */
419         if (v)
420                 return v;
421
422         switch (blob1->blob_location) {
423         case BLOB_IN_WIM:
424                 wim1 = blob1->rdesc->wim;
425                 wim2 = blob2->rdesc->wim;
426
427                 /* Different (possibly split) WIMs?  */
428                 if (wim1 != wim2) {
429                         v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN);
430                         if (v)
431                                 return v;
432                 }
433
434                 /* Different part numbers in the same WIM?  */
435                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
436                 if (v)
437                         return v;
438
439                 if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
440                         return cmp_u64(blob1->rdesc->offset_in_wim,
441                                        blob2->rdesc->offset_in_wim);
442
443                 return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
444
445         case BLOB_IN_FILE_ON_DISK:
446 #ifdef WITH_FUSE
447         case BLOB_IN_STAGING_FILE:
448 #endif
449 #ifdef __WIN32__
450         case BLOB_IN_WINNT_FILE_ON_DISK:
451         case BLOB_WIN32_ENCRYPTED:
452 #endif
453                 /* Compare files by path: just a heuristic that will place files
454                  * in the same directory next to each other.  */
455                 return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
456 #ifdef WITH_NTFS_3G
457         case BLOB_IN_NTFS_VOLUME:
458                 return tstrcmp(blob1->ntfs_loc->path, blob2->ntfs_loc->path);
459 #endif
460         default:
461                 /* No additional sorting order defined for this resource
462                  * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
463                  * everything equal to each other.  */
464                 return 0;
465         }
466 }
467
468 int
469 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
470                int (*compar)(const void *, const void*))
471 {
472         struct list_head *cur;
473         struct blob_descriptor **array;
474         size_t i;
475         size_t array_size;
476         size_t num_blobs = 0;
477
478         list_for_each(cur, blob_list)
479                 num_blobs++;
480
481         if (num_blobs <= 1)
482                 return 0;
483
484         array_size = num_blobs * sizeof(array[0]);
485         array = MALLOC(array_size);
486         if (array == NULL)
487                 return WIMLIB_ERR_NOMEM;
488
489         cur = blob_list->next;
490         for (i = 0; i < num_blobs; i++) {
491                 array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
492                 cur = cur->next;
493         }
494
495         qsort(array, num_blobs, sizeof(array[0]), compar);
496
497         INIT_LIST_HEAD(blob_list);
498         for (i = 0; i < num_blobs; i++) {
499                 list_add_tail((struct list_head*)
500                                ((u8*)array[i] + list_head_offset), blob_list);
501         }
502         FREE(array);
503         return 0;
504 }
505
506 /* Sort the specified list of blobs in an order optimized for sequential
507  * reading.  */
508 int
509 sort_blob_list_by_sequential_order(struct list_head *blob_list,
510                                    size_t list_head_offset)
511 {
512         return sort_blob_list(blob_list, list_head_offset,
513                               cmp_blobs_by_sequential_order);
514 }
515
/* for_blob_in_table() visitor that appends @blob to an array.  @_pp points to
 * the write cursor (a 'struct blob_descriptor **'), which is advanced by one
 * slot.  Always returns 0.  */
static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
        struct blob_descriptor ***cursor = _pp;

        **cursor = blob;
        (*cursor)++;
        return 0;
}
523
524 /* Iterate through the blob descriptors in the specified blob table in an order
525  * optimized for sequential reading.  */
526 int
527 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
528                                              int (*visitor)(struct blob_descriptor *, void *),
529                                              void *arg)
530 {
531         struct blob_descriptor **blob_array, **p;
532         size_t num_blobs = table->num_blobs;
533         int ret;
534
535         blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
536         if (!blob_array)
537                 return WIMLIB_ERR_NOMEM;
538         p = blob_array;
539         for_blob_in_table(table, add_blob_to_array, &p);
540
541         wimlib_assert(p == blob_array + num_blobs);
542
543         qsort(blob_array, num_blobs, sizeof(blob_array[0]),
544               cmp_blobs_by_sequential_order);
545         ret = 0;
546         for (size_t i = 0; i < num_blobs; i++) {
547                 ret = visitor(blob_array[i], arg);
548                 if (ret)
549                         break;
550         }
551         FREE(blob_array);
552         return ret;
553 }
554
/* On-disk format of a blob descriptor in a WIM file.
 *
 * Note: if the WIM file contains solid resource(s), then this structure is
 * sometimes overloaded to describe a "resource" rather than a "blob".  See the
 * code for details.
 *
 * The field order and types mirror the file format and must not be changed.
 * NOTE(review): '_packed_attribute' presumably suppresses padding between
 * fields; confirm against its definition.  */
struct blob_descriptor_disk {

        /* Size, offset, and flags of the blob.  */
        struct wim_reshdr_disk reshdr;

        /* Which part of the split WIM this blob is in; indexed from 1. */
        le16 part_number;

        /* Reference count of this blob over all WIM images.  Stored
         * little-endian.  (But see comment above blob_decrement_refcnt().)  */
        le32 refcnt;

        /* SHA-1 message digest of the uncompressed data of this blob, or all
         * zeroes if this blob is of zero length.  */
        u8 hash[SHA1_HASH_SIZE];
} _packed_attribute;
576
577 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
578  * count how many specify resources (as opposed to blobs within those
579  * resources).
580  *
581  * Returns the resulting count.  */
582 static size_t
583 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
584 {
585         size_t count = 0;
586         do {
587                 struct wim_reshdr reshdr;
588
589                 get_wim_reshdr(&(entries++)->reshdr, &reshdr);
590
591                 if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
592                         /* Run was terminated by a stand-alone blob entry.  */
593                         break;
594                 }
595
596                 if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
597                         /* This is a resource entry.  */
598                         count++;
599                 }
600         } while (--max);
601         return count;
602 }
603
604 /*
605  * Given a run of consecutive blob descriptors with the SOLID flag set and
606  * having @num_rdescs resource entries, load resource information from them into
607  * the resource descriptors in the @rdescs array.
608  *
609  * Returns 0 on success, or a nonzero error code on failure.
610  */
611 static int
612 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
613                    size_t num_rdescs,
614                    const struct blob_descriptor_disk *entries)
615 {
616         for (size_t i = 0; i < num_rdescs; i++) {
617                 struct wim_reshdr reshdr;
618                 struct alt_chunk_table_header_disk hdr;
619                 struct wim_resource_descriptor *rdesc;
620                 int ret;
621
622                 /* Advance to next resource entry.  */
623
624                 do {
625                         get_wim_reshdr(&(entries++)->reshdr, &reshdr);
626                 } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
627
628                 rdesc = rdescs[i];
629
630                 wim_res_hdr_to_desc(&reshdr, wim, rdesc);
631
632                 /* For solid resources, the uncompressed size, compression type,
633                  * and chunk size are stored in the resource itself, not in the
634                  * blob table.  */
635
636                 ret = full_pread(&wim->in_fd, &hdr,
637                                  sizeof(hdr), reshdr.offset_in_wim);
638                 if (ret) {
639                         ERROR("Failed to read header of solid resource "
640                               "(offset_in_wim=%"PRIu64")",
641                               reshdr.offset_in_wim);
642                         return ret;
643                 }
644
645                 rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
646
647                 /* Compression format numbers must be the same as in
648                  * WIMGAPI to be compatible here.  */
649                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
650                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
651                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
652                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
653                 rdesc->compression_type = le32_to_cpu(hdr.compression_format);
654
655                 rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
656
657                 DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
658                       "(%"TS"/%"PRIu32") @ +%"PRIu64"",
659                       i + 1, num_rdescs,
660                       rdesc->uncompressed_size,
661                       rdesc->size_in_wim,
662                       wimlib_get_compression_type_string(rdesc->compression_type),
663                       rdesc->chunk_size,
664                       rdesc->offset_in_wim);
665         }
666         return 0;
667 }
668
669 /*
670  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
671  * allocate a 'struct wim_resource_descriptor' for each resource within that
672  * run.
673  *
674  * Returns 0 on success, or a nonzero error code on failure.
675  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
676  */
677 static int
678 load_solid_info(WIMStruct *wim,
679                 const struct blob_descriptor_disk *entries,
680                 size_t num_remaining_entries,
681                 struct wim_resource_descriptor ***rdescs_ret,
682                 size_t *num_rdescs_ret)
683 {
684         size_t num_rdescs;
685         struct wim_resource_descriptor **rdescs;
686         size_t i;
687         int ret;
688
689         num_rdescs = count_solid_resources(entries, num_remaining_entries);
690         rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
691         if (!rdescs)
692                 return WIMLIB_ERR_NOMEM;
693
694         for (i = 0; i < num_rdescs; i++) {
695                 rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
696                 if (!rdescs[i]) {
697                         ret = WIMLIB_ERR_NOMEM;
698                         goto out_free_rdescs;
699                 }
700         }
701
702         ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
703         if (ret)
704                 goto out_free_rdescs;
705
706         *rdescs_ret = rdescs;
707         *num_rdescs_ret = num_rdescs;
708         return 0;
709
710 out_free_rdescs:
711         for (i = 0; i < num_rdescs; i++)
712                 FREE(rdescs[i]);
713         FREE(rdescs);
714         return ret;
715 }
716
717 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
718  * with the SOLID flag set, try to assign it to resource in the current solid
719  * run.  */
720 static int
721 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
722                               struct blob_descriptor *blob,
723                               struct wim_resource_descriptor **rdescs,
724                               size_t num_rdescs)
725 {
726         u64 offset = reshdr->offset_in_wim;
727
728         /* XXX: This linear search will be slow in the degenerate case where the
729          * number of solid resources in the run is huge.  */
730         blob->size = reshdr->size_in_wim;
731         blob->flags = reshdr->flags;
732         for (size_t i = 0; i < num_rdescs; i++) {
733                 if (offset + blob->size <= rdescs[i]->uncompressed_size) {
734                         blob->offset_in_res = offset;
735                         blob_set_is_located_in_wim_resource(blob, rdescs[i]);
736                         return 0;
737                 }
738                 offset -= rdescs[i]->uncompressed_size;
739         }
740         ERROR("blob could not be assigned to a solid resource");
741         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
742 }
743
744 static void
745 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
746 {
747         if (rdescs) {
748                 for (size_t i = 0; i < num_rdescs; i++)
749                         if (list_empty(&rdescs[i]->blob_list))
750                                 FREE(rdescs[i]);
751                 FREE(rdescs);
752         }
753 }
754
755 static int
756 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
757 {
758         const struct blob_descriptor *blob1, *blob2;
759
760         blob1 = *(const struct blob_descriptor**)p1;
761         blob2 = *(const struct blob_descriptor**)p2;
762
763         return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
764 }
765
766 /* Validate the size and location of a WIM resource.  */
767 static int
768 validate_resource(struct wim_resource_descriptor *rdesc)
769 {
770         struct blob_descriptor *blob;
771         bool out_of_order;
772         u64 expected_next_offset;
773         int ret;
774
775         /* Verify that the resource itself has a valid offset and size.  */
776         if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
777                 goto invalid_due_to_overflow;
778
779         /* Verify that each blob in the resource has a valid offset and size.
780          */
781         expected_next_offset = 0;
782         out_of_order = false;
783         list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
784                 if (blob->offset_in_res + blob->size < blob->size ||
785                     blob->offset_in_res + blob->size > rdesc->uncompressed_size)
786                         goto invalid_due_to_overflow;
787
788                 if (blob->offset_in_res >= expected_next_offset)
789                         expected_next_offset = blob->offset_in_res + blob->size;
790                 else
791                         out_of_order = true;
792         }
793
794         /* If the blobs were not located at strictly increasing positions (not
795          * allowing for overlap), sort them.  Then make sure that none overlap.
796          */
797         if (out_of_order) {
798                 ret = sort_blob_list(&rdesc->blob_list,
799                                      offsetof(struct blob_descriptor,
800                                               rdesc_node),
801                                      cmp_blobs_by_offset_in_res);
802                 if (ret)
803                         return ret;
804
805                 expected_next_offset = 0;
806                 list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
807                         if (blob->offset_in_res >= expected_next_offset)
808                                 expected_next_offset = blob->offset_in_res + blob->size;
809                         else
810                                 goto invalid_due_to_overlap;
811                 }
812         }
813
814         return 0;
815
816 invalid_due_to_overflow:
817         ERROR("Invalid blob table (offset overflow)");
818         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
819
820 invalid_due_to_overlap:
821         ERROR("Invalid blob table (blobs in solid resource overlap)");
822         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
823 }
824
/* Validate each resource of a finished solid run, stopping at the first
 * failure, then release the @rdescs array via free_solid_rdescs().  Returns 0
 * if all resources are valid, otherwise the first validation error.  */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
        int ret = 0;
        size_t i = 0;

        while (i < num_rdescs && ret == 0)
                ret = validate_resource(rdescs[i++]);

        free_solid_rdescs(rdescs, num_rdescs);
        return ret;
}
837
838 /*
839  * read_blob_table() -
840  *
841  * Read the blob table from a WIM file.  Usually, each entry in this table
842  * describes a "blob", or equivalently a "resource", that the WIM file contains,
843  * along with its location and SHA-1 message digest.  Descriptors for
844  * non-metadata blobs will be saved in the in-memory blob table
845  * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
846  * special location per-image (the wim->image_metadata array).
847  *
848  * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
849  * blobs that are compressed together.  Such a resource is called a "solid
850  * resource".  Solid resources are still described in the on-disk "blob table",
851  * although the format is not the most logical.  A consecutive sequence of
852  * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
853  * A solid run describes a set of solid resources, each of which contains a set
854  * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
855  * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
856  * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
857  * resource.  There are some oddities in how we need to determine which solid
858  * resource a blob is actually in; see the code for details.
859  *
860  * Possible return values:
861  *      WIMLIB_ERR_SUCCESS (0)
862  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
863  *      WIMLIB_ERR_NOMEM
864  *
865  *      Or an error code caused by failure to read the blob table from the WIM
866  *      file.
867  */
868 int
869 read_blob_table(WIMStruct *wim)
870 {
871         int ret;
872         size_t num_entries;
873         void *buf = NULL;
874         struct blob_table *table = NULL;
875         struct blob_descriptor *cur_blob = NULL;
876         size_t num_duplicate_blobs = 0;
877         size_t num_wrong_part_blobs = 0;
878         u32 image_index = 0;
879         struct wim_resource_descriptor **cur_solid_rdescs = NULL;
880         size_t cur_num_solid_rdescs = 0;
881
882         DEBUG("Reading blob table.");
883
884         /* Calculate the number of entries in the blob table.  */
885         num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
886                       sizeof(struct blob_descriptor_disk);
887
888         /* Read the blob table into a buffer.  */
889         ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
890         if (ret)
891                 goto out;
892
893         /* Allocate a hash table to map SHA-1 message digests into blob
894          * descriptors.  This is the in-memory "blob table".  */
895         table = new_blob_table(num_entries * 2 + 1);
896         if (!table)
897                 goto oom;
898
899         /* Allocate and initalize blob descriptors from the raw blob table
900          * buffer.  */
901         for (size_t i = 0; i < num_entries; i++) {
902                 const struct blob_descriptor_disk *disk_entry =
903                         &((const struct blob_descriptor_disk*)buf)[i];
904                 struct wim_reshdr reshdr;
905                 u16 part_number;
906
907                 /* Get the resource header  */
908                 get_wim_reshdr(&disk_entry->reshdr, &reshdr);
909
910                 DEBUG("reshdr: size_in_wim=%"PRIu64", "
911                       "uncompressed_size=%"PRIu64", "
912                       "offset_in_wim=%"PRIu64", "
913                       "flags=0x%02x",
914                       reshdr.size_in_wim, reshdr.uncompressed_size,
915                       reshdr.offset_in_wim, reshdr.flags);
916
917                 /* Ignore SOLID flag if it isn't supposed to be used in this WIM
918                  * version.  */
919                 if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
920                         reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;
921
922                 /* Allocate a new 'struct blob_descriptor'.  */
923                 cur_blob = new_blob_descriptor();
924                 if (!cur_blob)
925                         goto oom;
926
927                 /* Get the part number, reference count, and hash.  */
928                 part_number = le16_to_cpu(disk_entry->part_number);
929                 cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
930                 copy_hash(cur_blob->hash, disk_entry->hash);
931
932                 if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
933
934                         /* SOLID entry  */
935
936                         if (!cur_solid_rdescs) {
937                                 /* Starting new run  */
938                                 ret = load_solid_info(wim, disk_entry,
939                                                       num_entries - i,
940                                                       &cur_solid_rdescs,
941                                                       &cur_num_solid_rdescs);
942                                 if (ret)
943                                         goto out;
944                         }
945
946                         if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
947                                 /* Resource entry, not blob entry  */
948                                 goto free_cur_blob_and_continue;
949                         }
950
951                         /* Blob entry  */
952
953                         ret = assign_blob_to_solid_resource(&reshdr,
954                                                             cur_blob,
955                                                             cur_solid_rdescs,
956                                                             cur_num_solid_rdescs);
957                         if (ret)
958                                 goto out;
959
960                 } else {
961                         /* Normal blob/resource entry; SOLID not set.  */
962
963                         struct wim_resource_descriptor *rdesc;
964
965                         if (unlikely(cur_solid_rdescs)) {
966                                 /* This entry terminated a solid run.  */
967                                 ret = finish_solid_rdescs(cur_solid_rdescs,
968                                                           cur_num_solid_rdescs);
969                                 cur_solid_rdescs = NULL;
970                                 if (ret)
971                                         goto out;
972                         }
973
974                         /* How to handle an uncompressed resource with its
975                          * uncompressed size different from its compressed size?
976                          *
977                          * Based on a simple test, WIMGAPI seems to handle this
978                          * as follows:
979                          *
980                          * if (size_in_wim > uncompressed_size) {
981                          *      Ignore uncompressed_size; use size_in_wim
982                          *      instead.
983                          * } else {
984                          *      Honor uncompressed_size, but treat the part of
985                          *      the file data above size_in_wim as all zeros.
986                          * }
987                          *
988                          * So we will do the same.  */
989                         if (unlikely(!(reshdr.flags &
990                                        WIM_RESHDR_FLAG_COMPRESSED) &&
991                                      (reshdr.size_in_wim >
992                                       reshdr.uncompressed_size)))
993                         {
994                                 reshdr.uncompressed_size = reshdr.size_in_wim;
995                         }
996
997                         /* Set up a resource descriptor for this blob.  */
998
999                         rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
1000                         if (!rdesc)
1001                                 goto oom;
1002
1003                         wim_res_hdr_to_desc(&reshdr, wim, rdesc);
1004
1005                         cur_blob->offset_in_res = 0;
1006                         cur_blob->size = reshdr.uncompressed_size;
1007                         cur_blob->flags = reshdr.flags;
1008
1009                         blob_set_is_located_in_wim_resource(cur_blob, rdesc);
1010                 }
1011
1012                 /* cur_blob is now a blob bound to a resource.  */
1013
1014                 /* Ignore entries with all zeroes in the hash field.  */
1015                 if (is_zero_hash(cur_blob->hash))
1016                         goto free_cur_blob_and_continue;
1017
1018                 /* Verify that the part number matches that of the underlying
1019                  * WIM file.  */
1020                 if (part_number != wim->hdr.part_number) {
1021                         num_wrong_part_blobs++;
1022                         goto free_cur_blob_and_continue;
1023                 }
1024
1025                 if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {
1026
1027                         /* Blob table entry for a metadata resource.  */
1028
1029                         /* Metadata entries with no references must be ignored.
1030                          * See, for example, the WinPE WIMs from the WAIK v2.1.
1031                          */
1032                         if (cur_blob->refcnt == 0)
1033                                 goto free_cur_blob_and_continue;
1034
1035                         if (cur_blob->refcnt != 1) {
1036                                 /* We don't currently support this case due to
1037                                  * the complications of multiple images sharing
1038                                  * the same metadata resource or a metadata
1039                                  * resource also being referenced by files.  */
1040                                 ERROR("Found metadata resource with refcnt != 1");
1041                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
1042                                 goto out;
1043                         }
1044
1045                         if (wim->hdr.part_number != 1) {
1046                                 WARNING("Ignoring metadata resource found in a "
1047                                         "non-first part of the split WIM");
1048                                 goto free_cur_blob_and_continue;
1049                         }
1050
1051                         /* The number of entries in the blob table with
1052                          * WIM_RESHDR_FLAG_METADATA set should be the same as
1053                          * the image_count field in the WIM header.  */
1054                         if (image_index == wim->hdr.image_count) {
1055                                 WARNING("Found more metadata resources than images");
1056                                 goto free_cur_blob_and_continue;
1057                         }
1058
1059                         /* Notice very carefully:  We are assigning the metadata
1060                          * resources to images in the same order in which their
1061                          * blob table entries occur on disk.  (This is also the
1062                          * behavior of Microsoft's software.)  In particular,
1063                          * this overrides the actual locations of the metadata
1064                          * resources themselves in the WIM file as well as any
1065                          * information written in the XML data.  */
1066                         DEBUG("Found metadata resource for image %"PRIu32" at "
1067                               "offset %"PRIu64".",
1068                               image_index + 1,
1069                               reshdr.offset_in_wim);
1070
1071                         wim->image_metadata[image_index++]->metadata_blob = cur_blob;
1072                 } else {
1073                         /* Blob table entry for a non-metadata blob.  */
1074
1075                         /* Ignore this blob if it's a duplicate.  */
1076                         if (lookup_blob(table, cur_blob->hash)) {
1077                                 num_duplicate_blobs++;
1078                                 goto free_cur_blob_and_continue;
1079                         }
1080
1081                         /* Insert the blob into the in-memory blob table, keyed
1082                          * by its SHA-1 message digest.  */
1083                         blob_table_insert(table, cur_blob);
1084                 }
1085
1086                 continue;
1087
1088         free_cur_blob_and_continue:
1089                 if (cur_solid_rdescs &&
1090                     cur_blob->blob_location == BLOB_IN_WIM)
1091                         blob_unset_is_located_in_wim_resource(cur_blob);
1092                 free_blob_descriptor(cur_blob);
1093         }
1094         cur_blob = NULL;
1095
1096         if (cur_solid_rdescs) {
1097                 /* End of blob table terminated a solid run.  */
1098                 ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1099                 cur_solid_rdescs = NULL;
1100                 if (ret)
1101                         goto out;
1102         }
1103
1104         if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
1105                 WARNING("Could not find metadata resources for all images");
1106                 for (u32 i = image_index; i < wim->hdr.image_count; i++)
1107                         put_image_metadata(wim->image_metadata[i], NULL);
1108                 wim->hdr.image_count = image_index;
1109         }
1110
1111         if (num_duplicate_blobs > 0)
1112                 WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);
1113
1114         if (num_wrong_part_blobs > 0) {
1115                 WARNING("Ignoring %zu blobs with wrong part number",
1116                         num_wrong_part_blobs);
1117         }
1118
1119         DEBUG("Done reading blob table.");
1120         wim->blob_table = table;
1121         ret = 0;
1122         goto out_free_buf;
1123
1124 oom:
1125         ERROR("Not enough memory to read blob table!");
1126         ret = WIMLIB_ERR_NOMEM;
1127 out:
1128         free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
1129         free_blob_descriptor(cur_blob);
1130         free_blob_table(table);
1131 out_free_buf:
1132         FREE(buf);
1133         return ret;
1134 }
1135
1136 static void
1137 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1138                       const struct wim_reshdr *out_reshdr,
1139                       u16 part_number, u32 refcnt, const u8 *hash)
1140 {
1141         put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1142         disk_entry->part_number = cpu_to_le16(part_number);
1143         disk_entry->refcnt = cpu_to_le32(refcnt);
1144         copy_hash(disk_entry->hash, hash);
1145 }
1146
1147 /* Note: the list of blob descriptors must be sorted so that all entries for the
1148  * same solid resource are consecutive.  In addition, blob descriptors with
1149  * WIM_RESHDR_FLAG_METADATA set must be in the same order as the indices of the
1150  * underlying images.  */
1151 int
1152 write_blob_table_from_blob_list(struct list_head *blob_list,
1153                                 struct filedes *out_fd,
1154                                 u16 part_number,
1155                                 struct wim_reshdr *out_reshdr,
1156                                 int write_resource_flags)
1157 {
1158         size_t table_size;
1159         struct blob_descriptor *blob;
1160         struct blob_descriptor_disk *table_buf;
1161         struct blob_descriptor_disk *table_buf_ptr;
1162         int ret;
1163         u64 prev_res_offset_in_wim = ~0ULL;
1164         u64 prev_uncompressed_size;
1165         u64 logical_offset;
1166
1167         table_size = 0;
1168         list_for_each_entry(blob, blob_list, blob_table_list) {
1169                 table_size += sizeof(struct blob_descriptor_disk);
1170
1171                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
1172                     blob->out_res_offset_in_wim != prev_res_offset_in_wim)
1173                 {
1174                         table_size += sizeof(struct blob_descriptor_disk);
1175                         prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1176                 }
1177         }
1178
1179         DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
1180               table_size, out_fd->offset);
1181
1182         table_buf = MALLOC(table_size);
1183         if (table_buf == NULL) {
1184                 ERROR("Failed to allocate %zu bytes for temporary blob table",
1185                       table_size);
1186                 return WIMLIB_ERR_NOMEM;
1187         }
1188         table_buf_ptr = table_buf;
1189
1190         prev_res_offset_in_wim = ~0ULL;
1191         prev_uncompressed_size = 0;
1192         logical_offset = 0;
1193         list_for_each_entry(blob, blob_list, blob_table_list) {
1194                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1195                         struct wim_reshdr tmp_reshdr;
1196
1197                         /* Eww.  When WIMGAPI sees multiple solid resources, it
1198                          * expects the offsets to be adjusted as if there were
1199                          * really only one solid resource.  */
1200
1201                         if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
1202                                 /* Put the resource entry for solid resource  */
1203                                 tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
1204                                 tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
1205                                 tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
1206                                 tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
1207
1208                                 write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1209                                                       part_number, 1, zero_hash);
1210
1211                                 logical_offset += prev_uncompressed_size;
1212
1213                                 prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1214                                 prev_uncompressed_size = blob->out_res_uncompressed_size;
1215                         }
1216                         tmp_reshdr = blob->out_reshdr;
1217                         tmp_reshdr.offset_in_wim += logical_offset;
1218                         write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1219                                               part_number, blob->out_refcnt, blob->hash);
1220                 } else {
1221                         write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
1222                                               part_number, blob->out_refcnt, blob->hash);
1223                 }
1224
1225         }
1226         wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
1227
1228         /* Write the blob table uncompressed.  Although wimlib can handle a
1229          * compressed blob table, MS software cannot.  */
1230         ret = write_wim_resource_from_buffer(table_buf,
1231                                              table_size,
1232                                              WIM_RESHDR_FLAG_METADATA,
1233                                              out_fd,
1234                                              WIMLIB_COMPRESSION_TYPE_NONE,
1235                                              0,
1236                                              out_reshdr,
1237                                              NULL,
1238                                              write_resource_flags);
1239         FREE(table_buf);
1240         DEBUG("ret=%d", ret);
1241         return ret;
1242 }
1243
1244 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1245  * existing descriptor in @blob_table for an identical blob.  */
1246 struct blob_descriptor *
1247 new_blob_from_data_buffer(const void *buffer, size_t size,
1248                           struct blob_table *blob_table)
1249 {
1250         u8 hash[SHA1_HASH_SIZE];
1251         struct blob_descriptor *blob, *existing_blob;
1252
1253         sha1_buffer(buffer, size, hash);
1254         existing_blob = lookup_blob(blob_table, hash);
1255         if (existing_blob) {
1256                 wimlib_assert(existing_blob->size == size);
1257                 blob = existing_blob;
1258                 blob->refcnt++;
1259         } else {
1260                 void *buffer_copy;
1261                 blob = new_blob_descriptor();
1262                 if (blob == NULL)
1263                         return NULL;
1264                 buffer_copy = memdup(buffer, size);
1265                 if (buffer_copy == NULL) {
1266                         free_blob_descriptor(blob);
1267                         return NULL;
1268                 }
1269                 blob->blob_location = BLOB_IN_ATTACHED_BUFFER;
1270                 blob->attached_buffer = buffer_copy;
1271                 blob->size = size;
1272                 copy_hash(blob->hash, hash);
1273                 blob_table_insert(blob_table, blob);
1274         }
1275         return blob;
1276 }
1277
1278 /*
1279  * Calculate the SHA-1 message digest of a blob and move its descriptor from the
1280  * list of unhashed blobs to the blob table, possibly joining it with an
1281  * identical blob.
1282  *
1283  * @blob:
1284  *      The blob to hash
1285  * @blob_table:
1286  *      The blob table in which the blob needs to be indexed
1287  * @blob_ret:
1288  *      On success, a pointer to the resulting blob descriptor is written to
1289  *      this location.  This will be the same as @blob if it was inserted into
1290  *      the blob table, or different if a duplicate blob was found.
1291  *
1292  * Returns 0 on success; nonzero if there is an error reading the blob data.
1293  */
1294 int
1295 hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
1296                    struct blob_descriptor **blob_ret)
1297 {
1298         int ret;
1299         struct blob_descriptor *duplicate_blob;
1300         struct blob_descriptor **back_ptr;
1301
1302         wimlib_assert(blob->unhashed);
1303
1304         /* back_ptr must be saved because @back_inode and @back_stream_id are in
1305          * union with the SHA-1 message digest and will no longer be valid once
1306          * the SHA-1 has been calculated. */
1307         back_ptr = retrieve_pointer_to_unhashed_blob(blob);
1308
1309         ret = sha1_blob(blob);
1310         if (ret)
1311                 return ret;
1312
1313         list_del(&blob->unhashed_list);
1314         blob->unhashed = 0;
1315
1316         /* Look for a duplicate blob  */
1317         duplicate_blob = lookup_blob(blob_table, blob->hash);
1318         if (duplicate_blob) {
1319                 /* We have a duplicate blob.  Transfer the reference counts from
1320                  * this blob to the duplicate and update the reference to this
1321                  * blob (from an stream) to point to the duplicate.  The caller
1322                  * is responsible for freeing @blob if needed.  */
1323                 wimlib_assert(duplicate_blob->size == blob->size);
1324                 duplicate_blob->refcnt += blob->refcnt;
1325                 blob->refcnt = 0;
1326                 *back_ptr = duplicate_blob;
1327                 blob = duplicate_blob;
1328         } else {
1329                 /* No duplicate blob, so we need to insert this blob into the
1330                  * blob table and treat it as a hashed blob. */
1331                 blob_table_insert(blob_table, blob);
1332         }
1333         *blob_ret = blob;
1334         return 0;
1335 }
1336
1337 void
1338 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1339                               struct wimlib_resource_entry *wentry)
1340 {
1341         memset(wentry, 0, sizeof(*wentry));
1342
1343         wentry->uncompressed_size = blob->size;
1344         if (blob->blob_location == BLOB_IN_WIM) {
1345                 wentry->part_number = blob->rdesc->wim->hdr.part_number;
1346                 if (blob->flags & WIM_RESHDR_FLAG_SOLID) {
1347                         wentry->offset = blob->offset_in_res;
1348                 } else {
1349                         wentry->compressed_size = blob->rdesc->size_in_wim;
1350                         wentry->offset = blob->rdesc->offset_in_wim;
1351                 }
1352                 wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1353                 wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1354                 wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1355         }
1356         copy_hash(wentry->sha1_hash, blob->hash);
1357         wentry->reference_count = blob->refcnt;
1358         wentry->is_compressed = (blob->flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1359         wentry->is_metadata = (blob->flags & WIM_RESHDR_FLAG_METADATA) != 0;
1360         wentry->is_free = (blob->flags & WIM_RESHDR_FLAG_FREE) != 0;
1361         wentry->is_spanned = (blob->flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1362         wentry->packed = (blob->flags & WIM_RESHDR_FLAG_SOLID) != 0;
1363 }
1364
1365 struct iterate_blob_context {
1366         wimlib_iterate_lookup_table_callback_t cb;
1367         void *user_ctx;
1368 };
1369
1370 static int
1371 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1372 {
1373         struct iterate_blob_context *ctx = _ctx;
1374         struct wimlib_resource_entry entry;
1375
1376         blob_to_wimlib_resource_entry(blob, &entry);
1377         return (*ctx->cb)(&entry, ctx->user_ctx);
1378 }
1379
1380 /* API function documented in wimlib.h  */
1381 WIMLIBAPI int
1382 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1383                             wimlib_iterate_lookup_table_callback_t cb,
1384                             void *user_ctx)
1385 {
1386         if (flags != 0)
1387                 return WIMLIB_ERR_INVALID_PARAM;
1388
1389         struct iterate_blob_context ctx = {
1390                 .cb = cb,
1391                 .user_ctx = user_ctx,
1392         };
1393         if (wim_has_metadata(wim)) {
1394                 int ret;
1395                 for (int i = 0; i < wim->hdr.image_count; i++) {
1396                         ret = do_iterate_blob(wim->image_metadata[i]->metadata_blob,
1397                                               &ctx);
1398                         if (ret)
1399                                 return ret;
1400                 }
1401         }
1402         return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1403 }