2d346e3539ecca661b774802a98f046dd7873960
[wimlib] / src / blob_table.c
1 /*
2  * blob_table.c
3  *
4  * A blob table maps SHA-1 message digests to "blobs", which are nonempty
5  * sequences of binary data.  Within a WIM file, blobs are single-instanced.
6  *
7  * This file also contains code to read and write the corresponding on-disk
8  * representation of this table in the WIM file format.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
13  *
14  * This file is free software; you can redistribute it and/or modify it under
15  * the terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option) any
17  * later version.
18  *
19  * This file is distributed in the hope that it will be useful, but WITHOUT
20  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
21  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with this file; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h> /* for unlink()  */
35
36 #include "wimlib/assert.h"
37 #include "wimlib/blob_table.h"
38 #include "wimlib/encoding.h"
39 #include "wimlib/endianness.h"
40 #include "wimlib/error.h"
41 #include "wimlib/metadata.h"
42 #include "wimlib/ntfs_3g.h"
43 #include "wimlib/resource.h"
44 #include "wimlib/unaligned.h"
45 #include "wimlib/util.h"
46 #include "wimlib/write.h"
47
/*
 * In-memory blob table: a chained hash table that maps SHA-1 message digests
 * to blob descriptors.
 */
struct blob_table {
        /* Array of 'capacity' hash buckets, each the head of a chain of blob
         * descriptors.  */
        struct hlist_head *array;

        /* Number of blob descriptors currently inserted in the table.  */
        size_t num_blobs;

        /* Number of buckets in 'array'.  */
        size_t capacity;
};
54
55 struct blob_table *
56 new_blob_table(size_t capacity)
57 {
58         struct blob_table *table;
59         struct hlist_head *array;
60
61         table = MALLOC(sizeof(struct blob_table));
62         if (table == NULL)
63                 goto oom;
64
65         array = CALLOC(capacity, sizeof(array[0]));
66         if (array == NULL) {
67                 FREE(table);
68                 goto oom;
69         }
70
71         table->num_blobs = 0;
72         table->capacity = capacity;
73         table->array = array;
74         return table;
75
76 oom:
77         ERROR("Failed to allocate memory for blob table "
78               "with capacity %zu", capacity);
79         return NULL;
80 }
81
/* for_blob_in_table() visitor that frees the blob descriptor it is handed.
 * Always returns 0 so that the iteration continues over every blob.  */
static int
do_free_blob_descriptor(struct blob_descriptor *blob, void *_ignore)
{
        (void)_ignore;  /* The visitor argument is not needed here.  */

        free_blob_descriptor(blob);
        return 0;
}
88
89 void
90 free_blob_table(struct blob_table *table)
91 {
92         if (table) {
93                 for_blob_in_table(table, do_free_blob_descriptor, NULL);
94                 FREE(table->array);
95                 FREE(table);
96         }
97 }
98
99 struct blob_descriptor *
100 new_blob_descriptor(void)
101 {
102         BUILD_BUG_ON(BLOB_NONEXISTENT != 0);
103         return CALLOC(1, sizeof(struct blob_descriptor));
104 }
105
106 struct blob_descriptor *
107 clone_blob_descriptor(const struct blob_descriptor *old)
108 {
109         struct blob_descriptor *new;
110
111         new = memdup(old, sizeof(struct blob_descriptor));
112         if (new == NULL)
113                 return NULL;
114
115         switch (new->blob_location) {
116         case BLOB_IN_WIM:
117                 list_add(&new->rdesc_node, &new->rdesc->blob_list);
118                 break;
119
120         case BLOB_IN_FILE_ON_DISK:
121 #ifdef __WIN32__
122         case BLOB_IN_WINNT_FILE_ON_DISK:
123         case BLOB_WIN32_ENCRYPTED:
124 #endif
125 #ifdef WITH_FUSE
126         case BLOB_IN_STAGING_FILE:
127                 BUILD_BUG_ON((void*)&old->file_on_disk !=
128                              (void*)&old->staging_file_name);
129 #endif
130                 new->file_on_disk = TSTRDUP(old->file_on_disk);
131                 if (new->file_on_disk == NULL)
132                         goto out_free;
133                 break;
134         case BLOB_IN_ATTACHED_BUFFER:
135                 new->attached_buffer = memdup(old->attached_buffer, old->size);
136                 if (new->attached_buffer == NULL)
137                         goto out_free;
138                 break;
139 #ifdef WITH_NTFS_3G
140         case BLOB_IN_NTFS_VOLUME:
141                 if (old->ntfs_loc) {
142                         new->ntfs_loc = memdup(old->ntfs_loc,
143                                                sizeof(struct ntfs_location));
144                         if (new->ntfs_loc == NULL)
145                                 goto out_free;
146                         new->ntfs_loc->path = STRDUP(old->ntfs_loc->path);
147                         new->ntfs_loc->attr_name = NULL;
148                         if (new->ntfs_loc->path == NULL)
149                                 goto out_free;
150                         if (new->ntfs_loc->attr_name_nchars != 0) {
151                                 new->ntfs_loc->attr_name =
152                                         utf16le_dup(old->ntfs_loc->attr_name);
153                                 if (new->ntfs_loc->attr_name == NULL)
154                                         goto out_free;
155                         }
156                 }
157                 break;
158 #endif
159         default:
160                 break;
161         }
162         return new;
163
164 out_free:
165         free_blob_descriptor(new);
166         return NULL;
167 }
168
169 static void
170 blob_release_location(struct blob_descriptor *blob)
171 {
172         switch (blob->blob_location) {
173         case BLOB_IN_WIM:
174                 list_del(&blob->rdesc_node);
175                 if (list_empty(&blob->rdesc->blob_list))
176                         FREE(blob->rdesc);
177                 break;
178         case BLOB_IN_FILE_ON_DISK:
179 #ifdef __WIN32__
180         case BLOB_IN_WINNT_FILE_ON_DISK:
181         case BLOB_WIN32_ENCRYPTED:
182 #endif
183 #ifdef WITH_FUSE
184         case BLOB_IN_STAGING_FILE:
185                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
186                              (void*)&blob->staging_file_name);
187 #endif
188         case BLOB_IN_ATTACHED_BUFFER:
189                 BUILD_BUG_ON((void*)&blob->file_on_disk !=
190                              (void*)&blob->attached_buffer);
191                 FREE(blob->file_on_disk);
192                 break;
193 #ifdef WITH_NTFS_3G
194         case BLOB_IN_NTFS_VOLUME:
195                 if (blob->ntfs_loc) {
196                         FREE(blob->ntfs_loc->path);
197                         FREE(blob->ntfs_loc->attr_name);
198                         FREE(blob->ntfs_loc);
199                 }
200                 break;
201 #endif
202         default:
203                 break;
204         }
205 }
206
/* Free a blob descriptor along with its location-specific data.  A NULL
 * @blob is accepted and ignored.  */
void
free_blob_descriptor(struct blob_descriptor *blob)
{
        if (blob == NULL)
                return;

        blob_release_location(blob);
        FREE(blob);
}
215
216 /* Should this blob be retained even if it has no references?  */
217 static bool
218 should_retain_blob(const struct blob_descriptor *blob)
219 {
220         return blob->blob_location == BLOB_IN_WIM;
221 }
222
/* Dispose of a blob that is no longer in use: free its descriptor unless it
 * is one of the blobs that should_retain_blob() says must be kept.  */
static void
finalize_blob(struct blob_descriptor *blob)
{
        if (should_retain_blob(blob))
                return;

        free_blob_descriptor(blob);
}
229
230 /*
231  * Decrements the reference count of the specified blob, which must be either
232  * (a) unhashed, or (b) inserted in the specified blob table.
233  *
234  * If the blob's reference count reaches 0, we may unlink it from @table and
235  * free it.  However, we retain blobs with 0 reference count that originated
236  * from WIM files (BLOB_IN_WIM).  We do this for two reasons:
237  *
238  * 1. This prevents information about valid blobs in a WIM file --- blobs which
239  *    will continue to be present after appending to the WIM file --- from being
240  *    lost merely because we dropped all references to them.
241  *
242  * 2. Blob reference counts we read from WIM files can't be trusted.  It's
243  *    possible that a WIM has reference counts that are too low; WIMGAPI
244  *    sometimes creates WIMs where this is the case.  It's also possible that
245  *    blobs have been referenced from an external WIM; those blobs can
246  *    potentially have any reference count at all, either lower or higher than
247  *    would be expected for this WIM ("this WIM" meaning the owner of @table) if
248  *    it were a standalone WIM.
249  *
250  * So we can't take the reference counts too seriously.  But at least, we do
251  * recalculate by default when writing a new WIM file.
252  */
253 void
254 blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table)
255 {
256         blob_subtract_refcnt(blob, table, 1);
257 }
258
259 void
260 blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table,
261                      u32 count)
262 {
263         if (unlikely(blob->refcnt < count)) {
264                 blob->refcnt = 0; /* See comment above  */
265                 return;
266         }
267
268         blob->refcnt -= count;
269
270         if (blob->refcnt != 0)
271                 return;
272
273         if (blob->unhashed) {
274                 list_del(&blob->unhashed_list);
275         #ifdef WITH_FUSE
276                 /* If the blob has been extracted to a staging file for a FUSE
277                  * mount, unlink the staging file.  (Note that there still may
278                  * be open file descriptors to it.)  */
279                 if (blob->blob_location == BLOB_IN_STAGING_FILE)
280                         unlinkat(blob->staging_dir_fd,
281                                  blob->staging_file_name, 0);
282         #endif
283         } else {
284                 if (!should_retain_blob(blob))
285                         blob_table_unlink(table, blob);
286         }
287
288         /* If FUSE mounts are enabled, then don't actually free the blob
289          * descriptor until the last file descriptor to it has been closed.  */
290 #ifdef WITH_FUSE
291         if (blob->num_opened_fds == 0)
292 #endif
293                 finalize_blob(blob);
294 }
295
296 #ifdef WITH_FUSE
297 void
298 blob_decrement_num_opened_fds(struct blob_descriptor *blob)
299 {
300         wimlib_assert(blob->num_opened_fds != 0);
301
302         if (--blob->num_opened_fds == 0 && blob->refcnt == 0)
303                 finalize_blob(blob);
304 }
305 #endif
306
307 static void
308 blob_table_insert_raw(struct blob_table *table, struct blob_descriptor *blob)
309 {
310         size_t i = blob->hash_short % table->capacity;
311
312         hlist_add_head(&blob->hash_list, &table->array[i]);
313 }
314
315 static void
316 enlarge_blob_table(struct blob_table *table)
317 {
318         size_t old_capacity, new_capacity;
319         struct hlist_head *old_array, *new_array;
320         struct blob_descriptor *blob;
321         struct hlist_node *cur, *tmp;
322         size_t i;
323
324         old_capacity = table->capacity;
325         new_capacity = old_capacity * 2;
326         new_array = CALLOC(new_capacity, sizeof(struct hlist_head));
327         if (new_array == NULL)
328                 return;
329         old_array = table->array;
330         table->array = new_array;
331         table->capacity = new_capacity;
332
333         for (i = 0; i < old_capacity; i++) {
334                 hlist_for_each_entry_safe(blob, cur, tmp, &old_array[i], hash_list) {
335                         hlist_del(&blob->hash_list);
336                         blob_table_insert_raw(table, blob);
337                 }
338         }
339         FREE(old_array);
340 }
341
342 /* Insert a blob descriptor into the blob table.  */
343 void
344 blob_table_insert(struct blob_table *table, struct blob_descriptor *blob)
345 {
346         blob_table_insert_raw(table, blob);
347         if (++table->num_blobs > table->capacity)
348                 enlarge_blob_table(table);
349 }
350
351 /* Unlinks a blob descriptor from the blob table; does not free it.  */
352 void
353 blob_table_unlink(struct blob_table *table, struct blob_descriptor *blob)
354 {
355         wimlib_assert(!blob->unhashed);
356         wimlib_assert(table->num_blobs != 0);
357
358         hlist_del(&blob->hash_list);
359         table->num_blobs--;
360 }
361
362 /* Given a SHA-1 message digest, return the corresponding blob descriptor from
363  * the specified blob table, or NULL if there is none.  */
364 struct blob_descriptor *
365 lookup_blob(const struct blob_table *table, const u8 *hash)
366 {
367         size_t i;
368         struct blob_descriptor *blob;
369         struct hlist_node *pos;
370
371         i = load_size_t_unaligned(hash) % table->capacity;
372         hlist_for_each_entry(blob, pos, &table->array[i], hash_list)
373                 if (hashes_equal(hash, blob->hash))
374                         return blob;
375         return NULL;
376 }
377
378 /* Call a function on all blob descriptors in the specified blob table.  Stop
379  * early and return nonzero if any call to the function returns nonzero.  */
380 int
381 for_blob_in_table(struct blob_table *table,
382                   int (*visitor)(struct blob_descriptor *, void *), void *arg)
383 {
384         struct blob_descriptor *blob;
385         struct hlist_node *pos, *tmp;
386         int ret;
387
388         for (size_t i = 0; i < table->capacity; i++) {
389                 hlist_for_each_entry_safe(blob, pos, tmp, &table->array[i],
390                                           hash_list)
391                 {
392                         ret = visitor(blob, arg);
393                         if (ret)
394                                 return ret;
395                 }
396         }
397         return 0;
398 }
399
400 /*
401  * This is a qsort() callback that sorts blobs into an order optimized for
402  * reading.  Sorting is done primarily by blob location, then secondarily by a
403  * location-dependent order.  For example, blobs in WIM resources are sorted
404  * such that the underlying WIM files will be read sequentially.  This is
405  * especially important for WIM files containing solid resources.
406  */
407 int
408 cmp_blobs_by_sequential_order(const void *p1, const void *p2)
409 {
410         const struct blob_descriptor *blob1, *blob2;
411         int v;
412         WIMStruct *wim1, *wim2;
413
414         blob1 = *(const struct blob_descriptor**)p1;
415         blob2 = *(const struct blob_descriptor**)p2;
416
417         v = (int)blob1->blob_location - (int)blob2->blob_location;
418
419         /* Different resource locations?  */
420         if (v)
421                 return v;
422
423         switch (blob1->blob_location) {
424         case BLOB_IN_WIM:
425                 wim1 = blob1->rdesc->wim;
426                 wim2 = blob2->rdesc->wim;
427
428                 /* Different (possibly split) WIMs?  */
429                 if (wim1 != wim2) {
430                         v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GUID_LEN);
431                         if (v)
432                                 return v;
433                 }
434
435                 /* Different part numbers in the same WIM?  */
436                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
437                 if (v)
438                         return v;
439
440                 if (blob1->rdesc->offset_in_wim != blob2->rdesc->offset_in_wim)
441                         return cmp_u64(blob1->rdesc->offset_in_wim,
442                                        blob2->rdesc->offset_in_wim);
443
444                 return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
445
446         case BLOB_IN_FILE_ON_DISK:
447 #ifdef WITH_FUSE
448         case BLOB_IN_STAGING_FILE:
449 #endif
450 #ifdef __WIN32__
451         case BLOB_IN_WINNT_FILE_ON_DISK:
452         case BLOB_WIN32_ENCRYPTED:
453 #endif
454                 /* Compare files by path: just a heuristic that will place files
455                  * in the same directory next to each other.  */
456                 return tstrcmp(blob1->file_on_disk, blob2->file_on_disk);
457 #ifdef WITH_NTFS_3G
458         case BLOB_IN_NTFS_VOLUME:
459                 return tstrcmp(blob1->ntfs_loc->path, blob2->ntfs_loc->path);
460 #endif
461         default:
462                 /* No additional sorting order defined for this resource
463                  * location (e.g. BLOB_IN_ATTACHED_BUFFER); simply compare
464                  * everything equal to each other.  */
465                 return 0;
466         }
467 }
468
469 int
470 sort_blob_list(struct list_head *blob_list, size_t list_head_offset,
471                int (*compar)(const void *, const void*))
472 {
473         struct list_head *cur;
474         struct blob_descriptor **array;
475         size_t i;
476         size_t array_size;
477         size_t num_blobs = 0;
478
479         list_for_each(cur, blob_list)
480                 num_blobs++;
481
482         if (num_blobs <= 1)
483                 return 0;
484
485         array_size = num_blobs * sizeof(array[0]);
486         array = MALLOC(array_size);
487         if (array == NULL)
488                 return WIMLIB_ERR_NOMEM;
489
490         cur = blob_list->next;
491         for (i = 0; i < num_blobs; i++) {
492                 array[i] = (struct blob_descriptor*)((u8*)cur - list_head_offset);
493                 cur = cur->next;
494         }
495
496         qsort(array, num_blobs, sizeof(array[0]), compar);
497
498         INIT_LIST_HEAD(blob_list);
499         for (i = 0; i < num_blobs; i++) {
500                 list_add_tail((struct list_head*)
501                                ((u8*)array[i] + list_head_offset), blob_list);
502         }
503         FREE(array);
504         return 0;
505 }
506
507 /* Sort the specified list of blobs in an order optimized for sequential
508  * reading.  */
509 int
510 sort_blob_list_by_sequential_order(struct list_head *blob_list,
511                                    size_t list_head_offset)
512 {
513         return sort_blob_list(blob_list, list_head_offset,
514                               cmp_blobs_by_sequential_order);
515 }
516
/* for_blob_in_table() visitor that appends @blob to an array.  @_pp points to
 * the array's write cursor, which is advanced by one slot.  Always returns
 * 0.  */
static int
add_blob_to_array(struct blob_descriptor *blob, void *_pp)
{
        struct blob_descriptor ***cursor = _pp;

        **cursor = blob;
        (*cursor)++;
        return 0;
}
524
525 /* Iterate through the blob descriptors in the specified blob table in an order
526  * optimized for sequential reading.  */
527 int
528 for_blob_in_table_sorted_by_sequential_order(struct blob_table *table,
529                                              int (*visitor)(struct blob_descriptor *, void *),
530                                              void *arg)
531 {
532         struct blob_descriptor **blob_array, **p;
533         size_t num_blobs = table->num_blobs;
534         int ret;
535
536         blob_array = MALLOC(num_blobs * sizeof(blob_array[0]));
537         if (!blob_array)
538                 return WIMLIB_ERR_NOMEM;
539         p = blob_array;
540         for_blob_in_table(table, add_blob_to_array, &p);
541
542         wimlib_assert(p == blob_array + num_blobs);
543
544         qsort(blob_array, num_blobs, sizeof(blob_array[0]),
545               cmp_blobs_by_sequential_order);
546         ret = 0;
547         for (size_t i = 0; i < num_blobs; i++) {
548                 ret = visitor(blob_array[i], arg);
549                 if (ret)
550                         break;
551         }
552         FREE(blob_array);
553         return ret;
554 }
555
556 /* On-disk format of a blob descriptor in a WIM file.
557  *
558  * Note: if the WIM file contains solid resource(s), then this structure is
559  * sometimes overloaded to describe a "resource" rather than a "blob".  See the
560  * code for details.  */
561 struct blob_descriptor_disk {
562
563         /* Size, offset, and flags of the blob.  */
564         struct wim_reshdr_disk reshdr;
565
566         /* Which part of the split WIM this blob is in; indexed from 1. */
567         le16 part_number;
568
569         /* Reference count of this blob over all WIM images.  (But see comment
570          * above blob_decrement_refcnt().)  */
571         le32 refcnt;
572
573         /* SHA-1 message digest of the uncompressed data of this blob, or all
574          * zeroes if this blob is of zero length.  */
575         u8 hash[SHA1_HASH_SIZE];
576 } _packed_attribute;
577
578 /* Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
579  * count how many specify resources (as opposed to blobs within those
580  * resources).
581  *
582  * Returns the resulting count.  */
583 static size_t
584 count_solid_resources(const struct blob_descriptor_disk *entries, size_t max)
585 {
586         size_t count = 0;
587         do {
588                 struct wim_reshdr reshdr;
589
590                 get_wim_reshdr(&(entries++)->reshdr, &reshdr);
591
592                 if (!(reshdr.flags & WIM_RESHDR_FLAG_SOLID)) {
593                         /* Run was terminated by a stand-alone blob entry.  */
594                         break;
595                 }
596
597                 if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
598                         /* This is a resource entry.  */
599                         count++;
600                 }
601         } while (--max);
602         return count;
603 }
604
605 /*
606  * Given a run of consecutive blob descriptors with the SOLID flag set and
607  * having @num_rdescs resource entries, load resource information from them into
608  * the resource descriptors in the @rdescs array.
609  *
610  * Returns 0 on success, or a nonzero error code on failure.
611  */
612 static int
613 do_load_solid_info(WIMStruct *wim, struct wim_resource_descriptor **rdescs,
614                    size_t num_rdescs,
615                    const struct blob_descriptor_disk *entries)
616 {
617         for (size_t i = 0; i < num_rdescs; i++) {
618                 struct wim_reshdr reshdr;
619                 struct alt_chunk_table_header_disk hdr;
620                 struct wim_resource_descriptor *rdesc;
621                 int ret;
622
623                 /* Advance to next resource entry.  */
624
625                 do {
626                         get_wim_reshdr(&(entries++)->reshdr, &reshdr);
627                 } while (reshdr.uncompressed_size != SOLID_RESOURCE_MAGIC_NUMBER);
628
629                 rdesc = rdescs[i];
630
631                 wim_res_hdr_to_desc(&reshdr, wim, rdesc);
632
633                 /* For solid resources, the uncompressed size, compression type,
634                  * and chunk size are stored in the resource itself, not in the
635                  * blob table.  */
636
637                 ret = full_pread(&wim->in_fd, &hdr,
638                                  sizeof(hdr), reshdr.offset_in_wim);
639                 if (ret) {
640                         ERROR("Failed to read header of solid resource "
641                               "(offset_in_wim=%"PRIu64")",
642                               reshdr.offset_in_wim);
643                         return ret;
644                 }
645
646                 rdesc->uncompressed_size = le64_to_cpu(hdr.res_usize);
647
648                 /* Compression format numbers must be the same as in
649                  * WIMGAPI to be compatible here.  */
650                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
651                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
652                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
653                 BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
654                 rdesc->compression_type = le32_to_cpu(hdr.compression_format);
655
656                 rdesc->chunk_size = le32_to_cpu(hdr.chunk_size);
657
658                 DEBUG("Solid resource %zu/%zu: %"PRIu64" => %"PRIu64" "
659                       "(%"TS"/%"PRIu32") @ +%"PRIu64"",
660                       i + 1, num_rdescs,
661                       rdesc->uncompressed_size,
662                       rdesc->size_in_wim,
663                       wimlib_get_compression_type_string(rdesc->compression_type),
664                       rdesc->chunk_size,
665                       rdesc->offset_in_wim);
666         }
667         return 0;
668 }
669
670 /*
671  * Given a nonempty run of consecutive blob descriptors with the SOLID flag set,
672  * allocate a 'struct wim_resource_descriptor' for each resource within that
673  * run.
674  *
675  * Returns 0 on success, or a nonzero error code on failure.
676  * Returns the pointers and count in *rdescs_ret and *num_rdescs_ret.
677  */
678 static int
679 load_solid_info(WIMStruct *wim,
680                 const struct blob_descriptor_disk *entries,
681                 size_t num_remaining_entries,
682                 struct wim_resource_descriptor ***rdescs_ret,
683                 size_t *num_rdescs_ret)
684 {
685         size_t num_rdescs;
686         struct wim_resource_descriptor **rdescs;
687         size_t i;
688         int ret;
689
690         num_rdescs = count_solid_resources(entries, num_remaining_entries);
691         rdescs = CALLOC(num_rdescs, sizeof(rdescs[0]));
692         if (!rdescs)
693                 return WIMLIB_ERR_NOMEM;
694
695         for (i = 0; i < num_rdescs; i++) {
696                 rdescs[i] = MALLOC(sizeof(struct wim_resource_descriptor));
697                 if (!rdescs[i]) {
698                         ret = WIMLIB_ERR_NOMEM;
699                         goto out_free_rdescs;
700                 }
701         }
702
703         ret = do_load_solid_info(wim, rdescs, num_rdescs, entries);
704         if (ret)
705                 goto out_free_rdescs;
706
707         *rdescs_ret = rdescs;
708         *num_rdescs_ret = num_rdescs;
709         return 0;
710
711 out_free_rdescs:
712         for (i = 0; i < num_rdescs; i++)
713                 FREE(rdescs[i]);
714         FREE(rdescs);
715         return ret;
716 }
717
718 /* Given a 'struct blob_descriptor' allocated for an on-disk blob descriptor
719  * with the SOLID flag set, try to assign it to resource in the current solid
720  * run.  */
721 static int
722 assign_blob_to_solid_resource(const struct wim_reshdr *reshdr,
723                               struct blob_descriptor *blob,
724                               struct wim_resource_descriptor **rdescs,
725                               size_t num_rdescs)
726 {
727         u64 offset = reshdr->offset_in_wim;
728
729         /* XXX: This linear search will be slow in the degenerate case where the
730          * number of solid resources in the run is huge.  */
731         blob->size = reshdr->size_in_wim;
732         for (size_t i = 0; i < num_rdescs; i++) {
733                 if (offset + blob->size <= rdescs[i]->uncompressed_size) {
734                         blob_set_is_located_in_wim_resource(blob, rdescs[i], offset);
735                         return 0;
736                 }
737                 offset -= rdescs[i]->uncompressed_size;
738         }
739         ERROR("blob could not be assigned to a solid resource");
740         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
741 }
742
743 static void
744 free_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
745 {
746         if (rdescs) {
747                 for (size_t i = 0; i < num_rdescs; i++)
748                         if (list_empty(&rdescs[i]->blob_list))
749                                 FREE(rdescs[i]);
750                 FREE(rdescs);
751         }
752 }
753
754 static int
755 cmp_blobs_by_offset_in_res(const void *p1, const void *p2)
756 {
757         const struct blob_descriptor *blob1, *blob2;
758
759         blob1 = *(const struct blob_descriptor**)p1;
760         blob2 = *(const struct blob_descriptor**)p2;
761
762         return cmp_u64(blob1->offset_in_res, blob2->offset_in_res);
763 }
764
765 /* Validate the size and location of a WIM resource.  */
766 static int
767 validate_resource(struct wim_resource_descriptor *rdesc)
768 {
769         struct blob_descriptor *blob;
770         bool out_of_order;
771         u64 expected_next_offset;
772         int ret;
773
774         /* Verify that the resource itself has a valid offset and size.  */
775         if (rdesc->offset_in_wim + rdesc->size_in_wim < rdesc->size_in_wim)
776                 goto invalid_due_to_overflow;
777
778         /* Verify that each blob in the resource has a valid offset and size.
779          */
780         expected_next_offset = 0;
781         out_of_order = false;
782         list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
783                 if (blob->offset_in_res + blob->size < blob->size ||
784                     blob->offset_in_res + blob->size > rdesc->uncompressed_size)
785                         goto invalid_due_to_overflow;
786
787                 if (blob->offset_in_res >= expected_next_offset)
788                         expected_next_offset = blob->offset_in_res + blob->size;
789                 else
790                         out_of_order = true;
791         }
792
793         /* If the blobs were not located at strictly increasing positions (not
794          * allowing for overlap), sort them.  Then make sure that none overlap.
795          */
796         if (out_of_order) {
797                 ret = sort_blob_list(&rdesc->blob_list,
798                                      offsetof(struct blob_descriptor,
799                                               rdesc_node),
800                                      cmp_blobs_by_offset_in_res);
801                 if (ret)
802                         return ret;
803
804                 expected_next_offset = 0;
805                 list_for_each_entry(blob, &rdesc->blob_list, rdesc_node) {
806                         if (blob->offset_in_res >= expected_next_offset)
807                                 expected_next_offset = blob->offset_in_res + blob->size;
808                         else
809                                 goto invalid_due_to_overlap;
810                 }
811         }
812
813         return 0;
814
815 invalid_due_to_overflow:
816         ERROR("Invalid blob table (offset overflow)");
817         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
818
819 invalid_due_to_overlap:
820         ERROR("Invalid blob table (blobs in solid resource overlap)");
821         return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
822 }
823
/* Finish a solid run: validate its resources in order, stopping at the first
 * one that fails, and then release the array of resource descriptors with
 * free_solid_rdescs() whether or not validation succeeded.  Returns 0 or the
 * error code of the failed validation.  */
static int
finish_solid_rdescs(struct wim_resource_descriptor **rdescs, size_t num_rdescs)
{
        int ret = 0;
        size_t idx = 0;

        while (idx < num_rdescs && ret == 0)
                ret = validate_resource(rdescs[idx++]);

        free_solid_rdescs(rdescs, num_rdescs);
        return ret;
}
836
/*
 * read_blob_table() -
 *
 * Read the blob table from a WIM file.  Usually, each entry in this table
 * describes a "blob", or equivalently a "resource", that the WIM file contains,
 * along with its location and SHA-1 message digest.  Descriptors for
 * non-metadata blobs will be saved in the in-memory blob table
 * (wim->blob_table), whereas descriptors for metadata blobs will be saved in a
 * special location per-image (the wim->image_metadata array).
 *
 * However, in WIM_VERSION_SOLID (3584) WIMs, a resource may contain multiple
 * blobs that are compressed together.  Such a resource is called a "solid
 * resource".  Solid resources are still described in the on-disk "blob table",
 * although the format is not the most logical.  A consecutive sequence of
 * entries that all have flag WIM_RESHDR_FLAG_SOLID (0x10) set is a "solid run".
 * A solid run describes a set of solid resources, each of which contains a set
 * of blobs.  In a solid run, a 'struct wim_reshdr_disk' with 'uncompressed_size
 * = SOLID_RESOURCE_MAGIC_NUMBER (0x100000000)' specifies a solid resource,
 * whereas any other 'struct wim_reshdr_disk' specifies a blob within a solid
 * resource.  There are some oddities in how we need to determine which solid
 * resource a blob is actually in; see the code for details.
 *
 * Entries that are skipped rather than treated as errors: entries with an
 * all-zero hash, entries whose part number differs from the WIM's, duplicate
 * non-metadata hashes, metadata entries with refcnt 0, metadata entries in a
 * non-first part, and metadata entries beyond wim->hdr.image_count.
 *
 * On success, the newly built table is stored in wim->blob_table.  If this is
 * part 1 and fewer metadata entries were accepted than wim->hdr.image_count,
 * the remaining wim->image_metadata slots are passed to put_image_metadata()
 * and wim->hdr.image_count is lowered to the number actually found.
 *
 * On failure, wim->blob_table is not assigned; the partially built table, the
 * descriptor being processed, and any pending solid-run descriptors are freed.
 * NOTE(review): metadata_blob pointers already stored into wim->image_metadata
 * are not touched by the failure path here -- presumably the caller's WIM
 * teardown releases them; confirm.
 *
 * Possible return values:
 *      WIMLIB_ERR_SUCCESS (0)
 *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
 *      WIMLIB_ERR_NOMEM
 *
 *      Or an error code caused by failure to read the blob table from the WIM
 *      file.
 */
int
read_blob_table(WIMStruct *wim)
{
        int ret;
        size_t num_entries;
        void *buf = NULL;
        struct blob_table *table = NULL;
        struct blob_descriptor *cur_blob = NULL;
        size_t num_duplicate_blobs = 0;
        size_t num_wrong_part_blobs = 0;
        u32 image_index = 0;
        struct wim_resource_descriptor **cur_solid_rdescs = NULL;
        size_t cur_num_solid_rdescs = 0;

        DEBUG("Reading blob table.");

        /* Calculate the number of entries in the blob table.  This is an
         * integer division, so any trailing partial entry is ignored.  */
        num_entries = wim->hdr.blob_table_reshdr.uncompressed_size /
                      sizeof(struct blob_descriptor_disk);

        /* Read the blob table into a buffer.  */
        ret = wim_reshdr_to_data(&wim->hdr.blob_table_reshdr, wim, &buf);
        if (ret)
                goto out;

        /* Allocate a hash table to map SHA-1 message digests into blob
         * descriptors.  This is the in-memory "blob table".  */
        table = new_blob_table(num_entries * 2 + 1);
        if (!table)
                goto oom;

        /* Allocate and initialize blob descriptors from the raw blob table
         * buffer.  */
        for (size_t i = 0; i < num_entries; i++) {
                const struct blob_descriptor_disk *disk_entry =
                        &((const struct blob_descriptor_disk*)buf)[i];
                struct wim_reshdr reshdr;
                u16 part_number;

                /* Get the resource header  */
                get_wim_reshdr(&disk_entry->reshdr, &reshdr);

                DEBUG("reshdr: size_in_wim=%"PRIu64", "
                      "uncompressed_size=%"PRIu64", "
                      "offset_in_wim=%"PRIu64", "
                      "flags=0x%02x",
                      reshdr.size_in_wim, reshdr.uncompressed_size,
                      reshdr.offset_in_wim, reshdr.flags);

                /* Ignore SOLID flag if it isn't supposed to be used in this WIM
                 * version.  */
                if (wim->hdr.wim_version == WIM_VERSION_DEFAULT)
                        reshdr.flags &= ~WIM_RESHDR_FLAG_SOLID;

                /* Allocate a new 'struct blob_descriptor'.  From here until it
                 * is either stored (table / image metadata) or freed at
                 * 'free_cur_blob_and_continue', cur_blob is owned by this
                 * function and is released by the 'out' path on error.  */
                cur_blob = new_blob_descriptor();
                if (!cur_blob)
                        goto oom;

                /* Get the part number, reference count, and hash.  */
                part_number = le16_to_cpu(disk_entry->part_number);
                cur_blob->refcnt = le32_to_cpu(disk_entry->refcnt);
                copy_hash(cur_blob->hash, disk_entry->hash);

                if (reshdr.flags & WIM_RESHDR_FLAG_SOLID) {

                        /* SOLID entry  */

                        if (!cur_solid_rdescs) {
                                /* Starting new run.  The helper is given the
                                 * current entry and the number of entries
                                 * remaining from here to the end of the table
                                 * (presumably so it can scan the whole run up
                                 * front -- see load_solid_info()).  */
                                ret = load_solid_info(wim, disk_entry,
                                                      num_entries - i,
                                                      &cur_solid_rdescs,
                                                      &cur_num_solid_rdescs);
                                if (ret)
                                        goto out;
                        }

                        if (reshdr.uncompressed_size == SOLID_RESOURCE_MAGIC_NUMBER) {
                                /* Resource entry, not blob entry  */
                                goto free_cur_blob_and_continue;
                        }

                        /* Blob entry  */

                        ret = assign_blob_to_solid_resource(&reshdr,
                                                            cur_blob,
                                                            cur_solid_rdescs,
                                                            cur_num_solid_rdescs);
                        if (ret)
                                goto out;

                } else {
                        /* Normal blob/resource entry; SOLID not set.  */

                        struct wim_resource_descriptor *rdesc;

                        if (unlikely(cur_solid_rdescs)) {
                                /* This entry terminated a solid run.
                                 * finish_solid_rdescs() frees the array even
                                 * when it fails, so the pointer is cleared
                                 * before the error check to keep the 'out'
                                 * path from freeing it a second time.  */
                                ret = finish_solid_rdescs(cur_solid_rdescs,
                                                          cur_num_solid_rdescs);
                                cur_solid_rdescs = NULL;
                                if (ret)
                                        goto out;
                        }

                        /* How to handle an uncompressed resource with its
                         * uncompressed size different from its compressed size?
                         *
                         * Based on a simple test, WIMGAPI seems to handle this
                         * as follows:
                         *
                         * if (size_in_wim > uncompressed_size) {
                         *      Ignore uncompressed_size; use size_in_wim
                         *      instead.
                         * } else {
                         *      Honor uncompressed_size, but treat the part of
                         *      the file data above size_in_wim as all zeros.
                         * }
                         *
                         * So we will do the same.  */
                        if (unlikely(!(reshdr.flags &
                                       WIM_RESHDR_FLAG_COMPRESSED) &&
                                     (reshdr.size_in_wim >
                                      reshdr.uncompressed_size)))
                        {
                                reshdr.uncompressed_size = reshdr.size_in_wim;
                        }

                        /* Set up a resource descriptor for this blob.  */

                        rdesc = MALLOC(sizeof(struct wim_resource_descriptor));
                        if (!rdesc)
                                goto oom;

                        wim_res_hdr_to_desc(&reshdr, wim, rdesc);

                        blob_set_is_located_in_nonsolid_wim_resource(cur_blob, rdesc);
                }

                /* cur_blob is now a blob bound to a resource.  */

                /* Ignore entries with all zeroes in the hash field.  */
                if (is_zero_hash(cur_blob->hash))
                        goto free_cur_blob_and_continue;

                /* Verify that the part number matches that of the underlying
                 * WIM file.  */
                if (part_number != wim->hdr.part_number) {
                        num_wrong_part_blobs++;
                        goto free_cur_blob_and_continue;
                }

                if (reshdr.flags & WIM_RESHDR_FLAG_METADATA) {

                        cur_blob->is_metadata = 1;

                        /* Blob table entry for a metadata resource.  */

                        /* Metadata entries with no references must be ignored.
                         * See, for example, the WinPE WIMs from the WAIK v2.1.
                         */
                        if (cur_blob->refcnt == 0)
                                goto free_cur_blob_and_continue;

                        if (cur_blob->refcnt != 1) {
                                /* We don't currently support this case due to
                                 * the complications of multiple images sharing
                                 * the same metadata resource or a metadata
                                 * resource also being referenced by files.  */
                                ERROR("Found metadata resource with refcnt != 1");
                                ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
                                goto out;
                        }

                        if (wim->hdr.part_number != 1) {
                                WARNING("Ignoring metadata resource found in a "
                                        "non-first part of the split WIM");
                                goto free_cur_blob_and_continue;
                        }

                        /* The number of entries in the blob table with
                         * WIM_RESHDR_FLAG_METADATA set should be the same as
                         * the image_count field in the WIM header.  */
                        if (image_index == wim->hdr.image_count) {
                                WARNING("Found more metadata resources than images");
                                goto free_cur_blob_and_continue;
                        }

                        /* Notice very carefully:  We are assigning the metadata
                         * resources to images in the same order in which their
                         * blob table entries occur on disk.  (This is also the
                         * behavior of Microsoft's software.)  In particular,
                         * this overrides the actual locations of the metadata
                         * resources themselves in the WIM file as well as any
                         * information written in the XML data.  */
                        DEBUG("Found metadata resource for image %"PRIu32" at "
                              "offset %"PRIu64".",
                              image_index + 1,
                              reshdr.offset_in_wim);

                        wim->image_metadata[image_index++]->metadata_blob = cur_blob;
                } else {
                        /* Blob table entry for a non-metadata blob.  */

                        /* Ignore this blob if it's a duplicate.  */
                        if (lookup_blob(table, cur_blob->hash)) {
                                num_duplicate_blobs++;
                                goto free_cur_blob_and_continue;
                        }

                        /* Insert the blob into the in-memory blob table, keyed
                         * by its SHA-1 message digest.  */
                        blob_table_insert(table, cur_blob);
                }

                /* The descriptor was kept (stored in the image metadata or in
                 * the table), so skip the discard code below.  */
                continue;

        free_cur_blob_and_continue:
                /* Discard this entry's descriptor.  While a solid run is
                 * active, a descriptor already attached to a WIM resource is
                 * detached first -- presumably so that freeing it does not
                 * also release the solid resource descriptor that is still
                 * tracked in cur_solid_rdescs; TODO confirm against
                 * free_blob_descriptor().  */
                if (cur_solid_rdescs &&
                    cur_blob->blob_location == BLOB_IN_WIM)
                        blob_unset_is_located_in_wim_resource(cur_blob);
                free_blob_descriptor(cur_blob);
        }
        /* The last descriptor was either stored or freed above; clear the
         * pointer so that a later 'goto out' does not free it again.  */
        cur_blob = NULL;

        if (cur_solid_rdescs) {
                /* End of blob table terminated a solid run.  */
                ret = finish_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
                cur_solid_rdescs = NULL;
                if (ret)
                        goto out;
        }

        /* Only the first part is checked here, since metadata entries in
         * other parts were ignored above.  Slots for which no metadata entry
         * was found are dropped and the image count is reduced to match.  */
        if (wim->hdr.part_number == 1 && image_index != wim->hdr.image_count) {
                WARNING("Could not find metadata resources for all images");
                for (u32 i = image_index; i < wim->hdr.image_count; i++)
                        put_image_metadata(wim->image_metadata[i], NULL);
                wim->hdr.image_count = image_index;
        }

        if (num_duplicate_blobs > 0)
                WARNING("Ignoring %zu duplicate blobs", num_duplicate_blobs);

        if (num_wrong_part_blobs > 0) {
                WARNING("Ignoring %zu blobs with wrong part number",
                        num_wrong_part_blobs);
        }

        DEBUG("Done reading blob table.");
        wim->blob_table = table;
        ret = 0;
        /* Success: keep the table; only the raw on-disk buffer is released.  */
        goto out_free_buf;

oom:
        ERROR("Not enough memory to read blob table!");
        ret = WIMLIB_ERR_NOMEM;
out:
        /* Failure: release everything built so far.  Each of these pointers
         * may still be NULL at this point, so the helpers are relied upon to
         * accept NULL.  */
        free_solid_rdescs(cur_solid_rdescs, cur_num_solid_rdescs);
        free_blob_descriptor(cur_blob);
        free_blob_table(table);
out_free_buf:
        FREE(buf);
        return ret;
}
1132
1133 static void
1134 write_blob_descriptor(struct blob_descriptor_disk *disk_entry,
1135                       const struct wim_reshdr *out_reshdr,
1136                       u16 part_number, u32 refcnt, const u8 *hash)
1137 {
1138         put_wim_reshdr(out_reshdr, &disk_entry->reshdr);
1139         disk_entry->part_number = cpu_to_le16(part_number);
1140         disk_entry->refcnt = cpu_to_le32(refcnt);
1141         copy_hash(disk_entry->hash, hash);
1142 }
1143
1144 /* Note: the list of blob descriptors must be sorted so that all entries for the
1145  * same solid resource are consecutive.  In addition, blob descriptors for
1146  * metadata resources must be in the same order as the indices of the underlying
1147  * images.  */
1148 int
1149 write_blob_table_from_blob_list(struct list_head *blob_list,
1150                                 struct filedes *out_fd,
1151                                 u16 part_number,
1152                                 struct wim_reshdr *out_reshdr,
1153                                 int write_resource_flags)
1154 {
1155         size_t table_size;
1156         struct blob_descriptor *blob;
1157         struct blob_descriptor_disk *table_buf;
1158         struct blob_descriptor_disk *table_buf_ptr;
1159         int ret;
1160         u64 prev_res_offset_in_wim = ~0ULL;
1161         u64 prev_uncompressed_size;
1162         u64 logical_offset;
1163
1164         table_size = 0;
1165         list_for_each_entry(blob, blob_list, blob_table_list) {
1166                 table_size += sizeof(struct blob_descriptor_disk);
1167
1168                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID &&
1169                     blob->out_res_offset_in_wim != prev_res_offset_in_wim)
1170                 {
1171                         table_size += sizeof(struct blob_descriptor_disk);
1172                         prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1173                 }
1174         }
1175
1176         DEBUG("Writing WIM blob table (size=%zu, offset=%"PRIu64")",
1177               table_size, out_fd->offset);
1178
1179         table_buf = MALLOC(table_size);
1180         if (table_buf == NULL) {
1181                 ERROR("Failed to allocate %zu bytes for temporary blob table",
1182                       table_size);
1183                 return WIMLIB_ERR_NOMEM;
1184         }
1185         table_buf_ptr = table_buf;
1186
1187         prev_res_offset_in_wim = ~0ULL;
1188         prev_uncompressed_size = 0;
1189         logical_offset = 0;
1190         list_for_each_entry(blob, blob_list, blob_table_list) {
1191                 if (blob->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) {
1192                         struct wim_reshdr tmp_reshdr;
1193
1194                         /* Eww.  When WIMGAPI sees multiple solid resources, it
1195                          * expects the offsets to be adjusted as if there were
1196                          * really only one solid resource.  */
1197
1198                         if (blob->out_res_offset_in_wim != prev_res_offset_in_wim) {
1199                                 /* Put the resource entry for solid resource  */
1200                                 tmp_reshdr.offset_in_wim = blob->out_res_offset_in_wim;
1201                                 tmp_reshdr.size_in_wim = blob->out_res_size_in_wim;
1202                                 tmp_reshdr.uncompressed_size = SOLID_RESOURCE_MAGIC_NUMBER;
1203                                 tmp_reshdr.flags = WIM_RESHDR_FLAG_SOLID;
1204
1205                                 write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1206                                                       part_number, 1, zero_hash);
1207
1208                                 logical_offset += prev_uncompressed_size;
1209
1210                                 prev_res_offset_in_wim = blob->out_res_offset_in_wim;
1211                                 prev_uncompressed_size = blob->out_res_uncompressed_size;
1212                         }
1213                         tmp_reshdr = blob->out_reshdr;
1214                         tmp_reshdr.offset_in_wim += logical_offset;
1215                         write_blob_descriptor(table_buf_ptr++, &tmp_reshdr,
1216                                               part_number, blob->out_refcnt, blob->hash);
1217                 } else {
1218                         write_blob_descriptor(table_buf_ptr++, &blob->out_reshdr,
1219                                               part_number, blob->out_refcnt, blob->hash);
1220                 }
1221
1222         }
1223         wimlib_assert((u8*)table_buf_ptr - (u8*)table_buf == table_size);
1224
1225         /* Write the blob table uncompressed.  Although wimlib can handle a
1226          * compressed blob table, MS software cannot.  */
1227         ret = write_wim_resource_from_buffer(table_buf,
1228                                              table_size,
1229                                              true,
1230                                              out_fd,
1231                                              WIMLIB_COMPRESSION_TYPE_NONE,
1232                                              0,
1233                                              out_reshdr,
1234                                              NULL,
1235                                              write_resource_flags);
1236         FREE(table_buf);
1237         DEBUG("ret=%d", ret);
1238         return ret;
1239 }
1240
1241 /* Allocate a blob descriptor for the contents of the buffer, or re-use an
1242  * existing descriptor in @blob_table for an identical blob.  */
1243 struct blob_descriptor *
1244 new_blob_from_data_buffer(const void *buffer, size_t size,
1245                           struct blob_table *blob_table)
1246 {
1247         u8 hash[SHA1_HASH_SIZE];
1248         struct blob_descriptor *blob;
1249         void *buffer_copy;
1250
1251         sha1_buffer(buffer, size, hash);
1252
1253         blob = lookup_blob(blob_table, hash);
1254         if (blob)
1255                 return blob;
1256
1257         blob = new_blob_descriptor();
1258         if (!blob)
1259                 return NULL;
1260
1261         buffer_copy = memdup(buffer, size);
1262         if (!buffer_copy) {
1263                 free_blob_descriptor(blob);
1264                 return NULL;
1265         }
1266         blob_set_is_located_in_attached_buffer(blob, buffer_copy, size);
1267         copy_hash(blob->hash, hash);
1268         blob_table_insert(blob_table, blob);
1269         return blob;
1270 }
1271
1272 struct blob_descriptor *
1273 after_blob_hashed(struct blob_descriptor *blob,
1274                   struct blob_descriptor **back_ptr,
1275                   struct blob_table *blob_table)
1276 {
1277         struct blob_descriptor *duplicate_blob;
1278
1279         list_del(&blob->unhashed_list);
1280         blob->unhashed = 0;
1281
1282         /* Look for a duplicate blob  */
1283         duplicate_blob = lookup_blob(blob_table, blob->hash);
1284         if (duplicate_blob) {
1285                 /* We have a duplicate blob.  Transfer the reference counts from
1286                  * this blob to the duplicate and update the reference to this
1287                  * blob (from a stream) to point to the duplicate.  The caller
1288                  * is responsible for freeing @blob if needed.  */
1289                 wimlib_assert(duplicate_blob->size == blob->size);
1290                 duplicate_blob->refcnt += blob->refcnt;
1291                 blob->refcnt = 0;
1292                 *back_ptr = duplicate_blob;
1293                 return duplicate_blob;
1294         } else {
1295                 /* No duplicate blob, so we need to insert this blob into the
1296                  * blob table and treat it as a hashed blob.  */
1297                 blob_table_insert(blob_table, blob);
1298                 return blob;
1299         }
1300 }
1301
/*
 * Hash an as-yet-unhashed blob and index it in the blob table.
 *
 * The SHA-1 message digest of @blob is computed with sha1_blob(); the blob is
 * then handed to after_blob_hashed(), which takes it off the list of unhashed
 * blobs and either inserts it into @blob_table or merges it with an identical
 * blob already there.
 *
 * @blob:
 *      The blob to hash
 * @blob_table:
 *      The blob table in which the blob needs to be indexed
 * @blob_ret:
 *      On success, receives the resulting blob descriptor: @blob itself if it
 *      was inserted into the blob table, or the pre-existing descriptor if a
 *      duplicate blob was found.  Not written on failure.
 *
 * Returns 0 on success; nonzero if there is an error reading the blob data.
 */
int
hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table,
                   struct blob_descriptor **blob_ret)
{
        /* The back pointer is fetched before hashing, as in the sequence
         * retrieve -> hash -> finish.  */
        struct blob_descriptor **back_ptr = retrieve_pointer_to_unhashed_blob(blob);
        int ret = sha1_blob(blob);

        if (ret == 0)
                *blob_ret = after_blob_hashed(blob, back_ptr, blob_table);
        return ret;
}
1334
1335 void
1336 blob_to_wimlib_resource_entry(const struct blob_descriptor *blob,
1337                               struct wimlib_resource_entry *wentry)
1338 {
1339         memset(wentry, 0, sizeof(*wentry));
1340
1341         wentry->uncompressed_size = blob->size;
1342         if (blob->blob_location == BLOB_IN_WIM) {
1343                 unsigned res_flags = blob->rdesc->flags;
1344
1345                 wentry->part_number = blob->rdesc->wim->hdr.part_number;
1346                 if (res_flags & WIM_RESHDR_FLAG_SOLID) {
1347                         wentry->offset = blob->offset_in_res;
1348                 } else {
1349                         wentry->compressed_size = blob->rdesc->size_in_wim;
1350                         wentry->offset = blob->rdesc->offset_in_wim;
1351                 }
1352                 wentry->raw_resource_offset_in_wim = blob->rdesc->offset_in_wim;
1353                 wentry->raw_resource_compressed_size = blob->rdesc->size_in_wim;
1354                 wentry->raw_resource_uncompressed_size = blob->rdesc->uncompressed_size;
1355
1356                 wentry->is_compressed = (res_flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1357                 wentry->is_free = (res_flags & WIM_RESHDR_FLAG_FREE) != 0;
1358                 wentry->is_spanned = (res_flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1359                 wentry->packed = (res_flags & WIM_RESHDR_FLAG_SOLID) != 0;
1360         }
1361         copy_hash(wentry->sha1_hash, blob->hash);
1362         wentry->reference_count = blob->refcnt;
1363         wentry->is_metadata = blob->is_metadata;
1364 }
1365
/* State passed (as the opaque context pointer) to do_iterate_blob() while
 * iterating over blobs on behalf of wimlib_iterate_lookup_table().  */
struct iterate_blob_context {
        /* User callback invoked once per blob with the converted entry.  */
        wimlib_iterate_lookup_table_callback_t cb;

        /* Opaque pointer forwarded unchanged as the callback's second
         * argument.  */
        void *user_ctx;
};
1370
1371 static int
1372 do_iterate_blob(struct blob_descriptor *blob, void *_ctx)
1373 {
1374         struct iterate_blob_context *ctx = _ctx;
1375         struct wimlib_resource_entry entry;
1376
1377         blob_to_wimlib_resource_entry(blob, &entry);
1378         return (*ctx->cb)(&entry, ctx->user_ctx);
1379 }
1380
1381 /* API function documented in wimlib.h  */
1382 WIMLIBAPI int
1383 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1384                             wimlib_iterate_lookup_table_callback_t cb,
1385                             void *user_ctx)
1386 {
1387         if (flags != 0)
1388                 return WIMLIB_ERR_INVALID_PARAM;
1389
1390         struct iterate_blob_context ctx = {
1391                 .cb = cb,
1392                 .user_ctx = user_ctx,
1393         };
1394         if (wim_has_metadata(wim)) {
1395                 int ret;
1396                 for (int i = 0; i < wim->hdr.image_count; i++) {
1397                         ret = do_iterate_blob(wim->image_metadata[i]->metadata_blob,
1398                                               &ctx);
1399                         if (ret)
1400                                 return ret;
1401                 }
1402         }
1403         return for_blob_in_table(wim->blob_table, do_iterate_blob, &ctx);
1404 }