]> wimlib.net Git - wimlib/blob - src/lookup_table.c
update; add lzms_decompress() stub
[wimlib] / src / lookup_table.c
1 /*
2  * lookup_table.c
3  *
4  * Lookup table, implemented as a hash table, that maps SHA1 message digests to
5  * data streams; plus code to read and write the corresponding on-disk data.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/glob.h"
35 #include "wimlib/lookup_table.h"
36 #include "wimlib/metadata.h"
37 #include "wimlib/paths.h"
38 #include "wimlib/resource.h"
39 #include "wimlib/util.h"
40 #include "wimlib/write.h"
41
42 #include <errno.h>
43 #include <stdlib.h>
44 #ifdef WITH_FUSE
45 #  include <unistd.h> /* for unlink() */
46 #endif
47
48 struct wim_lookup_table *
49 new_lookup_table(size_t capacity)
50 {
51         struct wim_lookup_table *table;
52         struct hlist_head *array;
53
54         table = CALLOC(1, sizeof(struct wim_lookup_table));
55         if (table) {
56                 array = CALLOC(capacity, sizeof(array[0]));
57                 if (array) {
58                         table->num_entries = 0;
59                         table->capacity = capacity;
60                         table->array = array;
61                 } else {
62                         FREE(table);
63                         table = NULL;
64                         ERROR("Failed to allocate memory for lookup table "
65                               "with capacity %zu", capacity);
66                 }
67         }
68         return table;
69 }
70
71 struct wim_lookup_table_entry *
72 new_lookup_table_entry(void)
73 {
74         struct wim_lookup_table_entry *lte;
75
76         lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
77         if (lte == NULL) {
78                 ERROR("Out of memory (tried to allocate %zu bytes for "
79                       "lookup table entry)",
80                       sizeof(struct wim_lookup_table_entry));
81                 return NULL;
82         }
83         lte->refcnt = 1;
84         BUILD_BUG_ON(RESOURCE_NONEXISTENT != 0);
85         BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_NONE != 0);
86         return lte;
87 }
88
89 struct wim_lookup_table_entry *
90 clone_lookup_table_entry(const struct wim_lookup_table_entry *old)
91 {
92         struct wim_lookup_table_entry *new;
93
94         new = memdup(old, sizeof(struct wim_lookup_table_entry));
95         if (!new)
96                 return NULL;
97
98         new->extracted_file = NULL;
99         switch (new->resource_location) {
100         case RESOURCE_IN_WIM:
101                 list_add(&new->wim_resource_list, &new->rspec->lte_list);
102                 break;
103
104         case RESOURCE_IN_FILE_ON_DISK:
105 #ifdef __WIN32__
106         case RESOURCE_WIN32_ENCRYPTED:
107 #endif
108 #ifdef WITH_FUSE
109         case RESOURCE_IN_STAGING_FILE:
110                 BUILD_BUG_ON((void*)&old->file_on_disk !=
111                              (void*)&old->staging_file_name);
112 #endif
113                 new->file_on_disk = TSTRDUP(old->file_on_disk);
114                 if (!new->file_on_disk)
115                         goto out_free;
116                 break;
117         case RESOURCE_IN_ATTACHED_BUFFER:
118                 new->attached_buffer = memdup(old->attached_buffer, old->size);
119                 if (!new->attached_buffer)
120                         goto out_free;
121                 break;
122 #ifdef WITH_NTFS_3G
123         case RESOURCE_IN_NTFS_VOLUME:
124                 if (old->ntfs_loc) {
125                         struct ntfs_location *loc;
126                         loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location));
127                         if (!loc)
128                                 goto out_free;
129                         loc->path = NULL;
130                         loc->stream_name = NULL;
131                         new->ntfs_loc = loc;
132                         loc->path = STRDUP(old->ntfs_loc->path);
133                         if (!loc->path)
134                                 goto out_free;
135                         if (loc->stream_name_nchars) {
136                                 loc->stream_name = memdup(old->ntfs_loc->stream_name,
137                                                           loc->stream_name_nchars * 2);
138                                 if (!loc->stream_name)
139                                         goto out_free;
140                         }
141                 }
142                 break;
143 #endif
144         default:
145                 break;
146         }
147         return new;
148 out_free:
149         free_lookup_table_entry(new);
150         return NULL;
151 }
152
153 void
154 free_lookup_table_entry(struct wim_lookup_table_entry *lte)
155 {
156         if (lte) {
157                 switch (lte->resource_location) {
158                 case RESOURCE_IN_WIM:
159                         list_del(&lte->wim_resource_list);
160                         if (list_empty(&lte->rspec->lte_list))
161                                 FREE(lte->rspec);
162                         break;
163                 case RESOURCE_IN_FILE_ON_DISK:
164         #ifdef __WIN32__
165                 case RESOURCE_WIN32_ENCRYPTED:
166         #endif
167         #ifdef WITH_FUSE
168                 case RESOURCE_IN_STAGING_FILE:
169                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
170                                      (void*)&lte->staging_file_name);
171         #endif
172                 case RESOURCE_IN_ATTACHED_BUFFER:
173                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
174                                      (void*)&lte->attached_buffer);
175                         FREE(lte->file_on_disk);
176                         break;
177 #ifdef WITH_NTFS_3G
178                 case RESOURCE_IN_NTFS_VOLUME:
179                         if (lte->ntfs_loc) {
180                                 FREE(lte->ntfs_loc->path);
181                                 FREE(lte->ntfs_loc->stream_name);
182                                 FREE(lte->ntfs_loc);
183                         }
184                         break;
185 #endif
186                 default:
187                         break;
188                 }
189                 FREE(lte);
190         }
191 }
192
/* for_lookup_table_entry() visitor that frees each entry it is given.  The
 * second argument is unused.  Always returns 0 so that iteration continues.  */
static int
do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore)
{
	(void)ignore;
	free_lookup_table_entry(entry);
	return 0;
}
199
200
201 void
202 free_lookup_table(struct wim_lookup_table *table)
203 {
204         DEBUG2("Freeing lookup table");
205         if (table) {
206                 if (table->array) {
207                         for_lookup_table_entry(table,
208                                                do_free_lookup_table_entry,
209                                                NULL);
210                         FREE(table->array);
211                 }
212                 FREE(table);
213         }
214 }
215
216 /*
217  * Inserts an entry into the lookup table.
218  *
219  * @table:      A pointer to the lookup table.
220  * @lte:        A pointer to the entry to insert.
221  */
222 void
223 lookup_table_insert(struct wim_lookup_table *table,
224                     struct wim_lookup_table_entry *lte)
225 {
226         size_t i = lte->hash_short % table->capacity;
227         hlist_add_head(&lte->hash_list, &table->array[i]);
228
229         /* XXX Make the table grow when too many entries have been inserted. */
230         table->num_entries++;
231 }
232
/* Releases a lookup table entry for good.  With FUSE support, an entry backed
 * by a staging file first has that file unlinked and is removed from the list
 * linked through its unhashed_list member.  */
static void
finalize_lte(struct wim_lookup_table_entry *lte)
{
#ifdef WITH_FUSE
	const int in_staging_file =
		(lte->resource_location == RESOURCE_IN_STAGING_FILE);

	if (in_staging_file) {
		unlink(lte->staging_file_name);
		list_del(&lte->unhashed_list);
	}
#endif
	free_lookup_table_entry(lte);
}
244
245 /* Decrements the reference count for the lookup table entry @lte.  If its
246  * reference count reaches 0, it is unlinked from the lookup table.  If,
247  * furthermore, the entry has no opened file descriptors associated with it, the
248  * entry is freed.  */
249 void
250 lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
251                      struct wim_lookup_table *table)
252 {
253         wimlib_assert(lte != NULL);
254         wimlib_assert(lte->refcnt != 0);
255         if (--lte->refcnt == 0) {
256                 if (lte->unhashed)
257                         list_del(&lte->unhashed_list);
258                 else
259                         lookup_table_unlink(table, lte);
260         #ifdef WITH_FUSE
261                 if (lte->num_opened_fds == 0)
262         #endif
263                         finalize_lte(lte);
264         }
265 }
266
#ifdef WITH_FUSE
/* Drops one open file descriptor from @lte; a count that is already zero is
 * left alone.  The entry is finalized once it has neither open file
 * descriptors nor references.  */
void
lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte)
{
	if (lte->num_opened_fds == 0)
		return;

	lte->num_opened_fds--;
	if (lte->num_opened_fds == 0 && lte->refcnt == 0)
		finalize_lte(lte);
}
#endif
276
277 /* Calls a function on all the entries in the WIM lookup table.  Stop early and
278  * return nonzero if any call to the function returns nonzero. */
279 int
280 for_lookup_table_entry(struct wim_lookup_table *table,
281                        int (*visitor)(struct wim_lookup_table_entry *, void *),
282                        void *arg)
283 {
284         struct wim_lookup_table_entry *lte;
285         struct hlist_node *pos, *tmp;
286         int ret;
287
288         for (size_t i = 0; i < table->capacity; i++) {
289                 hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
290                                           hash_list)
291                 {
292                         wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA));
293                         ret = visitor(lte, arg);
294                         if (ret)
295                                 return ret;
296                 }
297         }
298         return 0;
299 }
300
301 /* qsort() callback that sorts streams (represented by `struct
302  * wim_lookup_table_entry's) into an order optimized for reading and writing.
303  *
304  * Sorting is done primarily by resource location, then secondarily by a
305  * per-resource location order.  For example, resources in WIM files are sorted
306  * primarily by part number, then secondarily by offset, as to implement optimal
307  * reading of either a standalone or split WIM.  */
308 static int
309 cmp_streams_by_sequential_order(const void *p1, const void *p2)
310 {
311         const struct wim_lookup_table_entry *lte1, *lte2;
312         int v;
313         WIMStruct *wim1, *wim2;
314
315         lte1 = *(const struct wim_lookup_table_entry**)p1;
316         lte2 = *(const struct wim_lookup_table_entry**)p2;
317
318         v = (int)lte1->resource_location - (int)lte2->resource_location;
319
320         /* Different resource locations?  */
321         if (v)
322                 return v;
323
324         switch (lte1->resource_location) {
325         case RESOURCE_IN_WIM:
326                 wim1 = lte1->rspec->wim;
327                 wim2 = lte2->rspec->wim;
328
329                 /* Different (possibly split) WIMs?  */
330                 if (wim1 != wim2) {
331                         v = memcmp(wim1->hdr.guid, wim2->hdr.guid, WIM_GID_LEN);
332                         if (v)
333                                 return v;
334                 }
335
336                 /* Different part numbers in the same WIM?  */
337                 v = (int)wim1->hdr.part_number - (int)wim2->hdr.part_number;
338                 if (v)
339                         return v;
340
341                 /* Compare by offset.  */
342                 if (lte1->rspec->offset_in_wim < lte2->rspec->offset_in_wim)
343                         return -1;
344                 if (lte1->rspec->offset_in_wim > lte2->rspec->offset_in_wim)
345                         return 1;
346                 return 0;
347         case RESOURCE_IN_FILE_ON_DISK:
348 #ifdef WITH_FUSE
349         case RESOURCE_IN_STAGING_FILE:
350 #endif
351 #ifdef __WIN32__
352         case RESOURCE_WIN32_ENCRYPTED:
353 #endif
354                 /* Compare files by path: just a heuristic that will place files
355                  * in the same directory next to each other.  */
356                 return tstrcmp(lte1->file_on_disk, lte2->file_on_disk);
357 #ifdef WITH_NTFS_3G
358         case RESOURCE_IN_NTFS_VOLUME:
359                 return tstrcmp(lte1->ntfs_loc->path, lte2->ntfs_loc->path);
360 #endif
361         default:
362                 /* No additional sorting order defined for this resource
363                  * location (e.g. RESOURCE_IN_ATTACHED_BUFFER); simply compare
364                  * everything equal to each other.  */
365                 return 0;
366         }
367 }
368
369 int
370 sort_stream_list_by_sequential_order(struct list_head *stream_list,
371                                      size_t list_head_offset)
372 {
373         struct list_head *cur;
374         struct wim_lookup_table_entry **array;
375         size_t i;
376         size_t array_size;
377         size_t num_streams = 0;
378
379         list_for_each(cur, stream_list)
380                 num_streams++;
381
382         array_size = num_streams * sizeof(array[0]);
383         array = MALLOC(array_size);
384         if (!array)
385                 return WIMLIB_ERR_NOMEM;
386         cur = stream_list->next;
387         for (i = 0; i < num_streams; i++) {
388                 array[i] = (struct wim_lookup_table_entry*)((u8*)cur -
389                                                             list_head_offset);
390                 cur = cur->next;
391         }
392
393         qsort(array, num_streams, sizeof(array[0]),
394               cmp_streams_by_sequential_order);
395
396         INIT_LIST_HEAD(stream_list);
397         for (i = 0; i < num_streams; i++) {
398                 list_add_tail((struct list_head*)
399                                ((u8*)array[i] + list_head_offset),
400                               stream_list);
401         }
402         FREE(array);
403         return 0;
404 }
405
406
/* for_lookup_table_entry() visitor that appends @lte to an array.  @_pp points
 * to the write cursor (a pointer to the next free array slot), which is
 * advanced by one slot.  Always returns 0.  */
static int
add_lte_to_array(struct wim_lookup_table_entry *lte,
		 void *_pp)
{
	struct wim_lookup_table_entry ***cursor = _pp;

	**cursor = lte;
	(*cursor)++;
	return 0;
}
415
416 /* Iterate through the lookup table entries, but first sort them by stream
417  * offset in the WIM.  Caution: this is intended to be used when the stream
418  * offset field has actually been set. */
419 int
420 for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
421                                   int (*visitor)(struct wim_lookup_table_entry *,
422                                                  void *),
423                                   void *arg)
424 {
425         struct wim_lookup_table_entry **lte_array, **p;
426         size_t num_streams = table->num_entries;
427         int ret;
428
429         lte_array = MALLOC(num_streams * sizeof(lte_array[0]));
430         if (!lte_array)
431                 return WIMLIB_ERR_NOMEM;
432         p = lte_array;
433         for_lookup_table_entry(table, add_lte_to_array, &p);
434
435         wimlib_assert(p == lte_array + num_streams);
436
437         qsort(lte_array, num_streams, sizeof(lte_array[0]),
438               cmp_streams_by_sequential_order);
439         ret = 0;
440         for (size_t i = 0; i < num_streams; i++) {
441                 ret = visitor(lte_array[i], arg);
442                 if (ret)
443                         break;
444         }
445         FREE(lte_array);
446         return ret;
447 }
448
449 /* On-disk format of a WIM lookup table entry (stream entry). */
450 struct wim_lookup_table_entry_disk {
451         /* Size, offset, and flags of the stream.  */
452         struct wim_reshdr_disk reshdr;
453
454         /* Which part of the split WIM this stream is in; indexed from 1. */
455         le16 part_number;
456
457         /* Reference count of this stream over all WIM images. */
458         le32 refcnt;
459
460         /* SHA1 message digest of the uncompressed data of this stream, or
461          * optionally all zeroes if this stream is of zero length. */
462         u8 hash[SHA1_HASH_SIZE];
463 } _packed_attribute;
464
465 #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50
466
467 static int
468 validate_resource(const struct wim_resource_spec *rspec,
469                   u64 offset_save, u64 size_save)
470 {
471         struct wim_lookup_table_entry *lte;
472         if (!list_is_singular(&rspec->lte_list)) {
473                 list_for_each_entry(lte, &rspec->lte_list, wim_resource_list) {
474                         if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED)
475                                 lte->flags |= WIM_RESHDR_FLAG_COMPRESSED;
476                         else
477                                 lte->flags &= ~WIM_RESHDR_FLAG_COMPRESSED;
478
479                         if (!(lte->flags & WIM_RESHDR_FLAG_CONCAT)) {
480                                 lte->offset_in_res = offset_save;
481                                 lte->size = size_save;
482                         }
483
484                         if (lte->offset_in_res + lte->size < lte->size ||
485                             lte->offset_in_res + lte->size > rspec->uncompressed_size)
486                         {
487                                 return WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
488                         }
489                 }
490         }
491         return 0;
492 }
493
494 /*
495  * Reads the lookup table from a WIM file.
496  *
497  * Saves lookup table entries for non-metadata streams in a hash table, and
498  * saves the metadata entry for each image in a special per-image location (the
499  * image_metadata array).
500  *
501  * Return values:
502  *      WIMLIB_ERR_SUCCESS (0)
503  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
504  *      WIMLIB_ERR_RESOURCE_NOT_FOUND
505  */
506 int
507 read_wim_lookup_table(WIMStruct *wim)
508 {
509         int ret;
510         size_t i;
511         size_t num_entries;
512         struct wim_lookup_table *table;
513         struct wim_lookup_table_entry *cur_entry, *duplicate_entry;
514         struct wim_resource_spec *cur_rspec;
515         u64 size_save;
516         u64 offset_save;
517         void *buf;
518
519         BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) !=
520                      WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE);
521
522         DEBUG("Reading lookup table.");
523
524         /* Calculate number of entries in the lookup table.  */
525         num_entries = wim->hdr.lookup_table_reshdr.uncompressed_size /
526                       sizeof(struct wim_lookup_table_entry_disk);
527
528         /* Read the lookup table into a buffer.  */
529         ret = wim_reshdr_to_data(&wim->hdr.lookup_table_reshdr, wim, &buf);
530         if (ret)
531                 goto out;
532
533         /* Allocate hash table.  */
534         table = new_lookup_table(num_entries * 2 + 1);
535         if (table == NULL) {
536                 ERROR("Not enough memory to read lookup table.");
537                 ret = WIMLIB_ERR_NOMEM;
538                 goto out_free_buf;
539         }
540
541         /* Allocate and initalize `struct wim_lookup_table_entry's from the
542          * on-disk lookup table.  */
543         wim->current_image = 0;
544         cur_rspec = NULL;
545         for (i = 0; i < num_entries; i++) {
546                 const struct wim_lookup_table_entry_disk *disk_entry =
547                         &((const struct wim_lookup_table_entry_disk*)buf)[i];
548                 u16 part_number;
549                 struct wim_reshdr reshdr;
550
551                 ret = get_wim_reshdr(&disk_entry->reshdr, &reshdr);
552                 if (ret) {
553                         ERROR("Resource header is invalid!");
554                         goto out_free_lookup_table;
555                 }
556
557                 DEBUG("reshdr: size_in_wim=%"PRIu64", "
558                       "uncompressed_size=%"PRIu64", "
559                       "offset_in_wim=%"PRIu64", "
560                       "flags=0x%02x",
561                       reshdr.size_in_wim, reshdr.uncompressed_size,
562                       reshdr.offset_in_wim, reshdr.flags);
563
564                 cur_entry = new_lookup_table_entry();
565                 if (cur_entry == NULL) {
566                         ERROR("Not enough memory to read lookup table!");
567                         ret = WIMLIB_ERR_NOMEM;
568                         goto out_free_lookup_table;
569                 }
570
571                 part_number = le16_to_cpu(disk_entry->part_number);
572                 cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt);
573                 copy_hash(cur_entry->hash, disk_entry->hash);
574
575                 if (part_number != wim->hdr.part_number) {
576                         WARNING("A lookup table entry in part %hu of the WIM "
577                                 "points to part %hu (ignoring it)",
578                                 wim->hdr.part_number, part_number);
579                         free_lookup_table_entry(cur_entry);
580                         continue;
581                 }
582
583                 if (cur_rspec == NULL ||
584                     !(reshdr.flags & WIM_RESHDR_FLAG_CONCAT))
585                 {
586                         /* Starting new run of stream entries that all share the
587                          * same WIM resource (streams concatenated together); or
588                          * simply a single normal entry by itself.  */
589
590                         if (cur_rspec != NULL) {
591                                 ret = validate_resource(cur_rspec, offset_save,
592                                                         size_save);
593                                 if (ret)
594                                         goto out_free_cur_entry;
595                         }
596
597                         cur_rspec = MALLOC(sizeof(struct wim_resource_spec));
598                         if (cur_rspec == NULL) {
599                                 ERROR("Not enough memory to read lookup table!");
600                                 ret = WIMLIB_ERR_NOMEM;
601                                 goto out_free_cur_entry;
602                         }
603                         offset_save = reshdr.offset_in_wim;
604                         size_save = reshdr.size_in_wim;
605                         wim_res_hdr_to_spec(&reshdr, wim, cur_rspec);
606                 } else if (is_zero_hash(cur_entry->hash)) {
607                         /* Found the resource specification for the run.  */
608                         cur_rspec->offset_in_wim = reshdr.offset_in_wim;
609                         cur_rspec->size_in_wim = reshdr.size_in_wim;
610                         cur_rspec->flags = reshdr.flags;
611                         DEBUG("Full run is %"PRIu64" compressed bytes "
612                               "at file offset %"PRIu64" (flags 0x%02x)",
613                               cur_rspec->size_in_wim,
614                               cur_rspec->offset_in_wim,
615                               cur_rspec->flags);
616                         free_lookup_table_entry(cur_entry);
617                         continue;
618                 } else {
619                         /* Continuing the run with another stream.  */
620                         DEBUG("Continuing concat run with stream: "
621                               "%"PRIu64" uncompressed bytes @ resource offset %"PRIu64")",
622                               reshdr.size_in_wim, reshdr.offset_in_wim);
623                         cur_rspec->uncompressed_size += reshdr.size_in_wim;
624                 }
625
626                 lte_bind_wim_resource_spec(cur_entry, cur_rspec);
627                 if (reshdr.flags & WIM_RESHDR_FLAG_CONCAT) {
628                         /* In concatenation runs, the offset field is used for
629                          * in-resource offset, not the in-WIM offset, and the
630                          * size field is used for the uncompressed size, not the
631                          * compressed size.  */
632                         cur_entry->offset_in_res = reshdr.offset_in_wim;
633                         cur_entry->size = reshdr.size_in_wim;
634                         cur_entry->flags = reshdr.flags;
635                 } else {
636                         /* These may be overwritten in validate_resource() if
637                          * the run turns out to be a concatenation.  */
638                         cur_entry->offset_in_res = 0;
639                         cur_entry->size = reshdr.uncompressed_size;
640                         cur_entry->flags = reshdr.flags;
641                 }
642
643                 if (is_zero_hash(cur_entry->hash)) {
644                         WARNING("The WIM lookup table contains an entry with a "
645                                 "SHA1 message digest of all 0's (ignoring it)");
646                         free_lookup_table_entry(cur_entry);
647                         continue;
648                 }
649
650                 if (cur_entry->flags & WIM_RESHDR_FLAG_METADATA) {
651                         /* Lookup table entry for a metadata resource */
652                         if (cur_entry->refcnt != 1) {
653                                 /* Metadata entries with no references must be
654                                  * ignored.  See for example the WinPE WIMs from
655                                  * WAIK v2.1.  */
656                                 if (cur_entry->refcnt == 0) {
657                                         free_lookup_table_entry(cur_entry);
658                                         continue;
659                                 }
660                                 if (wimlib_print_errors) {
661                                         ERROR("Found metadata resource with refcnt != 1:");
662                                         print_lookup_table_entry(cur_entry, stderr);
663                                 }
664                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
665                                 goto out_free_cur_entry;
666                         }
667
668                         if (wim->hdr.part_number != 1) {
669                                 WARNING("Ignoring metadata resource found in a "
670                                         "non-first part of the split WIM");
671                                 free_lookup_table_entry(cur_entry);
672                                 continue;
673                         }
674                         if (wim->current_image == wim->hdr.image_count) {
675                                 WARNING("The WIM header says there are %u images "
676                                         "in the WIM, but we found more metadata "
677                                         "resources than this (ignoring the extra)",
678                                         wim->hdr.image_count);
679                                 free_lookup_table_entry(cur_entry);
680                                 continue;
681                         }
682
683                         /* Notice very carefully:  We are assigning the metadata
684                          * resources in the exact order mirrored by their lookup
685                          * table entries on disk, which is the behavior of
686                          * Microsoft's software.  In particular, this overrides
687                          * the actual locations of the metadata resources
688                          * themselves in the WIM file as well as any information
689                          * written in the XML data. */
690                         DEBUG("Found metadata resource for image %u at "
691                               "offset %"PRIu64".",
692                               wim->current_image + 1,
693                               cur_entry->rspec->offset_in_wim);
694                         wim->image_metadata[
695                                 wim->current_image++]->metadata_lte = cur_entry;
696                 } else {
697                         /* Lookup table entry for a stream that is not a
698                          * metadata resource */
699                         duplicate_entry = lookup_resource(table, cur_entry->hash);
700                         if (duplicate_entry) {
701                                 if (wimlib_print_errors) {
702                                         WARNING("The WIM lookup table contains two entries with the "
703                                               "same SHA1 message digest!");
704                                         WARNING("The first entry is:");
705                                         print_lookup_table_entry(duplicate_entry, stderr);
706                                         WARNING("The second entry is:");
707                                         print_lookup_table_entry(cur_entry, stderr);
708                                 }
709                                 free_lookup_table_entry(cur_entry);
710                                 continue;
711                         } else {
712                                 lookup_table_insert(table, cur_entry);
713                         }
714                 }
715         }
716
717         if (cur_rspec != NULL) {
718                 ret = validate_resource(cur_rspec, offset_save, size_save);
719                 if (ret)
720                         goto out_free_cur_entry;
721         }
722
723         if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) {
724                 WARNING("The header of \"%"TS"\" says there are %u images in\n"
725                         "          the WIM, but we only found %d metadata resources!  Acting as if\n"
726                         "          the header specified only %d images instead.",
727                         wim->filename, wim->hdr.image_count,
728                         wim->current_image, wim->current_image);
729                 for (int i = wim->current_image; i < wim->hdr.image_count; i++)
730                         put_image_metadata(wim->image_metadata[i], NULL);
731                 wim->hdr.image_count = wim->current_image;
732         }
733         DEBUG("Done reading lookup table.");
734         wim->lookup_table = table;
735         ret = 0;
736         goto out_free_buf;
737
738 out_free_cur_entry:
739         FREE(cur_entry);
740 out_free_lookup_table:
741         free_lookup_table(table);
742 out_free_buf:
743         FREE(buf);
744 out:
745         wim->current_image = 0;
746         return ret;
747 }
748
749
750 static void
751 write_wim_lookup_table_entry(const struct wim_lookup_table_entry *lte,
752                              struct wim_lookup_table_entry_disk *disk_entry,
753                              u16 part_number)
754 {
755         put_wim_reshdr(&lte->out_reshdr, &disk_entry->reshdr);
756         disk_entry->part_number = cpu_to_le16(part_number);
757         disk_entry->refcnt = cpu_to_le32(lte->out_refcnt);
758         copy_hash(disk_entry->hash, lte->hash);
759 }
760
761 static int
762 write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
763                                         struct filedes *out_fd,
764                                         u16 part_number,
765                                         struct wim_reshdr *out_reshdr,
766                                         int write_resource_flags,
767                                         struct wimlib_lzx_context **comp_ctx)
768 {
769         size_t table_size;
770         struct wim_lookup_table_entry *lte;
771         struct wim_lookup_table_entry_disk *table_buf;
772         struct wim_lookup_table_entry_disk *table_buf_ptr;
773         int ret;
774
775         table_size = 0;
776         list_for_each_entry(lte, stream_list, lookup_table_list)
777                 table_size += sizeof(struct wim_lookup_table_entry_disk);
778
779         DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")",
780               table_size, out_fd->offset);
781
782         table_buf = MALLOC(table_size);
783         if (!table_buf) {
784                 ERROR("Failed to allocate %zu bytes for temporary lookup table",
785                       table_size);
786                 return WIMLIB_ERR_NOMEM;
787         }
788         table_buf_ptr = table_buf;
789         list_for_each_entry(lte, stream_list, lookup_table_list)
790                 write_wim_lookup_table_entry(lte, table_buf_ptr++, part_number);
791
792         /* Write the lookup table uncompressed.  Although wimlib can handle a
793          * compressed lookup table, MS software cannot.  */
794         ret = write_wim_resource_from_buffer(table_buf,
795                                              table_size,
796                                              WIM_RESHDR_FLAG_METADATA,
797                                              out_fd,
798                                              WIMLIB_COMPRESSION_TYPE_NONE,
799                                              0,
800                                              out_reshdr,
801                                              NULL,
802                                              write_resource_flags,
803                                              comp_ctx);
804         FREE(table_buf);
805         DEBUG("ret=%d", ret);
806         return ret;
807 }
808
809 static int
810 append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list)
811 {
812         /* Lookup table entries with 'out_refcnt' == 0 correspond to streams not
813          * written and not present in the resulting WIM file, and should not be
814          * included in the lookup table.
815          *
816          * Lookup table entries marked as filtered (EXTERNAL_WIM) with
817          * 'out_refcnt != 0' were referenced as part of the logical write but
818          * correspond to streams that were not in fact written, and should not
819          * be included in the lookup table.
820          *
821          * Lookup table entries marked as filtered (SAME_WIM) with 'out_refcnt
822          * != 0' were referenced as part of the logical write but correspond to
823          * streams that were not in fact written, but nevertheless were already
824          * present in the WIM being overwritten in-place.  These entries must be
825          * included in the lookup table, and the resource information to write
826          * needs to be copied from the resource information read originally.
827          */
828         if (lte->out_refcnt != 0 && !(lte->filtered & FILTERED_EXTERNAL_WIM)) {
829                 if (lte->filtered & FILTERED_SAME_WIM)
830                         wim_res_spec_to_hdr(lte->rspec, &lte->out_reshdr);
831                 list_add_tail(&lte->lookup_table_list, (struct list_head*)_list);
832         }
833         return 0;
834 }
835
836 int
837 write_wim_lookup_table(WIMStruct *wim, int image, int write_flags,
838                        struct wim_reshdr *out_reshdr,
839                        struct list_head *stream_list_override)
840 {
841         int write_resource_flags;
842         struct list_head _stream_list;
843         struct list_head *stream_list;
844
845         if (stream_list_override) {
846                 stream_list = stream_list_override;
847         } else {
848                 stream_list = &_stream_list;
849                 INIT_LIST_HEAD(stream_list);
850         }
851
852         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)) {
853                 int start_image;
854                 int end_image;
855
856                 if (image == WIMLIB_ALL_IMAGES) {
857                         start_image = 1;
858                         end_image = wim->hdr.image_count;
859                 } else {
860                         start_image = image;
861                         end_image = image;
862                 }
863
864                 /* Push metadata resource lookup table entries onto the front of
865                  * the list in reverse order, so that they're written in order.
866                  */
867                 for (int i = end_image; i >= start_image; i--) {
868                         struct wim_lookup_table_entry *metadata_lte;
869
870                         metadata_lte = wim->image_metadata[i - 1]->metadata_lte;
871                         metadata_lte->out_refcnt = 1;
872                         metadata_lte->out_reshdr.flags |= WIM_RESHDR_FLAG_METADATA;
873                         list_add(&metadata_lte->lookup_table_list, stream_list);
874                 }
875         }
876
877         /* Append additional lookup table entries that need to be written, with
878          * some special handling for streams that have been marked as filtered.
879          */
880         if (!stream_list_override) {
881                 for_lookup_table_entry(wim->lookup_table,
882                                        append_lookup_table_entry, stream_list);
883         }
884
885         write_resource_flags = 0;
886         if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
887                 write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE;
888         return write_wim_lookup_table_from_stream_list(stream_list,
889                                                        &wim->out_fd,
890                                                        wim->hdr.part_number,
891                                                        out_reshdr,
892                                                        write_resource_flags,
893                                                        &wim->lzx_context);
894 }
895
896
897 int
898 lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
899 {
900         lte->real_refcnt = 0;
901         return 0;
902 }
903
904 int
905 lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
906 {
907         lte->out_refcnt = 0;
908         return 0;
909 }
910
911 int
912 lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore)
913 {
914         if (lte->extracted_file != NULL) {
915                 FREE(lte->extracted_file);
916                 lte->extracted_file = NULL;
917         }
918         return 0;
919 }
920
921 void
922 print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out)
923 {
924         if (lte == NULL) {
925                 tputc(T('\n'), out);
926                 return;
927         }
928
929
930         tprintf(T("Uncompressed size     = %"PRIu64" bytes\n"),
931                 lte->size);
932         if (lte_is_partial(lte)) {
933                 tprintf(T("Offset                = %"PRIu64" bytes\n"),
934                         lte->offset_in_res);
935
936                 tprintf(T("Raw uncompressed size = %"PRIu64" bytes\n"),
937                         lte->rspec->uncompressed_size);
938
939                 tprintf(T("Raw compressed size   = %"PRIu64" bytes\n"),
940                         lte->rspec->size_in_wim);
941
942                 tprintf(T("Raw offset            = %"PRIu64" bytes\n"),
943                         lte->rspec->offset_in_wim);
944         } else if (lte->resource_location == RESOURCE_IN_WIM) {
945                 tprintf(T("Compressed size       = %"PRIu64" bytes\n"),
946                         lte->rspec->size_in_wim);
947
948                 tprintf(T("Offset                = %"PRIu64" bytes\n"),
949                         lte->rspec->offset_in_wim);
950         }
951
952         tfprintf(out, T("Reference Count       = %u\n"), lte->refcnt);
953
954         if (lte->unhashed) {
955                 tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"),
956                          lte->back_inode, lte->back_stream_id);
957         } else {
958                 tfprintf(out, T("Hash                  = 0x"));
959                 print_hash(lte->hash, out);
960                 tputc(T('\n'), out);
961         }
962
963         tfprintf(out, T("Flags                 = "));
964         u8 flags = lte->flags;
965         if (flags & WIM_RESHDR_FLAG_COMPRESSED)
966                 tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out);
967         if (flags & WIM_RESHDR_FLAG_FREE)
968                 tfputs(T("WIM_RESHDR_FLAG_FREE, "), out);
969         if (flags & WIM_RESHDR_FLAG_METADATA)
970                 tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out);
971         if (flags & WIM_RESHDR_FLAG_SPANNED)
972                 tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out);
973         if (flags & WIM_RESHDR_FLAG_CONCAT)
974                 tfputs(T("WIM_RESHDR_FLAG_CONCAT, "), out);
975         tputc(T('\n'), out);
976         switch (lte->resource_location) {
977         case RESOURCE_IN_WIM:
978                 if (lte->rspec->wim->filename) {
979                         tfprintf(out, T("WIM file              = `%"TS"'\n"),
980                                  lte->rspec->wim->filename);
981                 }
982                 break;
983 #ifdef __WIN32__
984         case RESOURCE_WIN32_ENCRYPTED:
985 #endif
986         case RESOURCE_IN_FILE_ON_DISK:
987                 tfprintf(out, T("File on Disk          = `%"TS"'\n"),
988                          lte->file_on_disk);
989                 break;
990 #ifdef WITH_FUSE
991         case RESOURCE_IN_STAGING_FILE:
992                 tfprintf(out, T("Staging File          = `%"TS"'\n"),
993                                 lte->staging_file_name);
994                 break;
995 #endif
996         default:
997                 break;
998         }
999         tputc(T('\n'), out);
1000 }
1001
1002 void
1003 lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte,
1004                              struct wimlib_resource_entry *wentry)
1005 {
1006         memset(wentry, 0, sizeof(*wentry));
1007
1008         wentry->uncompressed_size = lte->size;
1009         if (lte->resource_location == RESOURCE_IN_WIM) {
1010                 wentry->part_number = lte->rspec->wim->hdr.part_number;
1011                 if (lte_is_partial(lte)) {
1012                         wentry->compressed_size = 0;
1013                         wentry->offset = lte->offset_in_res;
1014                 } else {
1015                         wentry->compressed_size = lte->rspec->size_in_wim;
1016                         wentry->offset = lte->rspec->offset_in_wim;
1017                 }
1018                 wentry->raw_resource_offset_in_wim = lte->rspec->offset_in_wim;
1019                 wentry->raw_resource_uncompressed_size = lte->rspec->uncompressed_size;
1020                 wentry->raw_resource_compressed_size = lte->rspec->size_in_wim;
1021         }
1022         copy_hash(wentry->sha1_hash, lte->hash);
1023         wentry->reference_count = lte->refcnt;
1024         wentry->is_compressed = (lte->flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
1025         wentry->is_metadata = (lte->flags & WIM_RESHDR_FLAG_METADATA) != 0;
1026         wentry->is_free = (lte->flags & WIM_RESHDR_FLAG_FREE) != 0;
1027         wentry->is_spanned = (lte->flags & WIM_RESHDR_FLAG_SPANNED) != 0;
1028         wentry->is_partial = lte_is_partial(lte);
1029 }
1030
1031 struct iterate_lte_context {
1032         wimlib_iterate_lookup_table_callback_t cb;
1033         void *user_ctx;
1034 };
1035
1036 static int
1037 do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx)
1038 {
1039         struct iterate_lte_context *ctx = _ctx;
1040         struct wimlib_resource_entry entry;
1041
1042         lte_to_wimlib_resource_entry(lte, &entry);
1043         return (*ctx->cb)(&entry, ctx->user_ctx);
1044 }
1045
1046 /* API function documented in wimlib.h  */
1047 WIMLIBAPI int
1048 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
1049                             wimlib_iterate_lookup_table_callback_t cb,
1050                             void *user_ctx)
1051 {
1052         struct iterate_lte_context ctx = {
1053                 .cb = cb,
1054                 .user_ctx = user_ctx,
1055         };
1056         if (wim->hdr.part_number == 1) {
1057                 int ret;
1058                 for (int i = 0; i < wim->hdr.image_count; i++) {
1059                         ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte,
1060                                              &ctx);
1061                         if (ret)
1062                                 return ret;
1063                 }
1064         }
1065         return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx);
1066 }
1067
1068 /* Given a SHA1 message digest, return the corresponding entry in the WIM's
1069  * lookup table, or NULL if there is none.  */
1070 struct wim_lookup_table_entry *
1071 lookup_resource(const struct wim_lookup_table *table, const u8 hash[])
1072 {
1073         size_t i;
1074         struct wim_lookup_table_entry *lte;
1075         struct hlist_node *pos;
1076
1077         wimlib_assert(table != NULL);
1078         wimlib_assert(hash != NULL);
1079
1080         i = *(size_t*)hash % table->capacity;
1081         hlist_for_each_entry(lte, pos, &table->array[i], hash_list)
1082                 if (hashes_equal(hash, lte->hash))
1083                         return lte;
1084         return NULL;
1085 }
1086
1087 #ifdef WITH_FUSE
1088 /*
1089  * Finds the dentry, lookup table entry, and stream index for a WIM file stream,
1090  * given a path name.
1091  *
1092  * This is only for pre-resolved inodes.
1093  */
1094 int
1095 wim_pathname_to_stream(WIMStruct *wim,
1096                        const tchar *path,
1097                        int lookup_flags,
1098                        struct wim_dentry **dentry_ret,
1099                        struct wim_lookup_table_entry **lte_ret,
1100                        u16 *stream_idx_ret)
1101 {
1102         struct wim_dentry *dentry;
1103         struct wim_lookup_table_entry *lte;
1104         u16 stream_idx;
1105         const tchar *stream_name = NULL;
1106         struct wim_inode *inode;
1107         tchar *p = NULL;
1108
1109         if (lookup_flags & LOOKUP_FLAG_ADS_OK) {
1110                 stream_name = path_stream_name(path);
1111                 if (stream_name) {
1112                         p = (tchar*)stream_name - 1;
1113                         *p = T('\0');
1114                 }
1115         }
1116
1117         dentry = get_dentry(wim, path);
1118         if (p)
1119                 *p = T(':');
1120         if (!dentry)
1121                 return -errno;
1122
1123         inode = dentry->d_inode;
1124
1125         if (!inode->i_resolved)
1126                 if (inode_resolve_ltes(inode, wim->lookup_table, false))
1127                         return -EIO;
1128
1129         if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK)
1130               && inode_is_directory(inode))
1131                 return -EISDIR;
1132
1133         if (stream_name) {
1134                 struct wim_ads_entry *ads_entry;
1135                 u16 ads_idx;
1136                 ads_entry = inode_get_ads_entry(inode, stream_name,
1137                                                 &ads_idx);
1138                 if (ads_entry) {
1139                         stream_idx = ads_idx + 1;
1140                         lte = ads_entry->lte;
1141                         goto out;
1142                 } else {
1143                         return -ENOENT;
1144                 }
1145         } else {
1146                 lte = inode_unnamed_stream_resolved(inode, &stream_idx);
1147         }
1148 out:
1149         if (dentry_ret)
1150                 *dentry_ret = dentry;
1151         if (lte_ret)
1152                 *lte_ret = lte;
1153         if (stream_idx_ret)
1154                 *stream_idx_ret = stream_idx;
1155         return 0;
1156 }
1157 #endif
1158
1159 int
1160 resource_not_found_error(const struct wim_inode *inode, const u8 *hash)
1161 {
1162         if (wimlib_print_errors) {
1163                 ERROR("\"%"TS"\": resource not found", inode_first_full_path(inode));
1164                 tfprintf(stderr, T("        SHA-1 message digest of missing resource:\n        "));
1165                 print_hash(hash, stderr);
1166                 tputc(T('\n'), stderr);
1167         }
1168         return WIMLIB_ERR_RESOURCE_NOT_FOUND;
1169 }
1170
1171 /*
1172  * Resolve an inode's lookup table entries.
1173  *
1174  * This replaces the SHA1 hash fields (which are used to lookup an entry in the
1175  * lookup table) with pointers directly to the lookup table entries.
1176  *
1177  * If @force is %false:
1178  *      If any needed SHA1 message digests are not found in the lookup table,
1179  *      WIMLIB_ERR_RESOURCE_NOT_FOUND is returned and the inode is left
1180  *      unmodified.
1181  * If @force is %true:
1182  *      If any needed SHA1 message digests are not found in the lookup table,
1183  *      new entries are allocated and inserted into the lookup table.
1184  */
1185 int
1186 inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table,
1187                    bool force)
1188 {
1189         const u8 *hash;
1190
1191         if (!inode->i_resolved) {
1192                 struct wim_lookup_table_entry *lte, *ads_lte;
1193
1194                 /* Resolve the default file stream */
1195                 lte = NULL;
1196                 hash = inode->i_hash;
1197                 if (!is_zero_hash(hash)) {
1198                         lte = lookup_resource(table, hash);
1199                         if (!lte) {
1200                                 if (force) {
1201                                         lte = new_lookup_table_entry();
1202                                         if (!lte)
1203                                                 return WIMLIB_ERR_NOMEM;
1204                                         copy_hash(lte->hash, hash);
1205                                         lookup_table_insert(table, lte);
1206                                 } else {
1207                                         goto resource_not_found;
1208                                 }
1209                         }
1210                 }
1211
1212                 /* Resolve the alternate data streams */
1213                 struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads];
1214                 for (u16 i = 0; i < inode->i_num_ads; i++) {
1215                         struct wim_ads_entry *cur_entry;
1216
1217                         ads_lte = NULL;
1218                         cur_entry = &inode->i_ads_entries[i];
1219                         hash = cur_entry->hash;
1220                         if (!is_zero_hash(hash)) {
1221                                 ads_lte = lookup_resource(table, hash);
1222                                 if (!ads_lte) {
1223                                         if (force) {
1224                                                 ads_lte = new_lookup_table_entry();
1225                                                 if (!ads_lte)
1226                                                         return WIMLIB_ERR_NOMEM;
1227                                                 copy_hash(ads_lte->hash, hash);
1228                                                 lookup_table_insert(table, ads_lte);
1229                                         } else {
1230                                                 goto resource_not_found;
1231                                         }
1232                                 }
1233                         }
1234                         ads_ltes[i] = ads_lte;
1235                 }
1236                 inode->i_lte = lte;
1237                 for (u16 i = 0; i < inode->i_num_ads; i++)
1238                         inode->i_ads_entries[i].lte = ads_ltes[i];
1239                 inode->i_resolved = 1;
1240         }
1241         return 0;
1242
1243 resource_not_found:
1244         return resource_not_found_error(inode, hash);
1245 }
1246
1247 void
1248 inode_unresolve_ltes(struct wim_inode *inode)
1249 {
1250         if (inode->i_resolved) {
1251                 if (inode->i_lte)
1252                         copy_hash(inode->i_hash, inode->i_lte->hash);
1253                 else
1254                         zero_out_hash(inode->i_hash);
1255
1256                 for (u16 i = 0; i < inode->i_num_ads; i++) {
1257                         if (inode->i_ads_entries[i].lte)
1258                                 copy_hash(inode->i_ads_entries[i].hash,
1259                                           inode->i_ads_entries[i].lte->hash);
1260                         else
1261                                 zero_out_hash(inode->i_ads_entries[i].hash);
1262                 }
1263                 inode->i_resolved = 0;
1264         }
1265 }
1266
1267 /*
1268  * Returns the lookup table entry for stream @stream_idx of the inode, where
1269  * stream_idx = 0 means the default un-named file stream, and stream_idx >= 1
1270  * corresponds to an alternate data stream.
1271  *
1272  * This works for both resolved and un-resolved inodes.
1273  */
1274 struct wim_lookup_table_entry *
1275 inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
1276                  const struct wim_lookup_table *table)
1277 {
1278         if (inode->i_resolved)
1279                 return inode_stream_lte_resolved(inode, stream_idx);
1280         else
1281                 return inode_stream_lte_unresolved(inode, stream_idx, table);
1282 }
1283
1284 struct wim_lookup_table_entry *
1285 inode_unnamed_stream_resolved(const struct wim_inode *inode, u16 *stream_idx_ret)
1286 {
1287         wimlib_assert(inode->i_resolved);
1288         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1289                 if (inode_stream_name_nbytes(inode, i) == 0 &&
1290                     !is_zero_hash(inode_stream_hash_resolved(inode, i)))
1291                 {
1292                         *stream_idx_ret = i;
1293                         return inode_stream_lte_resolved(inode, i);
1294                 }
1295         }
1296         *stream_idx_ret = 0;
1297         return NULL;
1298 }
1299
1300 struct wim_lookup_table_entry *
1301 inode_unnamed_lte_resolved(const struct wim_inode *inode)
1302 {
1303         u16 stream_idx;
1304         return inode_unnamed_stream_resolved(inode, &stream_idx);
1305 }
1306
1307 struct wim_lookup_table_entry *
1308 inode_unnamed_lte_unresolved(const struct wim_inode *inode,
1309                              const struct wim_lookup_table *table)
1310 {
1311         wimlib_assert(!inode->i_resolved);
1312         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1313                 if (inode_stream_name_nbytes(inode, i) == 0 &&
1314                     !is_zero_hash(inode_stream_hash_unresolved(inode, i)))
1315                 {
1316                         return inode_stream_lte_unresolved(inode, i, table);
1317                 }
1318         }
1319         return NULL;
1320 }
1321
1322 /* Return the lookup table entry for the unnamed data stream of an inode, or
1323  * NULL if there is none.
1324  *
1325  * You'd think this would be easier than it actually is, since the unnamed data
1326  * stream should be the one referenced from the inode itself.  Alas, if there
1327  * are named data streams, Microsoft's "imagex.exe" program will put the unnamed
1328  * data stream in one of the alternate data streams instead of inside the WIM
1329  * dentry itself.  So we need to check the alternate data streams too.
1330  *
1331  * Also, note that a dentry may appear to have more than one unnamed stream, but
1332  * if the SHA1 message digest is all 0's then the corresponding stream does not
1333  * really "count" (this is the case for the inode's own file stream when the
1334  * file stream that should be there is actually in one of the alternate stream
1335  * entries.).  This is despite the fact that we may need to extract such a
1336  * missing entry as an empty file or empty named data stream.
1337  */
1338 struct wim_lookup_table_entry *
1339 inode_unnamed_lte(const struct wim_inode *inode,
1340                   const struct wim_lookup_table *table)
1341 {
1342         if (inode->i_resolved)
1343                 return inode_unnamed_lte_resolved(inode);
1344         else
1345                 return inode_unnamed_lte_unresolved(inode, table);
1346 }
1347
1348 /* Returns the SHA1 message digest of the unnamed data stream of a WIM inode, or
1349  * 'zero_hash' if the unnamed data stream is missing has all zeroes in its SHA1
1350  * message digest field.  */
1351 const u8 *
1352 inode_unnamed_stream_hash(const struct wim_inode *inode)
1353 {
1354         const u8 *hash;
1355
1356         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1357                 if (inode_stream_name_nbytes(inode, i) == 0) {
1358                         hash = inode_stream_hash(inode, i);
1359                         if (!is_zero_hash(hash))
1360                                 return hash;
1361                 }
1362         }
1363         return zero_hash;
1364 }
1365
1366 struct wim_lookup_table_entry **
1367 retrieve_lte_pointer(struct wim_lookup_table_entry *lte)
1368 {
1369         wimlib_assert(lte->unhashed);
1370         struct wim_inode *inode = lte->back_inode;
1371         u32 stream_id = lte->back_stream_id;
1372         if (stream_id == 0)
1373                 return &inode->i_lte;
1374         else
1375                 for (u16 i = 0; i < inode->i_num_ads; i++)
1376                         if (inode->i_ads_entries[i].stream_id == stream_id)
1377                                 return &inode->i_ads_entries[i].lte;
1378         wimlib_assert(0);
1379         return NULL;
1380 }
1381
1382 /* Calculate the SHA1 message digest of a stream and move it from the list of
1383  * unhashed streams to the stream lookup table, possibly joining it with an
1384  * existing lookup table entry for an identical stream.
1385  *
1386  * @lte:  An unhashed lookup table entry.
1387  * @lookup_table:  Lookup table for the WIM.
1388  * @lte_ret:  On success, write a pointer to the resulting lookup table
1389  *            entry to this location.  This will be the same as @lte
1390  *            if it was inserted into the lookup table, or different if
1391  *            a duplicate stream was found.
1392  *
1393  * Returns 0 on success; nonzero if there is an error reading the stream.
1394  */
1395 int
1396 hash_unhashed_stream(struct wim_lookup_table_entry *lte,
1397                      struct wim_lookup_table *lookup_table,
1398                      struct wim_lookup_table_entry **lte_ret)
1399 {
1400         int ret;
1401         struct wim_lookup_table_entry *duplicate_lte;
1402         struct wim_lookup_table_entry **back_ptr;
1403
1404         wimlib_assert(lte->unhashed);
1405
1406         /* back_ptr must be saved because @back_inode and @back_stream_id are in
1407          * union with the SHA1 message digest and will no longer be valid once
1408          * the SHA1 has been calculated. */
1409         back_ptr = retrieve_lte_pointer(lte);
1410
1411         ret = sha1_stream(lte);
1412         if (ret)
1413                 return ret;
1414
1415         /* Look for a duplicate stream */
1416         duplicate_lte = lookup_resource(lookup_table, lte->hash);
1417         list_del(&lte->unhashed_list);
1418         if (duplicate_lte) {
1419                 /* We have a duplicate stream.  Transfer the reference counts
1420                  * from this stream to the duplicate, update the reference to
1421                  * this stream (in an inode or ads_entry) to point to the
1422                  * duplicate, then free this stream. */
1423                 wimlib_assert(!(duplicate_lte->unhashed));
1424                 duplicate_lte->refcnt += lte->refcnt;
1425                 duplicate_lte->out_refcnt += lte->out_refcnt;
1426                 *back_ptr = duplicate_lte;
1427                 free_lookup_table_entry(lte);
1428                 lte = duplicate_lte;
1429         } else {
1430                 /* No duplicate stream, so we need to insert
1431                  * this stream into the lookup table and treat
1432                  * it as a hashed stream. */
1433                 lookup_table_insert(lookup_table, lte);
1434                 lte->unhashed = 0;
1435         }
1436         if (lte_ret)
1437                 *lte_ret = lte;
1438         return 0;
1439 }
1440
1441 static int
1442 lte_clone_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table)
1443 {
1444         struct wim_lookup_table *lookup_table = _lookup_table;
1445
1446         if (lookup_resource(lookup_table, lte->hash))
1447                 return 0;  /*  Resource already present.  */
1448
1449         lte = clone_lookup_table_entry(lte);
1450         if (!lte)
1451                 return WIMLIB_ERR_NOMEM;
1452         lte->out_refcnt = 1;
1453         lookup_table_insert(lookup_table, lte);
1454         return 0;
1455 }
1456
1457 static int
1458 lte_delete_if_new(struct wim_lookup_table_entry *lte, void *_lookup_table)
1459 {
1460         struct wim_lookup_table *lookup_table = _lookup_table;
1461
1462         if (lte->out_refcnt) {
1463                 lookup_table_unlink(lookup_table, lte);
1464                 free_lookup_table_entry(lte);
1465         }
1466         return 0;
1467 }
1468
1469 /* API function documented in wimlib.h  */
1470 WIMLIBAPI int
1471 wimlib_reference_resources(WIMStruct *wim,
1472                            WIMStruct **resource_wims, unsigned num_resource_wims,
1473                            int ref_flags)
1474 {
1475         int ret;
1476         unsigned i;
1477
1478         if (wim == NULL)
1479                 return WIMLIB_ERR_INVALID_PARAM;
1480
1481         if (num_resource_wims != 0 && resource_wims == NULL)
1482                 return WIMLIB_ERR_INVALID_PARAM;
1483
1484         for (i = 0; i < num_resource_wims; i++)
1485                 if (resource_wims[i] == NULL)
1486                         return WIMLIB_ERR_INVALID_PARAM;
1487
1488         for_lookup_table_entry(wim->lookup_table, lte_zero_out_refcnt, NULL);
1489
1490         for (i = 0; i < num_resource_wims; i++) {
1491                 ret = for_lookup_table_entry(resource_wims[i]->lookup_table,
1492                                              lte_clone_if_new,
1493                                              wim->lookup_table);
1494                 if (ret)
1495                         goto out_rollback;
1496         }
1497         return 0;
1498
1499 out_rollback:
1500         for_lookup_table_entry(wim->lookup_table, lte_delete_if_new,
1501                                wim->lookup_table);
1502         return ret;
1503 }
1504
1505 static int
1506 reference_resource_paths(WIMStruct *wim,
1507                          const tchar * const *resource_wimfiles,
1508                          unsigned num_resource_wimfiles,
1509                          int ref_flags,
1510                          int open_flags,
1511                          wimlib_progress_func_t progress_func)
1512 {
1513         WIMStruct **resource_wims;
1514         unsigned i;
1515         int ret;
1516
1517         resource_wims = CALLOC(num_resource_wimfiles, sizeof(resource_wims[0]));
1518         if (!resource_wims)
1519                 return WIMLIB_ERR_NOMEM;
1520
1521         for (i = 0; i < num_resource_wimfiles; i++) {
1522                 DEBUG("Referencing resources from path \"%"TS"\"",
1523                       resource_wimfiles[i]);
1524                 ret = wimlib_open_wim(resource_wimfiles[i], open_flags,
1525                                       &resource_wims[i], progress_func);
1526                 if (ret)
1527                         goto out_free_resource_wims;
1528         }
1529
1530         ret = wimlib_reference_resources(wim, resource_wims,
1531                                          num_resource_wimfiles, ref_flags);
1532         if (ret)
1533                 goto out_free_resource_wims;
1534
1535         for (i = 0; i < num_resource_wimfiles; i++)
1536                 list_add_tail(&resource_wims[i]->subwim_node, &wim->subwims);
1537
1538         ret = 0;
1539         goto out_free_array;
1540
1541 out_free_resource_wims:
1542         for (i = 0; i < num_resource_wimfiles; i++)
1543                 wimlib_free(resource_wims[i]);
1544 out_free_array:
1545         FREE(resource_wims);
1546         return ret;
1547 }
1548
1549 static int
1550 reference_resource_glob(WIMStruct *wim, const tchar *refglob,
1551                         int ref_flags, int open_flags,
1552                         wimlib_progress_func_t progress_func)
1553 {
1554         glob_t globbuf;
1555         int ret;
1556
1557         /* Note: glob() is replaced in Windows native builds.  */
1558         ret = tglob(refglob, GLOB_ERR | GLOB_NOSORT, NULL, &globbuf);
1559         if (ret) {
1560                 if (ret == GLOB_NOMATCH) {
1561                         if (ref_flags & WIMLIB_REF_FLAG_GLOB_ERR_ON_NOMATCH) {
1562                                 ERROR("Found no files for glob \"%"TS"\"", refglob);
1563                                 return WIMLIB_ERR_GLOB_HAD_NO_MATCHES;
1564                         } else {
1565                                 return reference_resource_paths(wim,
1566                                                                 &refglob,
1567                                                                 1,
1568                                                                 ref_flags,
1569                                                                 open_flags,
1570                                                                 progress_func);
1571                         }
1572                 } else {
1573                         ERROR_WITH_ERRNO("Failed to process glob \"%"TS"\"", refglob);
1574                         if (ret == GLOB_NOSPACE)
1575                                 return WIMLIB_ERR_NOMEM;
1576                         else
1577                                 return WIMLIB_ERR_READ;
1578                 }
1579         }
1580
1581         ret = reference_resource_paths(wim,
1582                                        (const tchar * const *)globbuf.gl_pathv,
1583                                        globbuf.gl_pathc,
1584                                        ref_flags,
1585                                        open_flags,
1586                                        progress_func);
1587         globfree(&globbuf);
1588         return ret;
1589 }
1590
1591 /* API function documented in wimlib.h  */
1592 WIMLIBAPI int
1593 wimlib_reference_resource_files(WIMStruct *wim,
1594                                 const tchar * const * resource_wimfiles_or_globs,
1595                                 unsigned count,
1596                                 int ref_flags,
1597                                 int open_flags,
1598                                 wimlib_progress_func_t progress_func)
1599 {
1600         unsigned i;
1601         int ret;
1602
1603         if (ref_flags & WIMLIB_REF_FLAG_GLOB_ENABLE) {
1604                 for (i = 0; i < count; i++) {
1605                         ret = reference_resource_glob(wim,
1606                                                       resource_wimfiles_or_globs[i],
1607                                                       ref_flags,
1608                                                       open_flags,
1609                                                       progress_func);
1610                         if (ret)
1611                                 return ret;
1612                 }
1613                 return 0;
1614         } else {
1615                 return reference_resource_paths(wim, resource_wimfiles_or_globs,
1616                                                 count, ref_flags,
1617                                                 open_flags, progress_func);
1618         }
1619 }