3b98553e9bb586d790df154f8de706351bc2ac0e
[wimlib] / src / lookup_table.c
1 /*
2  * lookup_table.c
3  *
4  * Lookup table, implemented as a hash table, that maps SHA1 message digests to
5  * data streams; plus code to read and write the corresponding on-disk data.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/metadata.h"
36 #include "wimlib/paths.h"
37 #include "wimlib/resource.h"
38 #include "wimlib/util.h"
39 #include "wimlib/write.h"
40
41 #include <errno.h>
42 #include <stdlib.h>
43 #ifdef WITH_FUSE
44 #  include <unistd.h> /* for unlink() */
45 #endif
46
47 struct wim_lookup_table *
48 new_lookup_table(size_t capacity)
49 {
50         struct wim_lookup_table *table;
51         struct hlist_head *array;
52
53         table = CALLOC(1, sizeof(struct wim_lookup_table));
54         if (table) {
55                 array = CALLOC(capacity, sizeof(array[0]));
56                 if (array) {
57                         table->num_entries = 0;
58                         table->capacity = capacity;
59                         table->array = array;
60                 } else {
61                         FREE(table);
62                         table = NULL;
63                         ERROR("Failed to allocate memory for lookup table "
64                               "with capacity %zu", capacity);
65                 }
66         }
67         return table;
68 }
69
70 struct wim_lookup_table_entry *
71 new_lookup_table_entry(void)
72 {
73         struct wim_lookup_table_entry *lte;
74
75         lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
76         if (lte) {
77                 lte->part_number  = 1;
78                 lte->refcnt       = 1;
79         } else {
80                 ERROR("Out of memory (tried to allocate %zu bytes for "
81                       "lookup table entry)",
82                       sizeof(struct wim_lookup_table_entry));
83         }
84         return lte;
85 }
86
87 struct wim_lookup_table_entry *
88 clone_lookup_table_entry(const struct wim_lookup_table_entry *old)
89 {
90         struct wim_lookup_table_entry *new;
91
92         new = memdup(old, sizeof(struct wim_lookup_table_entry));
93         if (!new)
94                 return NULL;
95
96         new->extracted_file = NULL;
97         switch (new->resource_location) {
98         case RESOURCE_IN_FILE_ON_DISK:
99 #ifdef __WIN32__
100         case RESOURCE_WIN32_ENCRYPTED:
101 #endif
102 #ifdef WITH_FUSE
103         case RESOURCE_IN_STAGING_FILE:
104                 BUILD_BUG_ON((void*)&old->file_on_disk !=
105                              (void*)&old->staging_file_name);
106 #endif
107                 new->file_on_disk = TSTRDUP(old->file_on_disk);
108                 if (!new->file_on_disk)
109                         goto out_free;
110                 break;
111         case RESOURCE_IN_ATTACHED_BUFFER:
112                 new->attached_buffer = memdup(old->attached_buffer,
113                                               wim_resource_size(old));
114                 if (!new->attached_buffer)
115                         goto out_free;
116                 break;
117 #ifdef WITH_NTFS_3G
118         case RESOURCE_IN_NTFS_VOLUME:
119                 if (old->ntfs_loc) {
120                         struct ntfs_location *loc;
121                         loc = memdup(old->ntfs_loc, sizeof(struct ntfs_location));
122                         if (!loc)
123                                 goto out_free;
124                         loc->path = NULL;
125                         loc->stream_name = NULL;
126                         new->ntfs_loc = loc;
127                         loc->path = STRDUP(old->ntfs_loc->path);
128                         if (!loc->path)
129                                 goto out_free;
130                         if (loc->stream_name_nchars) {
131                                 loc->stream_name = memdup(old->ntfs_loc->stream_name,
132                                                           loc->stream_name_nchars * 2);
133                                 if (!loc->stream_name)
134                                         goto out_free;
135                         }
136                 }
137                 break;
138 #endif
139         default:
140                 break;
141         }
142         return new;
143 out_free:
144         free_lookup_table_entry(new);
145         return NULL;
146 }
147
148 void
149 free_lookup_table_entry(struct wim_lookup_table_entry *lte)
150 {
151         if (lte) {
152                 switch (lte->resource_location) {
153                 case RESOURCE_IN_FILE_ON_DISK:
154         #ifdef __WIN32__
155                 case RESOURCE_WIN32_ENCRYPTED:
156         #endif
157         #ifdef WITH_FUSE
158                 case RESOURCE_IN_STAGING_FILE:
159                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
160                                      (void*)&lte->staging_file_name);
161         #endif
162                 case RESOURCE_IN_ATTACHED_BUFFER:
163                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
164                                      (void*)&lte->attached_buffer);
165                         FREE(lte->file_on_disk);
166                         break;
167 #ifdef WITH_NTFS_3G
168                 case RESOURCE_IN_NTFS_VOLUME:
169                         if (lte->ntfs_loc) {
170                                 FREE(lte->ntfs_loc->path);
171                                 FREE(lte->ntfs_loc->stream_name);
172                                 FREE(lte->ntfs_loc);
173                         }
174                         break;
175 #endif
176                 default:
177                         break;
178                 }
179                 FREE(lte);
180         }
181 }
182
183 static int
184 do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore)
185 {
186         free_lookup_table_entry(entry);
187         return 0;
188 }
189
190
191 void
192 free_lookup_table(struct wim_lookup_table *table)
193 {
194         DEBUG2("Freeing lookup table");
195         if (table) {
196                 if (table->array) {
197                         for_lookup_table_entry(table,
198                                                do_free_lookup_table_entry,
199                                                NULL);
200                         FREE(table->array);
201                 }
202                 FREE(table);
203         }
204 }
205
206 /*
207  * Inserts an entry into the lookup table.
208  *
209  * @table:      A pointer to the lookup table.
210  * @lte:        A pointer to the entry to insert.
211  */
212 void
213 lookup_table_insert(struct wim_lookup_table *table,
214                     struct wim_lookup_table_entry *lte)
215 {
216         size_t i = lte->hash_short % table->capacity;
217         hlist_add_head(&lte->hash_list, &table->array[i]);
218
219         /* XXX Make the table grow when too many entries have been inserted. */
220         table->num_entries++;
221 }
222
223 static void
224 finalize_lte(struct wim_lookup_table_entry *lte)
225 {
226         #ifdef WITH_FUSE
227         if (lte->resource_location == RESOURCE_IN_STAGING_FILE) {
228                 unlink(lte->staging_file_name);
229                 list_del(&lte->unhashed_list);
230         }
231         #endif
232         free_lookup_table_entry(lte);
233 }
234
235 /* Decrements the reference count for the lookup table entry @lte.  If its
236  * reference count reaches 0, it is unlinked from the lookup table.  If,
237  * furthermore, the entry has no opened file descriptors associated with it, the
238  * entry is freed.  */
239 void
240 lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
241                      struct wim_lookup_table *table)
242 {
243         wimlib_assert(lte != NULL);
244         wimlib_assert(lte->refcnt != 0);
245         if (--lte->refcnt == 0) {
246                 if (lte->unhashed)
247                         list_del(&lte->unhashed_list);
248                 else
249                         lookup_table_unlink(table, lte);
250         #ifdef WITH_FUSE
251                 if (lte->num_opened_fds == 0)
252         #endif
253                         finalize_lte(lte);
254         }
255 }
256
257 #ifdef WITH_FUSE
258 void
259 lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte)
260 {
261         if (lte->num_opened_fds != 0)
262                 if (--lte->num_opened_fds == 0 && lte->refcnt == 0)
263                         finalize_lte(lte);
264 }
265 #endif
266
267 /* Calls a function on all the entries in the WIM lookup table.  Stop early and
268  * return nonzero if any call to the function returns nonzero. */
269 int
270 for_lookup_table_entry(struct wim_lookup_table *table,
271                        int (*visitor)(struct wim_lookup_table_entry *, void *),
272                        void *arg)
273 {
274         struct wim_lookup_table_entry *lte;
275         struct hlist_node *pos, *tmp;
276         int ret;
277
278         for (size_t i = 0; i < table->capacity; i++) {
279                 hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
280                                           hash_list)
281                 {
282                         wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA));
283                         ret = visitor(lte, arg);
284                         if (ret)
285                                 return ret;
286                 }
287         }
288         return 0;
289 }
290
291 int
292 cmp_streams_by_wim_position(const void *p1, const void *p2)
293 {
294         const struct wim_lookup_table_entry *lte1, *lte2;
295         lte1 = *(const struct wim_lookup_table_entry**)p1;
296         lte2 = *(const struct wim_lookup_table_entry**)p2;
297         if (lte1->resource_entry.offset < lte2->resource_entry.offset)
298                 return -1;
299         else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
300                 return 1;
301         else
302                 return 0;
303 }
304
305 static int
306 add_lte_to_array(struct wim_lookup_table_entry *lte,
307                  void *_pp)
308 {
309         struct wim_lookup_table_entry ***pp = _pp;
310         *(*pp)++ = lte;
311         return 0;
312 }
313
314 /* Iterate through the lookup table entries, but first sort them by stream
315  * offset in the WIM.  Caution: this is intended to be used when the stream
316  * offset field has actually been set. */
317 int
318 for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
319                                   int (*visitor)(struct wim_lookup_table_entry *,
320                                                  void *),
321                                   void *arg)
322 {
323         struct wim_lookup_table_entry **lte_array, **p;
324         size_t num_streams = table->num_entries;
325         int ret;
326
327         lte_array = MALLOC(num_streams * sizeof(lte_array[0]));
328         if (!lte_array)
329                 return WIMLIB_ERR_NOMEM;
330         p = lte_array;
331         for_lookup_table_entry(table, add_lte_to_array, &p);
332
333         wimlib_assert(p == lte_array + num_streams);
334
335         qsort(lte_array, num_streams, sizeof(lte_array[0]),
336               cmp_streams_by_wim_position);
337         ret = 0;
338         for (size_t i = 0; i < num_streams; i++) {
339                 ret = visitor(lte_array[i], arg);
340                 if (ret)
341                         break;
342         }
343         FREE(lte_array);
344         return ret;
345 }
346
347 /* On-disk format of a WIM lookup table entry (stream entry). */
348 struct wim_lookup_table_entry_disk {
349         /* Location, offset, compression status, and metadata status of the
350          * stream. */
351         struct resource_entry_disk resource_entry;
352
353         /* Which part of the split WIM this stream is in; indexed from 1. */
354         le16 part_number;
355
356         /* Reference count of this stream over all WIM images. */
357         le32 refcnt;
358
359         /* SHA1 message digest of the uncompressed data of this stream, or
360          * optionally all zeroes if this stream is of zero length. */
361         u8 hash[SHA1_HASH_SIZE];
362 } _packed_attribute;
363
364 #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50
365
366 void
367 lte_init_wim(struct wim_lookup_table_entry *lte, WIMStruct *wim)
368 {
369         lte->resource_location = RESOURCE_IN_WIM;
370         lte->wim = wim;
371         if (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
372                 lte->compression_type = wim->compression_type;
373         else
374                 lte->compression_type = WIMLIB_COMPRESSION_TYPE_NONE;
375
376         if (wim_is_pipable(wim))
377                 lte->is_pipable = 1;
378 }
379
380 /*
381  * Reads the lookup table from a WIM file.
382  *
383  * Saves lookup table entries for non-metadata streams in a hash table, and
384  * saves the metadata entry for each image in a special per-image location (the
385  * image_metadata array).
386  *
387  * Return values:
388  *      WIMLIB_ERR_SUCCESS (0)
389  *      WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY
390  *      WIMLIB_ERR_RESOURCE_NOT_FOUND
391  */
392 int
393 read_wim_lookup_table(WIMStruct *wim)
394 {
395         int ret;
396         size_t i;
397         size_t num_entries;
398         struct wim_lookup_table *table;
399         struct wim_lookup_table_entry *cur_entry, *duplicate_entry;
400         struct wim_lookup_table_entry_disk *buf;
401
402         BUILD_BUG_ON(sizeof(struct wim_lookup_table_entry_disk) !=
403                      WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE);
404
405         DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"",
406               wim->hdr.lookup_table_res_entry.offset,
407               wim->hdr.lookup_table_res_entry.size);
408
409         /* Calculate number of entries in the lookup table.  */
410         num_entries = wim->hdr.lookup_table_res_entry.size /
411                       sizeof(struct wim_lookup_table_entry_disk);
412
413
414         /* Read the lookup table into a buffer.  */
415         ret = res_entry_to_data(&wim->hdr.lookup_table_res_entry, wim,
416                                 (void**)&buf);
417         if (ret)
418                 goto out;
419
420         /* Allocate hash table.  */
421         table = new_lookup_table(num_entries * 2 + 1);
422         if (!table) {
423                 ERROR("Not enough memory to read lookup table.");
424                 ret = WIMLIB_ERR_NOMEM;
425                 goto out_free_buf;
426         }
427
428         /* Allocate and initalize `struct wim_lookup_table_entry's from the
429          * on-disk lookup table.  */
430         wim->current_image = 0;
431         for (i = 0; i < num_entries; i++) {
432                 const struct wim_lookup_table_entry_disk *disk_entry = &buf[i];
433
434                 cur_entry = new_lookup_table_entry();
435                 if (!cur_entry) {
436                         ERROR("Not enough memory to read lookup table.");
437                         ret = WIMLIB_ERR_NOMEM;
438                         goto out_free_lookup_table;
439                 }
440
441                 cur_entry->wim = wim;
442                 cur_entry->resource_location = RESOURCE_IN_WIM;
443                 get_resource_entry(&disk_entry->resource_entry, &cur_entry->resource_entry);
444                 cur_entry->part_number = le16_to_cpu(disk_entry->part_number);
445                 cur_entry->refcnt = le32_to_cpu(disk_entry->refcnt);
446                 copy_hash(cur_entry->hash, disk_entry->hash);
447                 lte_init_wim(cur_entry, wim);
448
449                 if (cur_entry->part_number != wim->hdr.part_number) {
450                         WARNING("A lookup table entry in part %hu of the WIM "
451                                 "points to part %hu (ignoring it)",
452                                 wim->hdr.part_number, cur_entry->part_number);
453                         free_lookup_table_entry(cur_entry);
454                         continue;
455                 }
456
457                 if (is_zero_hash(cur_entry->hash)) {
458                         WARNING("The WIM lookup table contains an entry with a "
459                                 "SHA1 message digest of all 0's (ignoring it)");
460                         free_lookup_table_entry(cur_entry);
461                         continue;
462                 }
463
464                 if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
465                     && (cur_entry->resource_entry.size !=
466                         cur_entry->resource_entry.original_size))
467                 {
468                         if (wimlib_print_errors) {
469                                 WARNING("Found uncompressed resource with "
470                                         "original size (%"PRIu64") not the same "
471                                         "as compressed size (%"PRIu64")",
472                                         cur_entry->resource_entry.original_size,
473                                         cur_entry->resource_entry.size);
474                                 if (cur_entry->resource_entry.original_size) {
475                                         WARNING("Overriding compressed size with original size.");
476                                         cur_entry->resource_entry.size =
477                                                 cur_entry->resource_entry.original_size;
478                                 } else {
479                                         WARNING("Overriding original size with compressed size");
480                                         cur_entry->resource_entry.original_size =
481                                                 cur_entry->resource_entry.size;
482                                 }
483                         }
484                 }
485
486                 if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) {
487                         /* Lookup table entry for a metadata resource */
488                         if (cur_entry->refcnt != 1) {
489                                 if (wimlib_print_errors) {
490                                         ERROR("Found metadata resource with refcnt != 1:");
491                                         print_lookup_table_entry(cur_entry, stderr);
492                                 }
493                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
494                                 goto out_free_cur_entry;
495                         }
496
497                         if (wim->hdr.part_number != 1) {
498                                 WARNING("Ignoring metadata resource found in a "
499                                         "non-first part of the split WIM");
500                                 free_lookup_table_entry(cur_entry);
501                                 continue;
502                         }
503                         if (wim->current_image == wim->hdr.image_count) {
504                                 WARNING("The WIM header says there are %u images "
505                                         "in the WIM, but we found more metadata "
506                                         "resources than this (ignoring the extra)",
507                                         wim->hdr.image_count);
508                                 free_lookup_table_entry(cur_entry);
509                                 continue;
510                         }
511
512                         /* Notice very carefully:  We are assigning the metadata
513                          * resources in the exact order mirrored by their lookup
514                          * table entries on disk, which is the behavior of
515                          * Microsoft's software.  In particular, this overrides
516                          * the actual locations of the metadata resources
517                          * themselves in the WIM file as well as any information
518                          * written in the XML data. */
519                         DEBUG("Found metadata resource for image %u at "
520                               "offset %"PRIu64".",
521                               wim->current_image + 1,
522                               cur_entry->resource_entry.offset);
523                         wim->image_metadata[
524                                 wim->current_image++]->metadata_lte = cur_entry;
525                 } else {
526                         /* Lookup table entry for a stream that is not a
527                          * metadata resource */
528                         duplicate_entry = __lookup_resource(table, cur_entry->hash);
529                         if (duplicate_entry) {
530                                 if (wimlib_print_errors) {
531                                         WARNING("The WIM lookup table contains two entries with the "
532                                               "same SHA1 message digest!");
533                                         WARNING("The first entry is:");
534                                         print_lookup_table_entry(duplicate_entry, stderr);
535                                         WARNING("The second entry is:");
536                                         print_lookup_table_entry(cur_entry, stderr);
537                                 }
538                                 free_lookup_table_entry(cur_entry);
539                                 continue;
540                         } else {
541                                 lookup_table_insert(table, cur_entry);
542                         }
543                 }
544         }
545
546         if (wim->hdr.part_number == 1 && wim->current_image != wim->hdr.image_count) {
547                 WARNING("The header of \"%"TS"\" says there are %u images in\n"
548                         "          the WIM, but we only found %d metadata resources!  Acting as if\n"
549                         "          the header specified only %d images instead.",
550                         wim->filename, wim->hdr.image_count,
551                         wim->current_image, wim->current_image);
552                 for (int i = wim->current_image; i < wim->hdr.image_count; i++)
553                         put_image_metadata(wim->image_metadata[i], NULL);
554                 wim->hdr.image_count = wim->current_image;
555         }
556         DEBUG("Done reading lookup table.");
557         wim->lookup_table = table;
558         ret = 0;
559         goto out_free_buf;
560 out_free_cur_entry:
561         FREE(cur_entry);
562 out_free_lookup_table:
563         free_lookup_table(table);
564 out_free_buf:
565         FREE(buf);
566 out:
567         wim->current_image = 0;
568         return ret;
569 }
570
571
572 static void
573 write_wim_lookup_table_entry(const struct wim_lookup_table_entry *lte,
574                              struct wim_lookup_table_entry_disk *disk_entry)
575 {
576         put_resource_entry(&lte->output_resource_entry, &disk_entry->resource_entry);
577         disk_entry->part_number = cpu_to_le16(lte->part_number);
578         disk_entry->refcnt = cpu_to_le32(lte->out_refcnt);
579         copy_hash(disk_entry->hash, lte->hash);
580 }
581
582 static int
583 write_wim_lookup_table_from_stream_list(struct list_head *stream_list,
584                                         struct filedes *out_fd,
585                                         struct resource_entry *out_res_entry,
586                                         int write_resource_flags)
587 {
588         size_t table_size;
589         struct wim_lookup_table_entry *lte;
590         struct wim_lookup_table_entry_disk *table_buf;
591         struct wim_lookup_table_entry_disk *table_buf_ptr;
592         int ret;
593
594         table_size = 0;
595         list_for_each_entry(lte, stream_list, lookup_table_list)
596                 table_size += sizeof(struct wim_lookup_table_entry_disk);
597
598         DEBUG("Writing WIM lookup table (size=%zu, offset=%"PRIu64")",
599               table_size, out_fd->offset);
600
601         table_buf = MALLOC(table_size);
602         if (!table_buf) {
603                 ERROR("Failed to allocate %zu bytes for temporary lookup table",
604                       table_size);
605                 return WIMLIB_ERR_NOMEM;
606         }
607         table_buf_ptr = table_buf;
608         list_for_each_entry(lte, stream_list, lookup_table_list)
609                 write_wim_lookup_table_entry(lte, table_buf_ptr++);
610
611         /* Write the lookup table uncompressed.  Although wimlib can handle a
612          * compressed lookup table, MS software cannot.  */
613         ret = write_wim_resource_from_buffer(table_buf,
614                                              table_size,
615                                              WIM_RESHDR_FLAG_METADATA,
616                                              out_fd,
617                                              WIMLIB_COMPRESSION_TYPE_NONE,
618                                              out_res_entry,
619                                              NULL,
620                                              write_resource_flags);
621         FREE(table_buf);
622         return ret;
623 }
624
625 static int
626 append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list)
627 {
628         if (lte->out_refcnt != 0)
629                 list_add_tail(&lte->lookup_table_list, (struct list_head*)_list);
630         return 0;
631 }
632
633 int
634 write_wim_lookup_table(WIMStruct *wim, int image, int write_flags,
635                        struct resource_entry *out_res_entry,
636                        struct list_head *stream_list_override)
637 {
638         int write_resource_flags;
639         struct list_head _stream_list;
640         struct list_head *stream_list;
641
642         if (stream_list_override) {
643                 stream_list = stream_list_override;
644         } else {
645                 stream_list = &_stream_list;
646                 INIT_LIST_HEAD(stream_list);
647         }
648
649         if (!(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)) {
650                 int start_image;
651                 int end_image;
652
653                 if (image == WIMLIB_ALL_IMAGES) {
654                         start_image = 1;
655                         end_image = wim->hdr.image_count;
656                 } else {
657                         start_image = image;
658                         end_image = image;
659                 }
660
661                 /* Push metadata resource lookup table entries onto the front of
662                  * the list in reverse order, so that they're written in order.
663                  */
664                 for (int i = end_image; i >= start_image; i--) {
665                         struct wim_lookup_table_entry *metadata_lte;
666
667                         metadata_lte = wim->image_metadata[i - 1]->metadata_lte;
668                         metadata_lte->out_refcnt = 1;
669                         metadata_lte->part_number = wim->hdr.part_number;
670                         metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA;
671
672                         list_add(&metadata_lte->lookup_table_list, stream_list);
673                 }
674         }
675
676         /* Append additional lookup table entries that have out_refcnt != 0.  */
677         if (!stream_list_override) {
678                 for_lookup_table_entry(wim->lookup_table,
679                                        append_lookup_table_entry, stream_list);
680         }
681
682         write_resource_flags = 0;
683         if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
684                 write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE;
685         return write_wim_lookup_table_from_stream_list(stream_list,
686                                                        &wim->out_fd,
687                                                        out_res_entry,
688                                                        write_resource_flags);
689 }
690
691
692 int
693 lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
694 {
695         lte->real_refcnt = 0;
696         return 0;
697 }
698
699 int
700 lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
701 {
702         lte->out_refcnt = 0;
703         return 0;
704 }
705
706 int
707 lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore)
708 {
709         if (lte->extracted_file != NULL) {
710                 FREE(lte->extracted_file);
711                 lte->extracted_file = NULL;
712         }
713         return 0;
714 }
715
716 void
717 print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out)
718 {
719         if (!lte) {
720                 tputc(T('\n'), out);
721                 return;
722         }
723         tfprintf(out, T("Offset            = %"PRIu64" bytes\n"),
724                  lte->resource_entry.offset);
725
726         tfprintf(out, T("Size              = %"PRIu64" bytes\n"),
727                  (u64)lte->resource_entry.size);
728
729         tfprintf(out, T("Original size     = %"PRIu64" bytes\n"),
730                  lte->resource_entry.original_size);
731
732         tfprintf(out, T("Part Number       = %hu\n"), lte->part_number);
733         tfprintf(out, T("Reference Count   = %u\n"), lte->refcnt);
734
735         if (lte->unhashed) {
736                 tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"),
737                          lte->back_inode, lte->back_stream_id);
738         } else {
739                 tfprintf(out, T("Hash              = 0x"));
740                 print_hash(lte->hash, out);
741                 tputc(T('\n'), out);
742         }
743
744         tfprintf(out, T("Flags             = "));
745         u8 flags = lte->resource_entry.flags;
746         if (flags & WIM_RESHDR_FLAG_COMPRESSED)
747                 tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out);
748         if (flags & WIM_RESHDR_FLAG_FREE)
749                 tfputs(T("WIM_RESHDR_FLAG_FREE, "), out);
750         if (flags & WIM_RESHDR_FLAG_METADATA)
751                 tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out);
752         if (flags & WIM_RESHDR_FLAG_SPANNED)
753                 tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out);
754         tputc(T('\n'), out);
755         switch (lte->resource_location) {
756         case RESOURCE_IN_WIM:
757                 if (lte->wim->filename) {
758                         tfprintf(out, T("WIM file          = `%"TS"'\n"),
759                                  lte->wim->filename);
760                 }
761                 break;
762 #ifdef __WIN32__
763         case RESOURCE_WIN32_ENCRYPTED:
764 #endif
765         case RESOURCE_IN_FILE_ON_DISK:
766                 tfprintf(out, T("File on Disk      = `%"TS"'\n"),
767                          lte->file_on_disk);
768                 break;
769 #ifdef WITH_FUSE
770         case RESOURCE_IN_STAGING_FILE:
771                 tfprintf(out, T("Staging File      = `%"TS"'\n"),
772                                 lte->staging_file_name);
773                 break;
774 #endif
775         default:
776                 break;
777         }
778         tputc(T('\n'), out);
779 }
780
781 void
782 lte_to_wimlib_resource_entry(const struct wim_lookup_table_entry *lte,
783                              struct wimlib_resource_entry *wentry)
784 {
785         wentry->uncompressed_size = lte->resource_entry.original_size;
786         wentry->compressed_size = lte->resource_entry.size;
787         wentry->offset = lte->resource_entry.offset;
788         copy_hash(wentry->sha1_hash, lte->hash);
789         wentry->part_number = lte->part_number;
790         wentry->reference_count = lte->refcnt;
791         wentry->is_compressed = (lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED) != 0;
792         wentry->is_metadata = (lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) != 0;
793         wentry->is_free = (lte->resource_entry.flags & WIM_RESHDR_FLAG_FREE) != 0;
794         wentry->is_spanned = (lte->resource_entry.flags & WIM_RESHDR_FLAG_SPANNED) != 0;
795 }
796
797 struct iterate_lte_context {
798         wimlib_iterate_lookup_table_callback_t cb;
799         void *user_ctx;
800 };
801
802 static int
803 do_iterate_lte(struct wim_lookup_table_entry *lte, void *_ctx)
804 {
805         struct iterate_lte_context *ctx = _ctx;
806         struct wimlib_resource_entry entry;
807
808         lte_to_wimlib_resource_entry(lte, &entry);
809         return (*ctx->cb)(&entry, ctx->user_ctx);
810 }
811
812 /* API function documented in wimlib.h  */
813 WIMLIBAPI int
814 wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
815                             wimlib_iterate_lookup_table_callback_t cb,
816                             void *user_ctx)
817 {
818         struct iterate_lte_context ctx = {
819                 .cb = cb,
820                 .user_ctx = user_ctx,
821         };
822         if (wim->hdr.part_number == 1) {
823                 int ret;
824                 for (int i = 0; i < wim->hdr.image_count; i++) {
825                         ret = do_iterate_lte(wim->image_metadata[i]->metadata_lte,
826                                              &ctx);
827                         if (ret)
828                                 return ret;
829                 }
830         }
831         return for_lookup_table_entry(wim->lookup_table, do_iterate_lte, &ctx);
832 }
833
834 static int
835 do_print_lookup_table_entry(struct wim_lookup_table_entry *lte, void *fp)
836 {
837         print_lookup_table_entry(lte, (FILE*)fp);
838         return 0;
839 }
840
841 /* API function documented in wimlib.h  */
842 WIMLIBAPI void
843 wimlib_print_lookup_table(WIMStruct *wim)
844 {
845         for (int i = 0; i < wim->hdr.image_count; i++)
846                 print_lookup_table_entry(wim->image_metadata[i]->metadata_lte, stdout);
847         for_lookup_table_entry(wim->lookup_table,
848                                do_print_lookup_table_entry,
849                                stdout);
850 }
851
852 /* Given a SHA1 message digest, return the corresponding entry in the WIM's
853  * lookup table, or NULL if there is none.  */
854 struct wim_lookup_table_entry *
855 __lookup_resource(const struct wim_lookup_table *table, const u8 hash[])
856 {
857         size_t i;
858         struct wim_lookup_table_entry *lte;
859         struct hlist_node *pos;
860
861         wimlib_assert(table != NULL);
862         wimlib_assert(hash != NULL);
863
864         i = *(size_t*)hash % table->capacity;
865         hlist_for_each_entry(lte, pos, &table->array[i], hash_list)
866                 if (hashes_equal(hash, lte->hash))
867                         return lte;
868         return NULL;
869 }
870
871 #ifdef WITH_FUSE
872 /*
873  * Finds the dentry, lookup table entry, and stream index for a WIM file stream,
874  * given a path name.
875  *
876  * This is only for pre-resolved inodes.
877  */
878 int
879 lookup_resource(WIMStruct *wim,
880                 const tchar *path,
881                 int lookup_flags,
882                 struct wim_dentry **dentry_ret,
883                 struct wim_lookup_table_entry **lte_ret,
884                 u16 *stream_idx_ret)
885 {
886         struct wim_dentry *dentry;
887         struct wim_lookup_table_entry *lte;
888         u16 stream_idx;
889         const tchar *stream_name = NULL;
890         struct wim_inode *inode;
891         tchar *p = NULL;
892
893         if (lookup_flags & LOOKUP_FLAG_ADS_OK) {
894                 stream_name = path_stream_name(path);
895                 if (stream_name) {
896                         p = (tchar*)stream_name - 1;
897                         *p = T('\0');
898                 }
899         }
900
901         dentry = get_dentry(wim, path);
902         if (p)
903                 *p = T(':');
904         if (!dentry)
905                 return -errno;
906
907         inode = dentry->d_inode;
908
909         if (!inode->i_resolved)
910                 if (inode_resolve_ltes(inode, wim->lookup_table, false))
911                         return -EIO;
912
913         if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK)
914               && inode_is_directory(inode))
915                 return -EISDIR;
916
917         if (stream_name) {
918                 struct wim_ads_entry *ads_entry;
919                 u16 ads_idx;
920                 ads_entry = inode_get_ads_entry(inode, stream_name,
921                                                 &ads_idx);
922                 if (ads_entry) {
923                         stream_idx = ads_idx + 1;
924                         lte = ads_entry->lte;
925                         goto out;
926                 } else {
927                         return -ENOENT;
928                 }
929         } else {
930                 lte = inode->i_lte;
931                 stream_idx = 0;
932         }
933 out:
934         if (dentry_ret)
935                 *dentry_ret = dentry;
936         if (lte_ret)
937                 *lte_ret = lte;
938         if (stream_idx_ret)
939                 *stream_idx_ret = stream_idx;
940         return 0;
941 }
942 #endif
943
944 /*
945  * Resolve an inode's lookup table entries.
946  *
947  * This replaces the SHA1 hash fields (which are used to lookup an entry in the
948  * lookup table) with pointers directly to the lookup table entries.
949  *
950  * If @force is %false:
951  *      If any needed SHA1 message digests are not found in the lookup table,
952  *      WIMLIB_ERR_RESOURCE_NOT_FOUND is returned and the inode is left
953  *      unmodified.
954  * If @force is %true:
955  *      If any needed SHA1 message digests are not found in the lookup table,
956  *      new entries are allocated and inserted into the lookup table.
957  */
958 int
959 inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table,
960                    bool force)
961 {
962         const u8 *hash;
963
964         if (!inode->i_resolved) {
965                 struct wim_lookup_table_entry *lte, *ads_lte;
966
967                 /* Resolve the default file stream */
968                 lte = NULL;
969                 hash = inode->i_hash;
970                 if (!is_zero_hash(hash)) {
971                         lte = __lookup_resource(table, hash);
972                         if (!lte) {
973                                 if (force) {
974                                         lte = new_lookup_table_entry();
975                                         if (!lte)
976                                                 return WIMLIB_ERR_NOMEM;
977                                         copy_hash(lte->hash, hash);
978                                         lookup_table_insert(table, lte);
979                                 } else {
980                                         goto resource_not_found;
981                                 }
982                         }
983                 }
984
985                 /* Resolve the alternate data streams */
986                 struct wim_lookup_table_entry *ads_ltes[inode->i_num_ads];
987                 for (u16 i = 0; i < inode->i_num_ads; i++) {
988                         struct wim_ads_entry *cur_entry;
989
990                         ads_lte = NULL;
991                         cur_entry = &inode->i_ads_entries[i];
992                         hash = cur_entry->hash;
993                         if (!is_zero_hash(hash)) {
994                                 ads_lte = __lookup_resource(table, hash);
995                                 if (!ads_lte) {
996                                         if (force) {
997                                                 ads_lte = new_lookup_table_entry();
998                                                 if (!ads_lte)
999                                                         return WIMLIB_ERR_NOMEM;
1000                                                 copy_hash(ads_lte->hash, hash);
1001                                                 lookup_table_insert(table, ads_lte);
1002                                         } else {
1003                                                 goto resource_not_found;
1004                                         }
1005                                 }
1006                         }
1007                         ads_ltes[i] = ads_lte;
1008                 }
1009                 inode->i_lte = lte;
1010                 for (u16 i = 0; i < inode->i_num_ads; i++)
1011                         inode->i_ads_entries[i].lte = ads_ltes[i];
1012                 inode->i_resolved = 1;
1013         }
1014         return 0;
1015 resource_not_found:
1016         if (wimlib_print_errors) {
1017                 ERROR("\"%"TS"\": resource not found", inode_first_full_path(inode));
1018                 tfprintf(stderr, T("        SHA-1 message digest of missing resource:\n        "));
1019                 print_hash(hash, stderr);
1020                 tputc(T('\n'), stderr);
1021         }
1022         return WIMLIB_ERR_RESOURCE_NOT_FOUND;
1023 }
1024
1025 void
1026 inode_unresolve_ltes(struct wim_inode *inode)
1027 {
1028         if (inode->i_resolved) {
1029                 if (inode->i_lte)
1030                         copy_hash(inode->i_hash, inode->i_lte->hash);
1031                 else
1032                         zero_out_hash(inode->i_hash);
1033
1034                 for (u16 i = 0; i < inode->i_num_ads; i++) {
1035                         if (inode->i_ads_entries[i].lte)
1036                                 copy_hash(inode->i_ads_entries[i].hash,
1037                                           inode->i_ads_entries[i].lte->hash);
1038                         else
1039                                 zero_out_hash(inode->i_ads_entries[i].hash);
1040                 }
1041                 inode->i_resolved = 0;
1042         }
1043 }
1044
1045 /*
1046  * Returns the lookup table entry for stream @stream_idx of the inode, where
1047  * stream_idx = 0 means the default un-named file stream, and stream_idx >= 1
1048  * corresponds to an alternate data stream.
1049  *
1050  * This works for both resolved and un-resolved inodes.
1051  */
1052 struct wim_lookup_table_entry *
1053 inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
1054                  const struct wim_lookup_table *table)
1055 {
1056         if (inode->i_resolved)
1057                 return inode_stream_lte_resolved(inode, stream_idx);
1058         else
1059                 return inode_stream_lte_unresolved(inode, stream_idx, table);
1060 }
1061
1062 struct wim_lookup_table_entry *
1063 inode_unnamed_lte_resolved(const struct wim_inode *inode)
1064 {
1065         wimlib_assert(inode->i_resolved);
1066         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1067                 if (inode_stream_name_nbytes(inode, i) == 0 &&
1068                     !is_zero_hash(inode_stream_hash_resolved(inode, i)))
1069                 {
1070                         return inode_stream_lte_resolved(inode, i);
1071                 }
1072         }
1073         return NULL;
1074 }
1075
1076 struct wim_lookup_table_entry *
1077 inode_unnamed_lte_unresolved(const struct wim_inode *inode,
1078                              const struct wim_lookup_table *table)
1079 {
1080         wimlib_assert(!inode->i_resolved);
1081         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1082                 if (inode_stream_name_nbytes(inode, i) == 0 &&
1083                     !is_zero_hash(inode_stream_hash_unresolved(inode, i)))
1084                 {
1085                         return inode_stream_lte_unresolved(inode, i, table);
1086                 }
1087         }
1088         return NULL;
1089 }
1090
1091 /* Return the lookup table entry for the unnamed data stream of an inode, or
1092  * NULL if there is none.
1093  *
1094  * You'd think this would be easier than it actually is, since the unnamed data
1095  * stream should be the one referenced from the inode itself.  Alas, if there
1096  * are named data streams, Microsoft's "imagex.exe" program will put the unnamed
1097  * data stream in one of the alternate data streams instead of inside the WIM
1098  * dentry itself.  So we need to check the alternate data streams too.
1099  *
1100  * Also, note that a dentry may appear to have more than one unnamed stream, but
1101  * if the SHA1 message digest is all 0's then the corresponding stream does not
1102  * really "count" (this is the case for the inode's own file stream when the
1103  * file stream that should be there is actually in one of the alternate stream
1104  * entries.).  This is despite the fact that we may need to extract such a
1105  * missing entry as an empty file or empty named data stream.
1106  */
1107 struct wim_lookup_table_entry *
1108 inode_unnamed_lte(const struct wim_inode *inode,
1109                   const struct wim_lookup_table *table)
1110 {
1111         if (inode->i_resolved)
1112                 return inode_unnamed_lte_resolved(inode);
1113         else
1114                 return inode_unnamed_lte_unresolved(inode, table);
1115 }
1116
1117 static int
1118 lte_add_stream_size(struct wim_lookup_table_entry *lte, void *total_bytes_p)
1119 {
1120         *(u64*)total_bytes_p += lte->resource_entry.size;
1121         return 0;
1122 }
1123
1124 u64
1125 lookup_table_total_stream_size(struct wim_lookup_table *table)
1126 {
1127         u64 total_size = 0;
1128         for_lookup_table_entry(table, lte_add_stream_size, &total_size);
1129         return total_size;
1130 }
1131
1132 struct wim_lookup_table_entry **
1133 retrieve_lte_pointer(struct wim_lookup_table_entry *lte)
1134 {
1135         wimlib_assert(lte->unhashed);
1136         struct wim_inode *inode = lte->back_inode;
1137         u32 stream_id = lte->back_stream_id;
1138         if (stream_id == 0)
1139                 return &inode->i_lte;
1140         else
1141                 for (u16 i = 0; i < inode->i_num_ads; i++)
1142                         if (inode->i_ads_entries[i].stream_id == stream_id)
1143                                 return &inode->i_ads_entries[i].lte;
1144         wimlib_assert(0);
1145         return NULL;
1146 }
1147
1148 /* Calculate the SHA1 message digest of a stream and move it from the list of
1149  * unhashed streams to the stream lookup table, possibly joining it with an
1150  * existing lookup table entry for an identical stream.
1151  *
1152  * @lte:  An unhashed lookup table entry.
1153  * @lookup_table:  Lookup table for the WIM.
1154  * @lte_ret:  On success, write a pointer to the resulting lookup table
1155  *            entry to this location.  This will be the same as @lte
1156  *            if it was inserted into the lookup table, or different if
1157  *            a duplicate stream was found.
1158  *
1159  * Returns 0 on success; nonzero if there is an error reading the stream.
1160  */
1161 int
1162 hash_unhashed_stream(struct wim_lookup_table_entry *lte,
1163                      struct wim_lookup_table *lookup_table,
1164                      struct wim_lookup_table_entry **lte_ret)
1165 {
1166         int ret;
1167         struct wim_lookup_table_entry *duplicate_lte;
1168         struct wim_lookup_table_entry **back_ptr;
1169
1170         wimlib_assert(lte->unhashed);
1171
1172         /* back_ptr must be saved because @back_inode and @back_stream_id are in
1173          * union with the SHA1 message digest and will no longer be valid once
1174          * the SHA1 has been calculated. */
1175         back_ptr = retrieve_lte_pointer(lte);
1176
1177         ret = sha1_resource(lte);
1178         if (ret)
1179                 return ret;
1180
1181         /* Look for a duplicate stream */
1182         duplicate_lte = __lookup_resource(lookup_table, lte->hash);
1183         list_del(&lte->unhashed_list);
1184         if (duplicate_lte) {
1185                 /* We have a duplicate stream.  Transfer the reference counts
1186                  * from this stream to the duplicate, update the reference to
1187                  * this stream (in an inode or ads_entry) to point to the
1188                  * duplicate, then free this stream. */
1189                 wimlib_assert(!(duplicate_lte->unhashed));
1190                 duplicate_lte->refcnt += lte->refcnt;
1191                 duplicate_lte->out_refcnt += lte->refcnt;
1192                 *back_ptr = duplicate_lte;
1193                 free_lookup_table_entry(lte);
1194                 lte = duplicate_lte;
1195         } else {
1196                 /* No duplicate stream, so we need to insert
1197                  * this stream into the lookup table and treat
1198                  * it as a hashed stream. */
1199                 lookup_table_insert(lookup_table, lte);
1200                 lte->unhashed = 0;
1201         }
1202         if (lte_ret)
1203                 *lte_ret = lte;
1204         return 0;
1205 }