]> wimlib.net Git - wimlib/blob - src/lookup_table.c
Refactor headers
[wimlib] / src / lookup_table.c
1 /*
2  * lookup_table.c
3  *
4  * Lookup table, implemented as a hash table, that maps SHA1 message digests to
5  * data streams.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #ifdef HAVE_CONFIG_H
28 #  include "config.h"
29 #endif
30
31 #include "wimlib/buffer_io.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/lookup_table.h"
35 #include "wimlib/metadata.h"
36 #include "wimlib/paths.h"
37 #include "wimlib/resource.h"
38 #include "wimlib/util.h"
39
40 #include <errno.h>
41 #include <stdlib.h>
42 #ifdef WITH_FUSE
43 #  include <unistd.h> /* for unlink() */
44 #endif
45
46 /* Size of each lookup table entry in the WIM file. */
47 #define WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE 50
48
49 struct wim_lookup_table *
50 new_lookup_table(size_t capacity)
51 {
52         struct wim_lookup_table *table;
53         struct hlist_head *array;
54
55         table = CALLOC(1, sizeof(struct wim_lookup_table));
56         if (table) {
57                 array = CALLOC(capacity, sizeof(array[0]));
58                 if (array) {
59                         table->num_entries = 0;
60                         table->capacity = capacity;
61                         table->array = array;
62                 } else {
63                         FREE(table);
64                         table = NULL;
65                         ERROR("Failed to allocate memory for lookup table "
66                               "with capacity %zu", capacity);
67                 }
68         }
69         return table;
70 }
71
72 struct wim_lookup_table_entry *
73 new_lookup_table_entry(void)
74 {
75         struct wim_lookup_table_entry *lte;
76
77         lte = CALLOC(1, sizeof(struct wim_lookup_table_entry));
78         if (lte) {
79                 lte->part_number  = 1;
80                 lte->refcnt       = 1;
81         } else {
82                 ERROR("Out of memory (tried to allocate %zu bytes for "
83                       "lookup table entry)",
84                       sizeof(struct wim_lookup_table_entry));
85         }
86         return lte;
87 }
88
89 struct wim_lookup_table_entry *
90 clone_lookup_table_entry(const struct wim_lookup_table_entry *old)
91 {
92         struct wim_lookup_table_entry *new;
93
94         new = MALLOC(sizeof(*new));
95         if (!new)
96                 return NULL;
97
98         memcpy(new, old, sizeof(*old));
99         new->extracted_file = NULL;
100         switch (new->resource_location) {
101 #ifdef __WIN32__
102         case RESOURCE_WIN32:
103         case RESOURCE_WIN32_ENCRYPTED:
104 #else
105         case RESOURCE_IN_FILE_ON_DISK:
106 #endif
107 #ifdef WITH_FUSE
108         case RESOURCE_IN_STAGING_FILE:
109                 BUILD_BUG_ON((void*)&old->file_on_disk !=
110                              (void*)&old->staging_file_name);
111 #endif
112                 new->file_on_disk = TSTRDUP(old->file_on_disk);
113                 if (!new->file_on_disk)
114                         goto out_free;
115                 break;
116         case RESOURCE_IN_ATTACHED_BUFFER:
117                 new->attached_buffer = MALLOC(wim_resource_size(old));
118                 if (!new->attached_buffer)
119                         goto out_free;
120                 memcpy(new->attached_buffer, old->attached_buffer,
121                        wim_resource_size(old));
122                 break;
123 #ifdef WITH_NTFS_3G
124         case RESOURCE_IN_NTFS_VOLUME:
125                 if (old->ntfs_loc) {
126                         struct ntfs_location *loc;
127                         loc = MALLOC(sizeof(*loc));
128                         if (!loc)
129                                 goto out_free;
130                         memcpy(loc, old->ntfs_loc, sizeof(*loc));
131                         loc->path = NULL;
132                         loc->stream_name = NULL;
133                         new->ntfs_loc = loc;
134                         loc->path = STRDUP(old->ntfs_loc->path);
135                         if (!loc->path)
136                                 goto out_free;
137                         loc->stream_name = MALLOC((loc->stream_name_nchars + 1) * 2);
138                         if (!loc->stream_name)
139                                 goto out_free;
140                         memcpy(loc->stream_name,
141                                old->ntfs_loc->stream_name,
142                                (loc->stream_name_nchars + 1) * 2);
143                 }
144                 break;
145 #endif
146         default:
147                 break;
148         }
149         return new;
150 out_free:
151         free_lookup_table_entry(new);
152         return NULL;
153 }
154
155 void
156 free_lookup_table_entry(struct wim_lookup_table_entry *lte)
157 {
158         if (lte) {
159                 switch (lte->resource_location) {
160         #ifdef __WIN32__
161                 case RESOURCE_WIN32:
162                 case RESOURCE_WIN32_ENCRYPTED:
163         #else
164                 case RESOURCE_IN_FILE_ON_DISK:
165         #endif
166         #ifdef WITH_FUSE
167                 case RESOURCE_IN_STAGING_FILE:
168                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
169                                      (void*)&lte->staging_file_name);
170         #endif
171                 case RESOURCE_IN_ATTACHED_BUFFER:
172                         BUILD_BUG_ON((void*)&lte->file_on_disk !=
173                                      (void*)&lte->attached_buffer);
174                         FREE(lte->file_on_disk);
175                         break;
176 #ifdef WITH_NTFS_3G
177                 case RESOURCE_IN_NTFS_VOLUME:
178                         if (lte->ntfs_loc) {
179                                 FREE(lte->ntfs_loc->path);
180                                 FREE(lte->ntfs_loc->stream_name);
181                                 FREE(lte->ntfs_loc);
182                         }
183                         break;
184 #endif
185                 default:
186                         break;
187                 }
188                 FREE(lte);
189         }
190 }
191
/* Visitor for for_lookup_table_entry() that frees each entry.  The second
 * argument is unused.  Always returns 0 so that iteration continues. */
static int
do_free_lookup_table_entry(struct wim_lookup_table_entry *entry, void *ignore)
{
	(void)ignore;
	free_lookup_table_entry(entry);
	return 0;
}
198
199
200 void
201 free_lookup_table(struct wim_lookup_table *table)
202 {
203         DEBUG2("Freeing lookup table");
204         if (table) {
205                 if (table->array) {
206                         for_lookup_table_entry(table,
207                                                do_free_lookup_table_entry,
208                                                NULL);
209                         FREE(table->array);
210                 }
211                 FREE(table);
212         }
213 }
214
215 /*
216  * Inserts an entry into the lookup table.
217  *
218  * @table:      A pointer to the lookup table.
219  * @lte:        A pointer to the entry to insert.
220  */
221 void
222 lookup_table_insert(struct wim_lookup_table *table,
223                     struct wim_lookup_table_entry *lte)
224 {
225         size_t i = lte->hash_short % table->capacity;
226         hlist_add_head(&lte->hash_list, &table->array[i]);
227
228         /* XXX Make the table grow when too many entries have been inserted. */
229         table->num_entries++;
230 }
231
/* Releases a lookup table entry that is no longer referenced.  When built with
 * FUSE support and the entry's data lives in a staging file, the staging file
 * is deleted and the entry is removed from its unhashed list before the entry
 * itself is freed. */
static void
finalize_lte(struct wim_lookup_table_entry *lte)
{
#ifdef WITH_FUSE
	if (lte->resource_location == RESOURCE_IN_STAGING_FILE) {
		unlink(lte->staging_file_name);
		list_del(&lte->unhashed_list);
	}
#endif
	free_lookup_table_entry(lte);
}
243
244 /* Decrements the reference count for the lookup table entry @lte.  If its
245  * reference count reaches 0, it is unlinked from the lookup table.  If,
246  * furthermore, the entry has no opened file descriptors associated with it, the
247  * entry is freed.  */
248 void
249 lte_decrement_refcnt(struct wim_lookup_table_entry *lte,
250                      struct wim_lookup_table *table)
251 {
252         wimlib_assert(lte != NULL);
253         wimlib_assert(lte->refcnt != 0);
254         if (--lte->refcnt == 0) {
255                 if (lte->unhashed)
256                         list_del(&lte->unhashed_list);
257                 else
258                         lookup_table_unlink(table, lte);
259         #ifdef WITH_FUSE
260                 if (lte->num_opened_fds == 0)
261         #endif
262                         finalize_lte(lte);
263         }
264 }
265
#ifdef WITH_FUSE
/* Drops one opened file descriptor from @lte; a count that is already 0 is
 * left alone.  The entry is finalized once both the descriptor count and the
 * reference count are 0. */
void
lte_decrement_num_opened_fds(struct wim_lookup_table_entry *lte)
{
	if (lte->num_opened_fds == 0)
		return;

	lte->num_opened_fds--;
	if (lte->num_opened_fds == 0 && lte->refcnt == 0)
		finalize_lte(lte);
}
#endif
275
276 /* Calls a function on all the entries in the WIM lookup table.  Stop early and
277  * return nonzero if any call to the function returns nonzero. */
278 int
279 for_lookup_table_entry(struct wim_lookup_table *table,
280                        int (*visitor)(struct wim_lookup_table_entry *, void *),
281                        void *arg)
282 {
283         struct wim_lookup_table_entry *lte;
284         struct hlist_node *pos, *tmp;
285         int ret;
286
287         for (size_t i = 0; i < table->capacity; i++) {
288                 hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
289                                           hash_list)
290                 {
291                         wimlib_assert2(!(lte->resource_entry.flags & WIM_RESHDR_FLAG_METADATA));
292                         ret = visitor(lte, arg);
293                         if (ret)
294                                 return ret;
295                 }
296         }
297         return 0;
298 }
299
300 int
301 cmp_streams_by_wim_position(const void *p1, const void *p2)
302 {
303         const struct wim_lookup_table_entry *lte1, *lte2;
304         lte1 = *(const struct wim_lookup_table_entry**)p1;
305         lte2 = *(const struct wim_lookup_table_entry**)p2;
306         if (lte1->resource_entry.offset < lte2->resource_entry.offset)
307                 return -1;
308         else if (lte1->resource_entry.offset > lte2->resource_entry.offset)
309                 return 1;
310         else
311                 return 0;
312 }
313
314
/* Visitor that appends @lte to an array being filled in.  @_pp points to the
 * caller's write cursor (a pointer to the next free array slot); the entry is
 * stored in that slot and the cursor is advanced by one.  Always returns 0. */
static int
add_lte_to_array(struct wim_lookup_table_entry *lte,
		 void *_pp)
{
	struct wim_lookup_table_entry ***cursor = _pp;

	**cursor = lte;
	(*cursor)++;
	return 0;
}
323
324 /* Iterate through the lookup table entries, but first sort them by stream
325  * offset in the WIM.  Caution: this is intended to be used when the stream
326  * offset field has actually been set. */
327 int
328 for_lookup_table_entry_pos_sorted(struct wim_lookup_table *table,
329                                   int (*visitor)(struct wim_lookup_table_entry *,
330                                                  void *),
331                                   void *arg)
332 {
333         struct wim_lookup_table_entry **lte_array, **p;
334         size_t num_streams = table->num_entries;
335         int ret;
336
337         lte_array = MALLOC(num_streams * sizeof(lte_array[0]));
338         if (!lte_array)
339                 return WIMLIB_ERR_NOMEM;
340         p = lte_array;
341         for_lookup_table_entry(table, add_lte_to_array, &p);
342
343         wimlib_assert(p == lte_array + num_streams);
344
345         qsort(lte_array, num_streams, sizeof(lte_array[0]),
346               cmp_streams_by_wim_position);
347         ret = 0;
348         for (size_t i = 0; i < num_streams; i++) {
349                 ret = visitor(lte_array[i], arg);
350                 if (ret)
351                         break;
352         }
353         FREE(lte_array);
354         return ret;
355 }
356
357 /*
358  * Reads the lookup table from a WIM file.
359  *
360  * Saves lookup table entries for non-metadata streams in a hash table, and
361  * saves the metadata entry for each image in a special per-image location (the
362  * image_metadata array).
363  */
364 int
365 read_lookup_table(WIMStruct *w)
366 {
367         int ret;
368         size_t num_entries;
369         struct wim_lookup_table *table;
370         struct wim_lookup_table_entry *cur_entry, *duplicate_entry;
371         u8 table_buf[(BUFFER_SIZE / WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE) *
372                         WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
373         const u8 *p;
374         off_t offset;
375         size_t buf_entries_remaining;
376
377         DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"",
378               w->hdr.lookup_table_res_entry.offset,
379               w->hdr.lookup_table_res_entry.original_size);
380
381         if (resource_is_compressed(&w->hdr.lookup_table_res_entry)) {
382                 ERROR("Didn't expect a compressed lookup table!");
383                 ERROR("Ask the author to implement support for this.");
384                 return WIMLIB_ERR_COMPRESSED_LOOKUP_TABLE;
385         }
386
387         num_entries = w->hdr.lookup_table_res_entry.size /
388                       WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE;
389         table = new_lookup_table(num_entries * 2 + 1);
390         if (!table)
391                 return WIMLIB_ERR_NOMEM;
392
393         w->current_image = 0;
394         offset = w->hdr.lookup_table_res_entry.offset;
395         buf_entries_remaining = 0;
396         for (; num_entries != 0; num_entries--, buf_entries_remaining--) {
397                 if (buf_entries_remaining == 0) {
398                         size_t entries_to_read, bytes_to_read;
399
400                         entries_to_read = min(sizeof(table_buf) /
401                                                 WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE,
402                                               num_entries);
403                         bytes_to_read = entries_to_read *
404                                                 WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE;
405                         if (full_pread(w->in_fd, table_buf,
406                                        bytes_to_read, offset) != bytes_to_read)
407                         {
408                                 ERROR_WITH_ERRNO("Error reading lookup table "
409                                                  "(offset=%"PRIu64")", offset);
410                                 ret = WIMLIB_ERR_READ;
411                                 goto out_free_lookup_table;
412                         }
413                         offset += bytes_to_read;
414                         p = table_buf;
415                         buf_entries_remaining = entries_to_read;
416                 }
417                 cur_entry = new_lookup_table_entry();
418                 if (!cur_entry) {
419                         ret = WIMLIB_ERR_NOMEM;
420                         goto out_free_lookup_table;
421                 }
422
423                 cur_entry->wim = w;
424                 cur_entry->resource_location = RESOURCE_IN_WIM;
425                 p = get_resource_entry(p, &cur_entry->resource_entry);
426                 p = get_u16(p, &cur_entry->part_number);
427                 p = get_u32(p, &cur_entry->refcnt);
428                 p = get_bytes(p, SHA1_HASH_SIZE, cur_entry->hash);
429
430                 if (cur_entry->part_number != w->hdr.part_number) {
431                         ERROR("A lookup table entry in part %hu of the WIM "
432                               "points to part %hu",
433                               w->hdr.part_number, cur_entry->part_number);
434                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
435                         goto out_free_cur_entry;
436                 }
437
438                 if (is_zero_hash(cur_entry->hash)) {
439                         ERROR("The WIM lookup table contains an entry with a "
440                               "SHA1 message digest of all 0's");
441                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
442                         goto out_free_cur_entry;
443                 }
444
445                 if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
446                     && (cur_entry->resource_entry.size !=
447                         cur_entry->resource_entry.original_size))
448                 {
449                 #ifdef ENABLE_ERROR_MESSAGES
450                         ERROR("Found uncompressed resource with original size "
451                               "not the same as compressed size");
452                         ERROR("The lookup table entry for the resource is as follows:");
453                         print_lookup_table_entry(cur_entry, stderr);
454                 #endif
455                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
456                         goto out_free_cur_entry;
457                 }
458
459                 if (cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_METADATA) {
460                         /* Lookup table entry for a metadata resource */
461                         if (cur_entry->refcnt != 1) {
462                         #ifdef ENABLE_ERROR_MESSAGES
463                                 ERROR("Found metadata resource with refcnt != 1:");
464                                 print_lookup_table_entry(cur_entry, stderr);
465                         #endif
466                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
467                                 goto out_free_cur_entry;
468                         }
469
470                         if (w->hdr.part_number != 1) {
471                                 ERROR("Found a metadata resource in a "
472                                       "non-first part of the split WIM!");
473                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
474                                 goto out_free_cur_entry;
475                         }
476                         if (w->current_image == w->hdr.image_count) {
477                                 ERROR("The WIM header says there are %u images "
478                                       "in the WIM, but we found more metadata "
479                                       "resources than this", w->hdr.image_count);
480                                 ret = WIMLIB_ERR_IMAGE_COUNT;
481                                 goto out_free_cur_entry;
482                         }
483
484                         /* Notice very carefully:  We are assigning the metadata
485                          * resources in the exact order mirrored by their lookup
486                          * table entries on disk, which is the behavior of
487                          * Microsoft's software.  In particular, this overrides
488                          * the actual locations of the metadata resources
489                          * themselves in the WIM file as well as any information
490                          * written in the XML data. */
491                         DEBUG("Found metadata resource for image %u at "
492                               "offset %"PRIu64".",
493                               w->current_image + 1,
494                               cur_entry->resource_entry.offset);
495                         w->image_metadata[
496                                 w->current_image++]->metadata_lte = cur_entry;
497                 } else {
498                         /* Lookup table entry for a stream that is not a
499                          * metadata resource */
500                         duplicate_entry = __lookup_resource(table, cur_entry->hash);
501                         if (duplicate_entry) {
502                         #ifdef ENABLE_ERROR_MESSAGES
503                                 ERROR("The WIM lookup table contains two entries with the "
504                                       "same SHA1 message digest!");
505                                 ERROR("The first entry is:");
506                                 print_lookup_table_entry(duplicate_entry, stderr);
507                                 ERROR("The second entry is:");
508                                 print_lookup_table_entry(cur_entry, stderr);
509                         #endif
510                                 ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
511                                 goto out_free_cur_entry;
512                         }
513                         lookup_table_insert(table, cur_entry);
514                 }
515         }
516
517         if (w->hdr.part_number == 1 && w->current_image != w->hdr.image_count)
518         {
519                 ERROR("The WIM header says there are %u images "
520                       "in the WIM, but we only found %d metadata "
521                       "resources!", w->hdr.image_count, w->current_image);
522                 ret = WIMLIB_ERR_IMAGE_COUNT;
523                 goto out_free_lookup_table;
524         }
525         DEBUG("Done reading lookup table.");
526         w->lookup_table = table;
527         ret = 0;
528         goto out;
529 out_free_cur_entry:
530         FREE(cur_entry);
531 out_free_lookup_table:
532         free_lookup_table(table);
533 out:
534         w->current_image = 0;
535         return ret;
536 }
537
538
539 static u8 *
540 write_lookup_table_entry(const struct wim_lookup_table_entry *lte, u8 *buf_p)
541 {
542         buf_p = put_resource_entry(buf_p, &lte->output_resource_entry);
543         buf_p = put_u16(buf_p, lte->part_number);
544         buf_p = put_u32(buf_p, lte->out_refcnt);
545         buf_p = put_bytes(buf_p, SHA1_HASH_SIZE, lte->hash);
546         return buf_p;
547 }
548
549 int
550 write_lookup_table_from_stream_list(struct list_head *stream_list,
551                                     int out_fd,
552                                     struct resource_entry *out_res_entry)
553 {
554         int ret;
555         off_t start_offset;
556         u8 table_buf[(BUFFER_SIZE / WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE) *
557                         WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
558         u8 *buf_p;
559         size_t table_size;
560         size_t bytes_to_write;
561         struct wim_lookup_table_entry *lte;
562
563         start_offset = filedes_offset(out_fd);
564         if (start_offset == -1)
565                 goto write_error;
566
567         buf_p = table_buf;
568         table_size = 0;
569         list_for_each_entry(lte, stream_list, lookup_table_list) {
570                 if (buf_p == table_buf + sizeof(table_buf)) {
571                         bytes_to_write = sizeof(table_buf);
572                         if (full_write(out_fd, table_buf,
573                                        bytes_to_write) != bytes_to_write)
574                                 goto write_error;
575                         table_size += bytes_to_write;
576                         buf_p = table_buf;
577                 }
578                 buf_p = write_lookup_table_entry(lte, buf_p);
579         }
580         bytes_to_write = buf_p - table_buf;
581         if (bytes_to_write != 0) {
582                 if (full_write(out_fd, table_buf,
583                                bytes_to_write) != bytes_to_write)
584                         goto write_error;
585                 table_size += bytes_to_write;
586         }
587         out_res_entry->offset        = start_offset;
588         out_res_entry->size          = table_size;
589         out_res_entry->original_size = table_size;
590         out_res_entry->flags         = WIM_RESHDR_FLAG_METADATA;
591         ret = 0;
592 out:
593         return ret;
594 write_error:
595         ERROR_WITH_ERRNO("Failed to write lookup table");
596         ret = WIMLIB_ERR_WRITE;
597         goto out;
598 }
599
600 static int
601 append_lookup_table_entry(struct wim_lookup_table_entry *lte, void *_list)
602 {
603         if (lte->out_refcnt != 0)
604                 list_add_tail(&lte->lookup_table_list, (struct list_head*)_list);
605         return 0;
606 }
607
608 /* Writes the WIM lookup table to the output file. */
609 int
610 write_lookup_table(WIMStruct *w, int image, struct resource_entry *out_res_entry)
611 {
612         LIST_HEAD(stream_list);
613         int start_image;
614         int end_image;
615
616         if (image == WIMLIB_ALL_IMAGES) {
617                 start_image = 1;
618                 end_image = w->hdr.image_count;
619         } else {
620                 start_image = image;
621                 end_image = image;
622         }
623
624         for (int i = start_image; i <= end_image; i++) {
625                 struct wim_lookup_table_entry *metadata_lte;
626
627                 metadata_lte = w->image_metadata[i - 1]->metadata_lte;
628                 metadata_lte->out_refcnt = 1;
629                 metadata_lte->output_resource_entry.flags |= WIM_RESHDR_FLAG_METADATA;
630                 append_lookup_table_entry(metadata_lte, &stream_list);
631         }
632         for_lookup_table_entry(w->lookup_table,
633                                append_lookup_table_entry,
634                                &stream_list);
635         return write_lookup_table_from_stream_list(&stream_list,
636                                                    w->out_fd,
637                                                    out_res_entry);
638 }
639
640 int
641 lte_zero_real_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
642 {
643         lte->real_refcnt = 0;
644         return 0;
645 }
646
647 int
648 lte_zero_out_refcnt(struct wim_lookup_table_entry *lte, void *_ignore)
649 {
650         lte->out_refcnt = 0;
651         return 0;
652 }
653
654 int
655 lte_free_extracted_file(struct wim_lookup_table_entry *lte, void *_ignore)
656 {
657         if (lte->extracted_file != NULL) {
658                 FREE(lte->extracted_file);
659                 lte->extracted_file = NULL;
660         }
661         return 0;
662 }
663
664 void
665 print_lookup_table_entry(const struct wim_lookup_table_entry *lte, FILE *out)
666 {
667         if (!lte) {
668                 tputc(T('\n'), out);
669                 return;
670         }
671         tfprintf(out, T("Offset            = %"PRIu64" bytes\n"),
672                  lte->resource_entry.offset);
673
674         tfprintf(out, T("Size              = %"PRIu64" bytes\n"),
675                  (u64)lte->resource_entry.size);
676
677         tfprintf(out, T("Original size     = %"PRIu64" bytes\n"),
678                  lte->resource_entry.original_size);
679
680         tfprintf(out, T("Part Number       = %hu\n"), lte->part_number);
681         tfprintf(out, T("Reference Count   = %u\n"), lte->refcnt);
682
683         if (lte->unhashed) {
684                 tfprintf(out, T("(Unhashed: inode %p, stream_id = %u)\n"),
685                          lte->back_inode, lte->back_stream_id);
686         } else {
687                 tfprintf(out, T("Hash              = 0x"));
688                 print_hash(lte->hash, out);
689                 tputc(T('\n'), out);
690         }
691
692         tfprintf(out, T("Flags             = "));
693         u8 flags = lte->resource_entry.flags;
694         if (flags & WIM_RESHDR_FLAG_COMPRESSED)
695                 tfputs(T("WIM_RESHDR_FLAG_COMPRESSED, "), out);
696         if (flags & WIM_RESHDR_FLAG_FREE)
697                 tfputs(T("WIM_RESHDR_FLAG_FREE, "), out);
698         if (flags & WIM_RESHDR_FLAG_METADATA)
699                 tfputs(T("WIM_RESHDR_FLAG_METADATA, "), out);
700         if (flags & WIM_RESHDR_FLAG_SPANNED)
701                 tfputs(T("WIM_RESHDR_FLAG_SPANNED, "), out);
702         tputc(T('\n'), out);
703         switch (lte->resource_location) {
704         case RESOURCE_IN_WIM:
705                 if (lte->wim->filename) {
706                         tfprintf(out, T("WIM file          = `%"TS"'\n"),
707                                  lte->wim->filename);
708                 }
709                 break;
710 #ifdef __WIN32__
711         case RESOURCE_WIN32:
712         case RESOURCE_WIN32_ENCRYPTED:
713 #else
714         case RESOURCE_IN_FILE_ON_DISK:
715 #endif
716                 tfprintf(out, T("File on Disk      = `%"TS"'\n"),
717                          lte->file_on_disk);
718                 break;
719 #ifdef WITH_FUSE
720         case RESOURCE_IN_STAGING_FILE:
721                 tfprintf(out, T("Staging File      = `%"TS"'\n"),
722                                 lte->staging_file_name);
723                 break;
724 #endif
725         default:
726                 break;
727         }
728         tputc(T('\n'), out);
729 }
730
/* Visitor that prints @lte to the stdio stream passed as @fp.  Always returns
 * 0. */
static int
do_print_lookup_table_entry(struct wim_lookup_table_entry *lte, void *fp)
{
	FILE *stream = fp;

	print_lookup_table_entry(lte, stream);
	return 0;
}
737
738 /*
739  * Prints the lookup table of a WIM file.
740  */
741 WIMLIBAPI void
742 wimlib_print_lookup_table(WIMStruct *w)
743 {
744         for_lookup_table_entry(w->lookup_table,
745                                do_print_lookup_table_entry,
746                                stdout);
747 }
748
749 /* Given a SHA1 message digest, return the corresponding entry in the WIM's
750  * lookup table, or NULL if there is none.  */
751 struct wim_lookup_table_entry *
752 __lookup_resource(const struct wim_lookup_table *table, const u8 hash[])
753 {
754         size_t i;
755         struct wim_lookup_table_entry *lte;
756         struct hlist_node *pos;
757
758         wimlib_assert(table != NULL);
759         wimlib_assert(hash != NULL);
760
761         i = *(size_t*)hash % table->capacity;
762         hlist_for_each_entry(lte, pos, &table->array[i], hash_list)
763                 if (hashes_equal(hash, lte->hash))
764                         return lte;
765         return NULL;
766 }
767
#ifdef WITH_FUSE
/*
 * Looks up the dentry, lookup table entry, and stream index for the WIM file
 * stream named by @path.
 *
 * When LOOKUP_FLAG_ADS_OK is set in @lookup_flags and @path names a stream,
 * the stream name is split off and resolved to one of the inode's alternate
 * data stream entries; the reported stream index is then the ADS index plus 1.
 * Otherwise the inode's own lookup table entry is used and the stream index is
 * 0.  Each of @dentry_ret, @lte_ret, and @stream_idx_ret may be NULL if the
 * caller does not want that result.
 *
 * Returns 0 on success, -errno if the dentry is not found, -EISDIR if the path
 * is a directory and LOOKUP_FLAG_DIRECTORY_OK is not set, or -ENOENT if the
 * named stream does not exist.
 *
 * This is only for pre-resolved inodes.
 */
int
lookup_resource(WIMStruct *w,
		const tchar *path,
		int lookup_flags,
		struct wim_dentry **dentry_ret,
		struct wim_lookup_table_entry **lte_ret,
		u16 *stream_idx_ret)
{
	struct wim_dentry *dentry;
	struct wim_inode *inode;
	struct wim_lookup_table_entry *lte;
	u16 stream_idx;
	const tchar *stream_name = NULL;
	tchar *separator = NULL;

	if (lookup_flags & LOOKUP_FLAG_ADS_OK) {
		stream_name = path_stream_name(path);
		if (stream_name) {
			/* Temporarily cut the path off at the character just
			 * before the stream name so the dentry lookup sees
			 * only the file part. */
			separator = (tchar*)stream_name - 1;
			*separator = T('\0');
		}
	}

	dentry = get_dentry(w, path);

	/* Put the separator back before any return. */
	if (separator)
		*separator = T(':');

	if (!dentry)
		return -errno;

	inode = dentry->d_inode;

	wimlib_assert(inode->i_resolved);

	if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK)
	      && inode_is_directory(inode))
		return -EISDIR;

	if (stream_name) {
		u16 ads_idx;
		struct wim_ads_entry *ads_entry;

		ads_entry = inode_get_ads_entry(inode, stream_name, &ads_idx);
		if (!ads_entry)
			return -ENOENT;

		lte = ads_entry->lte;
		/* Index 0 is used for the unnamed stream below. */
		stream_idx = ads_idx + 1;
	} else {
		lte = inode->i_lte;
		stream_idx = 0;
	}

	if (dentry_ret)
		*dentry_ret = dentry;
	if (lte_ret)
		*lte_ret = lte;
	if (stream_idx_ret)
		*stream_idx_ret = stream_idx;
	return 0;
}
#endif
838
839 /*
840  * XXX Probably should store the compression type directly in the lookup table
841  * entry
842  */
843 int
844 wim_resource_compression_type(const struct wim_lookup_table_entry *lte)
845 {
846         if (!(lte->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
847             || lte->resource_location != RESOURCE_IN_WIM)
848                 return WIMLIB_COMPRESSION_TYPE_NONE;
849         return wimlib_get_compression_type(lte->wim);
850 }
851
852 /* Resolve an inode's lookup table entries
853  *
854  * This replaces the SHA1 hash fields (which are used to lookup an entry in the
855  * lookup table) with pointers directly to the lookup table entries.  A circular
856  * linked list of streams sharing the same lookup table entry is created.
857  *
858  * This function always succeeds; unresolved lookup table entries are given a
859  * NULL pointer.
860  */
861 void
862 inode_resolve_ltes(struct wim_inode *inode, struct wim_lookup_table *table)
863 {
864
865         if (!inode->i_resolved) {
866                 struct wim_lookup_table_entry *lte;
867                 /* Resolve the default file stream */
868                 lte = __lookup_resource(table, inode->i_hash);
869                 inode->i_lte = lte;
870                 inode->i_resolved = 1;
871
872                 /* Resolve the alternate data streams */
873                 for (u16 i = 0; i < inode->i_num_ads; i++) {
874                         struct wim_ads_entry *cur_entry = &inode->i_ads_entries[i];
875                         lte = __lookup_resource(table, cur_entry->hash);
876                         cur_entry->lte = lte;
877                 }
878         }
879 }
880
881 void
882 inode_unresolve_ltes(struct wim_inode *inode)
883 {
884         if (inode->i_resolved) {
885                 if (inode->i_lte)
886                         copy_hash(inode->i_hash, inode->i_lte->hash);
887                 else
888                         zero_out_hash(inode->i_hash);
889
890                 for (u16 i = 0; i < inode->i_num_ads; i++) {
891                         if (inode->i_ads_entries[i].lte)
892                                 copy_hash(inode->i_ads_entries[i].hash,
893                                           inode->i_ads_entries[i].lte->hash);
894                         else
895                                 zero_out_hash(inode->i_ads_entries[i].hash);
896                 }
897                 inode->i_resolved = 0;
898         }
899 }
900
901 /*
902  * Returns the lookup table entry for stream @stream_idx of the inode, where
903  * stream_idx = 0 means the default un-named file stream, and stream_idx >= 1
904  * corresponds to an alternate data stream.
905  *
906  * This works for both resolved and un-resolved inodes.
907  */
908 struct wim_lookup_table_entry *
909 inode_stream_lte(const struct wim_inode *inode, unsigned stream_idx,
910                  const struct wim_lookup_table *table)
911 {
912         if (inode->i_resolved)
913                 return inode_stream_lte_resolved(inode, stream_idx);
914         else
915                 return inode_stream_lte_unresolved(inode, stream_idx, table);
916 }
917
918 struct wim_lookup_table_entry *
919 inode_unnamed_lte_resolved(const struct wim_inode *inode)
920 {
921         wimlib_assert(inode->i_resolved);
922         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
923                 if (inode_stream_name_nbytes(inode, i) == 0 &&
924                     !is_zero_hash(inode_stream_hash_resolved(inode, i)))
925                 {
926                         return inode_stream_lte_resolved(inode, i);
927                 }
928         }
929         return NULL;
930 }
931
932 struct wim_lookup_table_entry *
933 inode_unnamed_lte_unresolved(const struct wim_inode *inode,
934                              const struct wim_lookup_table *table)
935 {
936         wimlib_assert(!inode->i_resolved);
937         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
938                 if (inode_stream_name_nbytes(inode, i) == 0 &&
939                     !is_zero_hash(inode_stream_hash_unresolved(inode, i)))
940                 {
941                         return inode_stream_lte_unresolved(inode, i, table);
942                 }
943         }
944         return NULL;
945 }
946
947 /* Return the lookup table entry for the unnamed data stream of an inode, or
948  * NULL if there is none.
949  *
950  * You'd think this would be easier than it actually is, since the unnamed data
951  * stream should be the one referenced from the inode itself.  Alas, if there
952  * are named data streams, Microsoft's "imagex.exe" program will put the unnamed
953  * data stream in one of the alternate data streams instead of inside the WIM
954  * dentry itself.  So we need to check the alternate data streams too.
955  *
956  * Also, note that a dentry may appear to have more than one unnamed stream, but
957  * if the SHA1 message digest is all 0's then the corresponding stream does not
958  * really "count" (this is the case for the inode's own file stream when the
959  * file stream that should be there is actually in one of the alternate stream
960  * entries.).  This is despite the fact that we may need to extract such a
961  * missing entry as an empty file or empty named data stream.
962  */
963 struct wim_lookup_table_entry *
964 inode_unnamed_lte(const struct wim_inode *inode,
965                   const struct wim_lookup_table *table)
966 {
967         if (inode->i_resolved)
968                 return inode_unnamed_lte_resolved(inode);
969         else
970                 return inode_unnamed_lte_unresolved(inode, table);
971 }
972
973 static int
974 lte_add_stream_size(struct wim_lookup_table_entry *lte, void *total_bytes_p)
975 {
976         *(u64*)total_bytes_p += lte->resource_entry.size;
977         return 0;
978 }
979
980 u64
981 lookup_table_total_stream_size(struct wim_lookup_table *table)
982 {
983         u64 total_size = 0;
984         for_lookup_table_entry(table, lte_add_stream_size, &total_size);
985         return total_size;
986 }
987
988 struct wim_lookup_table_entry **
989 retrieve_lte_pointer(struct wim_lookup_table_entry *lte)
990 {
991         wimlib_assert(lte->unhashed);
992         struct wim_inode *inode = lte->back_inode;
993         u32 stream_id = lte->back_stream_id;
994         if (stream_id == 0)
995                 return &inode->i_lte;
996         else
997                 for (u16 i = 0; i < inode->i_num_ads; i++)
998                         if (inode->i_ads_entries[i].stream_id == stream_id)
999                                 return &inode->i_ads_entries[i].lte;
1000         wimlib_assert(0);
1001         return NULL;
1002 }
1003
1004 /* Calculate the SHA1 message digest of a stream and move it from the list of
1005  * unhashed streams to the stream lookup table, possibly joining it with an
1006  * existing lookup table entry for an identical stream.
1007  *
1008  * @lte:  An unhashed lookup table entry.
1009  * @lookup_table:  Lookup table for the WIM.
1010  * @lte_ret:  On success, write a pointer to the resulting lookup table
1011  *            entry to this location.  This will be the same as @lte
1012  *            if it was inserted into the lookup table, or different if
1013  *            a duplicate stream was found.
1014  *
1015  * Returns 0 on success; nonzero if there is an error reading the stream.
1016  */
1017 int
1018 hash_unhashed_stream(struct wim_lookup_table_entry *lte,
1019                      struct wim_lookup_table *lookup_table,
1020                      struct wim_lookup_table_entry **lte_ret)
1021 {
1022         int ret;
1023         struct wim_lookup_table_entry *duplicate_lte;
1024         struct wim_lookup_table_entry **back_ptr;
1025
1026         wimlib_assert(lte->unhashed);
1027
1028         /* back_ptr must be saved because @back_inode and @back_stream_id are in
1029          * union with the SHA1 message digest and will no longer be valid once
1030          * the SHA1 has been calculated. */
1031         back_ptr = retrieve_lte_pointer(lte);
1032
1033         ret = sha1_resource(lte);
1034         if (ret)
1035                 return ret;
1036
1037         /* Look for a duplicate stream */
1038         duplicate_lte = __lookup_resource(lookup_table, lte->hash);
1039         list_del(&lte->unhashed_list);
1040         if (duplicate_lte) {
1041                 /* We have a duplicate stream.  Transfer the reference counts
1042                  * from this stream to the duplicate, update the reference to
1043                  * this stream (in an inode or ads_entry) to point to the
1044                  * duplicate, then free this stream. */
1045                 wimlib_assert(!(duplicate_lte->unhashed));
1046                 duplicate_lte->refcnt += lte->refcnt;
1047                 duplicate_lte->out_refcnt += lte->refcnt;
1048                 *back_ptr = duplicate_lte;
1049                 free_lookup_table_entry(lte);
1050                 lte = duplicate_lte;
1051         } else {
1052                 /* No duplicate stream, so we need to insert
1053                  * this stream into the lookup table and treat
1054                  * it as a hashed stream. */
1055                 lookup_table_insert(lookup_table, lte);
1056                 lte->unhashed = 0;
1057         }
1058         if (lte_ret)
1059                 *lte_ret = lte;
1060         return 0;
1061 }
1062