]> wimlib.net Git - wimlib/blob - src/lookup_table.c
3a1e2d25fa6b7060504115f6d9d2876d19938d2e
[wimlib] / src / lookup_table.c
1 /*
2  * lookup_table.c
3  *
4  * Lookup table, implemented as a hash table, that maps dentries to file
5  * resources.
6  */
7
8 /*
9  * Copyright (C) 2012 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #include "wimlib_internal.h"
28 #include "lookup_table.h"
29 #include "io.h"
30 #include <errno.h>
31
32 #ifdef WITH_FUSE
33 #include <unistd.h>
34 #endif
35
36 struct lookup_table *new_lookup_table(size_t capacity)
37 {
38         struct lookup_table *table;
39         struct hlist_head *array;
40
41         table = MALLOC(sizeof(struct lookup_table));
42         if (!table)
43                 goto err;
44         array = CALLOC(capacity, sizeof(array[0]));
45         if (!array) {
46                 FREE(table);
47                 goto err;
48         }
49         table->num_entries = 0;
50         table->capacity = capacity;
51         table->array = array;
52         return table;
53 err:
54         ERROR("Failed to allocate memory for lookup table with capacity %zu",
55               capacity);
56         return NULL;
57 }
58
59 struct lookup_table_entry *new_lookup_table_entry()
60 {
61         struct lookup_table_entry *lte;
62         
63         lte = CALLOC(1, sizeof(struct lookup_table_entry));
64         if (!lte) {
65                 ERROR("Out of memory (tried to allocate %zu bytes for "
66                       "lookup table entry)",
67                       sizeof(struct lookup_table_entry));
68                 return NULL;
69         }
70
71         lte->part_number  = 1;
72         lte->refcnt       = 1;
73         return lte;
74 }
75
76 void free_lookup_table_entry(struct lookup_table_entry *lte)
77 {
78         if (lte) {
79                 switch (lte->resource_location) {
80                 case RESOURCE_IN_STAGING_FILE:
81                 case RESOURCE_IN_ATTACHED_BUFFER:
82                 case RESOURCE_IN_FILE_ON_DISK:
83                         wimlib_assert(((void*)&lte->file_on_disk ==
84                                       (void*)&lte->staging_file_name)
85                                       && ((void*)&lte->file_on_disk ==
86                                       (void*)&lte->attached_buffer));
87                         FREE(lte->file_on_disk);
88                         break;
89 #ifdef WITH_NTFS_3G
90                 case RESOURCE_IN_NTFS_VOLUME:
91                         if (lte->ntfs_loc) {
92                                 FREE(lte->ntfs_loc->path_utf8);
93                                 FREE(lte->ntfs_loc->stream_name_utf16);
94                                 FREE(lte->ntfs_loc);
95                         }
96                         break;
97 #endif
98                 default:
99                         break;
100                 }
101                 FREE(lte->extracted_file);
102                 FREE(lte);
103         }
104 }
105
106 static int do_free_lookup_table_entry(struct lookup_table_entry *entry,
107                                       void *ignore)
108 {
109         free_lookup_table_entry(entry);
110         return 0;
111 }
112
113
114 void free_lookup_table(struct lookup_table *table)
115 {
116         DEBUG2("Freeing lookup table");
117         if (table) {
118                 if (table->array) {
119                         for_lookup_table_entry(table,
120                                                do_free_lookup_table_entry,
121                                                NULL);
122                         FREE(table->array);
123                 }
124                 FREE(table);
125         }
126 }
127
128 /*
129  * Inserts an entry into the lookup table.
130  *
131  * @table:      A pointer to the lookup table.
132  * @entry:      A pointer to the entry to insert.
133  */
134 void lookup_table_insert(struct lookup_table *table, 
135                          struct lookup_table_entry *lte)
136 {
137         size_t i = lte->hash_short % table->capacity;
138         hlist_add_head(&lte->hash_list, &table->array[i]);
139
140         /* XXX Make the table grow when too many entries have been inserted. */
141         table->num_entries++;
142 }
143
144 static void finalize_lte(struct lookup_table_entry *lte)
145 {
146         #ifdef WITH_FUSE
147         if (lte->resource_location == RESOURCE_IN_STAGING_FILE) {
148                 unlink(lte->staging_file_name);
149                 wimlib_assert(lte->staging_list.next);
150                 wimlib_assert(lte->staging_list.prev);
151                 list_del(&lte->staging_list);
152         }
153         #endif
154         free_lookup_table_entry(lte);
155 }
156
157 /* Decrements the reference count for the lookup table entry @lte.  If its
158  * reference count reaches 0, it is unlinked from the lookup table.  If,
159  * furthermore, the entry has no opened file descriptors associated with it, the
160  * entry is freed.  */
161 void lte_decrement_refcnt(struct lookup_table_entry *lte,
162                           struct lookup_table *table)
163 {
164         wimlib_assert(lte);
165         wimlib_assert(lte->refcnt);
166         if (--lte->refcnt == 0) {
167                 lookup_table_unlink(table, lte);
168         #ifdef WITH_FUSE
169                 if (lte->num_opened_fds == 0)
170         #endif
171                         finalize_lte(lte);
172         }
173 }
174
175 #ifdef WITH_FUSE
176 void lte_decrement_num_opened_fds(struct lookup_table_entry *lte,
177                                   struct lookup_table *table)
178 {
179         wimlib_assert(lte);
180         wimlib_assert(lte->num_opened_fds);
181         if (--lte->num_opened_fds == 0 && lte->refcnt == 0)
182                 finalize_lte(lte);
183 }
184 #endif
185
186 /* 
187  * Calls a function on all the entries in the lookup table.  Stop early and
188  * return nonzero if any call to the function returns nonzero.
189  */
190 int for_lookup_table_entry(struct lookup_table *table, 
191                            int (*visitor)(struct lookup_table_entry *, void *),
192                            void *arg)
193 {
194         struct lookup_table_entry *lte;
195         struct hlist_node *pos, *tmp;
196         int ret;
197
198         for (size_t i = 0; i < table->capacity; i++) {
199                 hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
200                                           hash_list)
201                 {
202                         ret = visitor(lte, arg);
203                         if (ret != 0)
204                                 return ret;
205                 }
206         }
207         return 0;
208 }
209
210
211 /*
212  * Reads the lookup table from a WIM file.
213  */
214 int read_lookup_table(WIMStruct *w)
215 {
216         u64    num_entries;
217         u8     buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
218         int    ret;
219         struct lookup_table *table;
220         struct lookup_table_entry *cur_entry = NULL, *duplicate_entry;
221
222         DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"",
223               w->hdr.lookup_table_res_entry.offset,
224               w->hdr.lookup_table_res_entry.original_size);
225
226         if (fseeko(w->fp, w->hdr.lookup_table_res_entry.offset, SEEK_SET) != 0) {
227                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" to read "
228                                  "lookup table",
229                                  w->hdr.lookup_table_res_entry.offset);
230                 return WIMLIB_ERR_READ;
231         }
232
233         num_entries = w->hdr.lookup_table_res_entry.original_size /
234                       WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE;
235         table = new_lookup_table(num_entries * 2 + 1);
236         if (!table)
237                 return WIMLIB_ERR_NOMEM;
238
239         while (num_entries--) {
240                 const u8 *p;
241
242                 if (fread(buf, 1, sizeof(buf), w->fp) != sizeof(buf)) {
243                         if (feof(w->fp)) {
244                                 ERROR("Unexpected EOF in WIM lookup table!");
245                         } else {
246                                 ERROR_WITH_ERRNO("Error reading WIM lookup "
247                                                  "table");
248                         }
249                         ret = WIMLIB_ERR_READ;
250                         goto out;
251                 }
252                 cur_entry = new_lookup_table_entry();
253                 if (!cur_entry) {
254                         ret = WIMLIB_ERR_NOMEM;
255                         goto out;
256                 }
257                 cur_entry->wim = w;
258                 cur_entry->resource_location = RESOURCE_IN_WIM;
259                          
260                 p = get_resource_entry(buf, &cur_entry->resource_entry);
261                 p = get_u16(p, &cur_entry->part_number);
262                 p = get_u32(p, &cur_entry->refcnt);
263                 p = get_bytes(p, SHA1_HASH_SIZE, cur_entry->hash);
264
265                 if (cur_entry->part_number != w->hdr.part_number) {
266                         ERROR("A lookup table entry in part %hu of the WIM "
267                               "points to part %hu",
268                               w->hdr.part_number, cur_entry->part_number);
269                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
270                         goto out_free_cur_entry;
271                         
272                 }
273
274                 if (is_zero_hash(cur_entry->hash)) {
275                         ERROR("The WIM lookup table contains an entry with a "
276                               "SHA1 message digest of all 0's");
277                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
278                         goto out_free_cur_entry;
279                 }
280
281                 duplicate_entry = __lookup_resource(table, cur_entry->hash);
282                 if (duplicate_entry) {
283                         ERROR("The WIM lookup table contains two entries with the "
284                               "same SHA1 message digest!");
285                         ERROR("The first entry is:");
286                         print_lookup_table_entry(duplicate_entry);
287                         ERROR("The second entry is:");
288                         print_lookup_table_entry(cur_entry);
289                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
290                         goto out_free_cur_entry;
291                 }
292
293                 if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
294                     && (cur_entry->resource_entry.size !=
295                       cur_entry->resource_entry.original_size))
296                 {
297                         ERROR("Found uncompressed resource with original size "
298                               "not the same as compressed size");
299                         ERROR("The lookup table entry for the resource is as follows:");
300                         print_lookup_table_entry(cur_entry);
301                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
302                         goto out_free_cur_entry;
303                 }
304                 lookup_table_insert(table, cur_entry);
305
306         }
307         DEBUG("Done reading lookup table.");
308         w->lookup_table = table;
309         return 0;
310 out_free_cur_entry:
311         FREE(cur_entry);
312 out:
313         free_lookup_table(table);
314         return ret;
315 }
316
317
318 /* 
319  * Writes a lookup table entry to the output file.
320  */
321 int write_lookup_table_entry(struct lookup_table_entry *lte, void *__out)
322 {
323         FILE *out;
324         u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
325         u8 *p;
326
327         out = __out;
328
329         /* Don't write entries that have not had file resources or metadata
330          * resources written for them. */
331         if (lte->out_refcnt == 0)
332                 return 0;
333
334         if (lte->output_resource_entry.flags & WIM_RESHDR_FLAG_METADATA)
335                 DEBUG("Writing metadata entry at %lu (orig size = %zu)",
336                       ftello(out), lte->output_resource_entry.original_size);
337
338         p = put_resource_entry(buf, &lte->output_resource_entry);
339         p = put_u16(p, lte->part_number);
340         p = put_u32(p, lte->out_refcnt);
341         p = put_bytes(p, SHA1_HASH_SIZE, lte->hash);
342         if (fwrite(buf, 1, sizeof(buf), out) != sizeof(buf)) {
343                 ERROR_WITH_ERRNO("Failed to write lookup table entry");
344                 return WIMLIB_ERR_WRITE;
345         }
346         return 0;
347 }
348
349
350 int lte_zero_real_refcnt(struct lookup_table_entry *lte, void *ignore)
351 {
352         lte->real_refcnt = 0;
353         return 0;
354 }
355
356 int lte_zero_out_refcnt(struct lookup_table_entry *lte, void *ignore)
357 {
358         lte->out_refcnt = 0;
359         return 0;
360 }
361
362 int lte_free_extracted_file(struct lookup_table_entry *lte, void *ignone)
363 {
364         FREE(lte->extracted_file);
365         lte->extracted_file = NULL;
366         return 0;
367 }
368
369 void print_lookup_table_entry(const struct lookup_table_entry *lte)
370 {
371         if (!lte) {
372                 putchar('\n');
373                 return;
374         }
375         printf("Offset            = %"PRIu64" bytes\n", 
376                lte->resource_entry.offset);
377         printf("Size              = %"PRIu64" bytes\n", 
378                (u64)lte->resource_entry.size);
379         printf("Original size     = %"PRIu64" bytes\n", 
380                lte->resource_entry.original_size);
381         printf("Part Number       = %hu\n", lte->part_number);
382         printf("Reference Count   = %u\n", lte->refcnt);
383         printf("Hash              = 0x");
384         print_hash(lte->hash);
385         putchar('\n');
386         printf("Flags             = ");
387         u8 flags = lte->resource_entry.flags;
388         if (flags & WIM_RESHDR_FLAG_COMPRESSED)
389                 fputs("WIM_RESHDR_FLAG_COMPRESSED, ", stdout);
390         if (flags & WIM_RESHDR_FLAG_FREE)
391                 fputs("WIM_RESHDR_FLAG_FREE, ", stdout);
392         if (flags & WIM_RESHDR_FLAG_METADATA)
393                 fputs("WIM_RESHDR_FLAG_METADATA, ", stdout);
394         if (flags & WIM_RESHDR_FLAG_SPANNED)
395                 fputs("WIM_RESHDR_FLAG_SPANNED, ", stdout);
396         putchar('\n');
397         switch (lte->resource_location) {
398         case RESOURCE_IN_WIM:
399                 if (lte->wim->filename) {
400                         printf("WIM file          = `%s'\n",
401                                lte->wim->filename);
402                 }
403                 break;
404         case RESOURCE_IN_FILE_ON_DISK:
405                 printf("File on Disk      = `%s'\n", lte->file_on_disk);
406                 break;
407         case RESOURCE_IN_STAGING_FILE:
408                 printf("Staging File      = `%s'\n", lte->staging_file_name);
409                 break;
410         default:
411                 break;
412         }
413         putchar('\n');
414 }
415
416 static int do_print_lookup_table_entry(struct lookup_table_entry *lte,
417                                        void *ignore)
418 {
419         print_lookup_table_entry(lte);
420         return 0;
421 }
422
423 /*
424  * Prints the lookup table of a WIM file. 
425  */
426 WIMLIBAPI void wimlib_print_lookup_table(WIMStruct *w)
427 {
428         for_lookup_table_entry(w->lookup_table, 
429                                do_print_lookup_table_entry,
430                                NULL);
431 }
432
433 /* 
434  * Looks up an entry in the lookup table.
435  */
436 struct lookup_table_entry *
437 __lookup_resource(const struct lookup_table *table, const u8 hash[])
438 {
439         size_t i;
440         struct lookup_table_entry *lte;
441         struct hlist_node *pos;
442
443         wimlib_assert(table);
444
445         i = *(size_t*)hash % table->capacity;
446         hlist_for_each_entry(lte, pos, &table->array[i], hash_list)
447                 if (hashes_equal(hash, lte->hash))
448                         return lte;
449         return NULL;
450 }
451
452 #ifdef WITH_FUSE
453 /* 
454  * Finds the dentry, lookup table entry, and stream index for a WIM file stream,
455  * given a path name.
456  *
457  * This is only for pre-resolved inodes.
458  */
459 int lookup_resource(WIMStruct *w, const char *path,
460                     int lookup_flags,
461                     struct dentry **dentry_ret,
462                     struct lookup_table_entry **lte_ret,
463                     u16 *stream_idx_ret)
464 {
465         struct dentry *dentry;
466         struct lookup_table_entry *lte;
467         u16 stream_idx;
468         const char *stream_name = NULL;
469         struct inode *inode;
470         char *p = NULL;
471
472         if (lookup_flags & LOOKUP_FLAG_ADS_OK) {
473                 stream_name = path_stream_name(path);
474                 if (stream_name) {
475                         p = (char*)stream_name - 1;
476                         *p = '\0';
477                 }
478         }
479
480         dentry = get_dentry(w, path);
481         if (p)
482                 *p = ':';
483         if (!dentry)
484                 return -ENOENT;
485
486         inode = dentry->d_inode;
487
488         wimlib_assert(inode->resolved);
489
490         if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK)
491               && inode_is_directory(inode))
492                 return -EISDIR;
493
494         if (stream_name) {
495                 struct ads_entry *ads_entry;
496                 u16 ads_idx;
497                 ads_entry = inode_get_ads_entry(inode, stream_name,
498                                                 &ads_idx);
499                 if (ads_entry) {
500                         stream_idx = ads_idx + 1;
501                         lte = ads_entry->lte;
502                         goto out;
503                 } else {
504                         return -ENOENT;
505                 }
506         } else {
507                 lte = inode->lte;
508                 stream_idx = 0;
509         }
510 out:
511         if (dentry_ret)
512                 *dentry_ret = dentry;
513         if (lte_ret)
514                 *lte_ret = lte;
515         if (stream_idx_ret)
516                 *stream_idx_ret = stream_idx;
517         return 0;
518 }
519 #endif
520
521 static void inode_resolve_ltes(struct inode *inode, struct lookup_table *table)
522 {
523         struct lookup_table_entry *lte;
524
525         /* Resolve the default file stream */
526         lte = __lookup_resource(table, inode->hash);
527         inode->lte = lte;
528         inode->resolved = true;
529
530         /* Resolve the alternate data streams */
531         for (u16 i = 0; i < inode->num_ads; i++) {
532                 struct ads_entry *cur_entry = &inode->ads_entries[i];
533                 lte = __lookup_resource(table, cur_entry->hash);
534                 cur_entry->lte = lte;
535         }
536 }
537
538 /* Resolve a dentry's lookup table entries 
539  *
540  * This replaces the SHA1 hash fields (which are used to lookup an entry in the
541  * lookup table) with pointers directly to the lookup table entries.  A circular
542  * linked list of streams sharing the same lookup table entry is created.
543  *
544  * This function always succeeds; unresolved lookup table entries are given a
545  * NULL pointer.
546  */
547 int dentry_resolve_ltes(struct dentry *dentry, void *table)
548 {
549         if (!dentry->d_inode->resolved)
550                 inode_resolve_ltes(dentry->d_inode, table);
551         return 0;
552 }
553
554 /* Return the lookup table entry for the unnamed data stream of an inode, or
555  * NULL if there is none.
556  *
557  * You'd think this would be easier than it actually is, since the unnamed data
558  * stream should be the one referenced from the inode itself.  Alas, if there
559  * are named data streams, Microsoft's "imagex.exe" program will put the unnamed
560  * data stream in one of the alternate data streams instead of inside the WIM
561  * dentry itself.  So we need to check the alternate data streams too.
562  *
563  * Also, note that a dentry may appear to have than one unnamed stream, but if
564  * the SHA1 message digest is all 0's then the corresponding stream does not
565  * really "count" (this is the case for the inode's own file stream when the
566  * file stream that should be there is actually in one of the alternate stream
567  * entries.).  This is despite the fact that we may need to extract such a
568  * missing entry as an empty file or empty named data stream.
569  */
570 struct lookup_table_entry *
571 inode_unnamed_lte(const struct inode *inode,
572                    const struct lookup_table *table)
573 {
574         if (inode->resolved)
575                 return inode_unnamed_lte_resolved(inode);
576         else
577                 return inode_unnamed_lte_unresolved(inode, table);
578 }
579