d4a1ba4229ddb30b4aa10b7e540cfb003fb19ac6
[wimlib] / src / lookup_table.c
1 /*
2  * lookup_table.c
3  *
4  * Lookup table, implemented as a hash table, that maps dentries to file
5  * resources.
6  */
7
8 /*
9  * Copyright (C) 2012 Eric Biggers
10  *
11  * This file is part of wimlib, a library for working with WIM files.
12  *
13  * wimlib is free software; you can redistribute it and/or modify it under the
14  * terms of the GNU General Public License as published by the Free
15  * Software Foundation; either version 3 of the License, or (at your option)
16  * any later version.
17  *
18  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20  * A PARTICULAR PURPOSE. See the GNU General Public License for more
21  * details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with wimlib; if not, see http://www.gnu.org/licenses/.
25  */
26
27 #include "wimlib_internal.h"
28 #include "lookup_table.h"
29 #include "io.h"
30 #include <errno.h>
31
32 struct lookup_table *new_lookup_table(size_t capacity)
33 {
34         struct lookup_table *table;
35         struct hlist_head *array;
36
37         table = MALLOC(sizeof(struct lookup_table));
38         if (!table)
39                 goto err;
40         array = CALLOC(capacity, sizeof(array[0]));
41         if (!array) {
42                 FREE(table);
43                 goto err;
44         }
45         table->num_entries = 0;
46         table->capacity = capacity;
47         table->array = array;
48         return table;
49 err:
50         ERROR("Failed to allocate memory for lookup table with capacity %zu",
51               capacity);
52         return NULL;
53 }
54
55 struct lookup_table_entry *new_lookup_table_entry()
56 {
57         struct lookup_table_entry *lte;
58         
59         lte = CALLOC(1, sizeof(struct lookup_table_entry));
60         if (!lte) {
61                 ERROR("Out of memory (tried to allocate %zu bytes for "
62                       "lookup table entry)",
63                       sizeof(struct lookup_table_entry));
64                 return NULL;
65         }
66
67         lte->part_number  = 1;
68         lte->refcnt       = 1;
69         INIT_LIST_HEAD(&lte->lte_group_list);
70         return lte;
71 }
72
73 void free_lookup_table_entry(struct lookup_table_entry *lte)
74 {
75         if (lte) {
76 #ifdef WITH_FUSE
77                 if (lte->staging_list.next)
78                         list_del(&lte->staging_list);
79 #endif
80                 switch (lte->resource_location) {
81                 case RESOURCE_IN_STAGING_FILE:
82                 case RESOURCE_IN_ATTACHED_BUFFER:
83                 case RESOURCE_IN_FILE_ON_DISK:
84                         wimlib_assert(((void*)&lte->file_on_disk ==
85                                       (void*)&lte->staging_file_name)
86                                       && ((void*)&lte->file_on_disk ==
87                                       (void*)&lte->attached_buffer));
88                         FREE(lte->file_on_disk);
89                         break;
90 #ifdef WITH_NTFS_3G
91                 case RESOURCE_IN_NTFS_VOLUME:
92                         if (lte->ntfs_loc) {
93                                 FREE(lte->ntfs_loc->path_utf8);
94                                 FREE(lte->ntfs_loc->stream_name_utf16);
95                                 FREE(lte->ntfs_loc);
96                         }
97                         break;
98 #endif
99                 default:
100                         break;
101                 }
102                 FREE(lte->extracted_file);
103                 FREE(lte);
104         }
105 }
106
107 static int do_free_lookup_table_entry(struct lookup_table_entry *entry,
108                                       void *ignore)
109 {
110         free_lookup_table_entry(entry);
111         return 0;
112 }
113
114
115 void free_lookup_table(struct lookup_table *table)
116 {
117         DEBUG2("Freeing lookup table");
118         if (table) {
119                 if (table->array) {
120                         for_lookup_table_entry(table,
121                                                do_free_lookup_table_entry,
122                                                NULL);
123                         FREE(table->array);
124                 }
125                 FREE(table);
126         }
127 }
128
129 /*
130  * Inserts an entry into the lookup table.
131  *
132  * @table:      A pointer to the lookup table.
133  * @entry:      A pointer to the entry to insert.
134  */
135 void lookup_table_insert(struct lookup_table *table, 
136                          struct lookup_table_entry *lte)
137 {
138         size_t i = lte->hash_short % table->capacity;
139         hlist_add_head(&lte->hash_list, &table->array[i]);
140
141         /* XXX Make the table grow when too many entries have been inserted. */
142         table->num_entries++;
143 }
144
145
146
147 /* Decrements the reference count for the lookup table entry @lte.  If its
148  * reference count reaches 0, it is unlinked from the lookup table.  If,
149  * furthermore, the entry has no opened file descriptors associated with it, the
150  * entry is freed.  */
151 struct lookup_table_entry *
152 lte_decrement_refcnt(struct lookup_table_entry *lte, struct lookup_table *table)
153 {
154         if (lte) {
155                 wimlib_assert(lte->refcnt);
156                 if (--lte->refcnt == 0) {
157                         lookup_table_unlink(table, lte);
158                 #ifdef WITH_FUSE
159                         if (lte->num_opened_fds == 0)
160                 #endif
161                         {
162                                 free_lookup_table_entry(lte);
163                                 lte = NULL;
164                         }
165                 }
166         }
167         return lte;
168 }
169
170 /* 
171  * Calls a function on all the entries in the lookup table.  Stop early and
172  * return nonzero if any call to the function returns nonzero.
173  */
174 int for_lookup_table_entry(struct lookup_table *table, 
175                            int (*visitor)(struct lookup_table_entry *, void *),
176                            void *arg)
177 {
178         struct lookup_table_entry *lte;
179         struct hlist_node *pos, *tmp;
180         int ret;
181
182         for (size_t i = 0; i < table->capacity; i++) {
183                 hlist_for_each_entry_safe(lte, pos, tmp, &table->array[i],
184                                           hash_list)
185                 {
186                         ret = visitor(lte, arg);
187                         if (ret != 0)
188                                 return ret;
189                 }
190         }
191         return 0;
192 }
193
194
195 /*
196  * Reads the lookup table from a WIM file.
197  */
198 int read_lookup_table(WIMStruct *w)
199 {
200         u64    num_entries;
201         u8     buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
202         int    ret;
203         struct lookup_table *table;
204         struct lookup_table_entry *cur_entry = NULL, *duplicate_entry;
205
206         DEBUG("Reading lookup table: offset %"PRIu64", size %"PRIu64"",
207               w->hdr.lookup_table_res_entry.offset,
208               w->hdr.lookup_table_res_entry.original_size);
209
210         if (fseeko(w->fp, w->hdr.lookup_table_res_entry.offset, SEEK_SET) != 0) {
211                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" to read "
212                                  "lookup table",
213                                  w->hdr.lookup_table_res_entry.offset);
214                 return WIMLIB_ERR_READ;
215         }
216
217         num_entries = w->hdr.lookup_table_res_entry.original_size /
218                       WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE;
219         table = new_lookup_table(num_entries * 2 + 1);
220         if (!table)
221                 return WIMLIB_ERR_NOMEM;
222
223         while (num_entries--) {
224                 const u8 *p;
225
226                 if (fread(buf, 1, sizeof(buf), w->fp) != sizeof(buf)) {
227                         if (feof(w->fp)) {
228                                 ERROR("Unexpected EOF in WIM lookup table!");
229                         } else {
230                                 ERROR_WITH_ERRNO("Error reading WIM lookup "
231                                                  "table");
232                         }
233                         ret = WIMLIB_ERR_READ;
234                         goto out;
235                 }
236                 cur_entry = new_lookup_table_entry();
237                 if (!cur_entry) {
238                         ret = WIMLIB_ERR_NOMEM;
239                         goto out;
240                 }
241                 cur_entry->wim = w;
242                 cur_entry->resource_location = RESOURCE_IN_WIM;
243                          
244                 p = get_resource_entry(buf, &cur_entry->resource_entry);
245                 p = get_u16(p, &cur_entry->part_number);
246                 p = get_u32(p, &cur_entry->refcnt);
247                 p = get_bytes(p, SHA1_HASH_SIZE, cur_entry->hash);
248
249                 if (cur_entry->part_number != w->hdr.part_number) {
250                         ERROR("A lookup table entry in part %hu of the WIM "
251                               "points to part %hu",
252                               w->hdr.part_number, cur_entry->part_number);
253                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
254                         goto out_free_cur_entry;
255                         
256                 }
257
258                 if (is_zero_hash(cur_entry->hash)) {
259                         ERROR("The WIM lookup table contains an entry with a "
260                               "SHA1 message digest of all 0's");
261                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
262                         goto out_free_cur_entry;
263                 }
264
265                 duplicate_entry = __lookup_resource(table, cur_entry->hash);
266                 if (duplicate_entry) {
267                         ERROR("The WIM lookup table contains two entries with the "
268                               "same SHA1 message digest!");
269                         ERROR("The first entry is:");
270                         print_lookup_table_entry(duplicate_entry);
271                         ERROR("The second entry is:");
272                         print_lookup_table_entry(cur_entry);
273                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
274                         goto out_free_cur_entry;
275                 }
276
277                 if (!(cur_entry->resource_entry.flags & WIM_RESHDR_FLAG_COMPRESSED)
278                     && (cur_entry->resource_entry.size !=
279                       cur_entry->resource_entry.original_size))
280                 {
281                         ERROR("Found uncompressed resource with original size "
282                               "not the same as compressed size");
283                         ERROR("The lookup table entry for the resource is as follows:");
284                         print_lookup_table_entry(cur_entry);
285                         ret = WIMLIB_ERR_INVALID_LOOKUP_TABLE_ENTRY;
286                         goto out_free_cur_entry;
287                 }
288                 lookup_table_insert(table, cur_entry);
289
290         }
291         DEBUG("Done reading lookup table.");
292         w->lookup_table = table;
293         return 0;
294 out_free_cur_entry:
295         FREE(cur_entry);
296 out:
297         free_lookup_table(table);
298         return ret;
299 }
300
301
302 /* 
303  * Writes a lookup table entry to the output file.
304  */
305 int write_lookup_table_entry(struct lookup_table_entry *lte, void *__out)
306 {
307         FILE *out;
308         u8 buf[WIM_LOOKUP_TABLE_ENTRY_DISK_SIZE];
309         u8 *p;
310
311         out = __out;
312
313         /* Don't write entries that have not had file resources or metadata
314          * resources written for them. */
315         if (lte->out_refcnt == 0)
316                 return 0;
317
318         if (lte->output_resource_entry.flags & WIM_RESHDR_FLAG_METADATA)
319                 DEBUG("Writing metadata entry at %lu (orig size = %zu)",
320                       ftello(out), lte->output_resource_entry.original_size);
321
322         p = put_resource_entry(buf, &lte->output_resource_entry);
323         p = put_u16(p, lte->part_number);
324         p = put_u32(p, lte->out_refcnt);
325         p = put_bytes(p, SHA1_HASH_SIZE, lte->hash);
326         if (fwrite(buf, 1, sizeof(buf), out) != sizeof(buf)) {
327                 ERROR_WITH_ERRNO("Failed to write lookup table entry");
328                 return WIMLIB_ERR_WRITE;
329         }
330         return 0;
331 }
332
333
334
335 int zero_out_refcnts(struct lookup_table_entry *entry, void *ignore)
336 {
337         entry->out_refcnt = 0;
338         return 0;
339 }
340
341 void print_lookup_table_entry(const struct lookup_table_entry *lte)
342 {
343         if (!lte) {
344                 putchar('\n');
345                 return;
346         }
347         printf("Offset            = %"PRIu64" bytes\n", 
348                lte->resource_entry.offset);
349         printf("Size              = %"PRIu64" bytes\n", 
350                (u64)lte->resource_entry.size);
351         printf("Original size     = %"PRIu64" bytes\n", 
352                lte->resource_entry.original_size);
353         printf("Part Number       = %hu\n", lte->part_number);
354         printf("Reference Count   = %u\n", lte->refcnt);
355         printf("Hash              = 0x");
356         print_hash(lte->hash);
357         putchar('\n');
358         printf("Flags             = ");
359         u8 flags = lte->resource_entry.flags;
360         if (flags & WIM_RESHDR_FLAG_COMPRESSED)
361                 fputs("WIM_RESHDR_FLAG_COMPRESSED, ", stdout);
362         if (flags & WIM_RESHDR_FLAG_FREE)
363                 fputs("WIM_RESHDR_FLAG_FREE, ", stdout);
364         if (flags & WIM_RESHDR_FLAG_METADATA)
365                 fputs("WIM_RESHDR_FLAG_METADATA, ", stdout);
366         if (flags & WIM_RESHDR_FLAG_SPANNED)
367                 fputs("WIM_RESHDR_FLAG_SPANNED, ", stdout);
368         putchar('\n');
369         switch (lte->resource_location) {
370         case RESOURCE_IN_WIM:
371                 if (lte->wim->filename) {
372                         printf("WIM file          = `%s'\n",
373                                lte->wim->filename);
374                 }
375                 break;
376         case RESOURCE_IN_FILE_ON_DISK:
377                 printf("File on Disk      = `%s'\n", lte->file_on_disk);
378                 break;
379         case RESOURCE_IN_STAGING_FILE:
380                 printf("Staging File      = `%s'\n", lte->staging_file_name);
381                 break;
382         default:
383                 break;
384         }
385         putchar('\n');
386 }
387
388 static int do_print_lookup_table_entry(struct lookup_table_entry *lte,
389                                        void *ignore)
390 {
391         print_lookup_table_entry(lte);
392         return 0;
393 }
394
395 /*
396  * Prints the lookup table of a WIM file. 
397  */
398 WIMLIBAPI void wimlib_print_lookup_table(WIMStruct *w)
399 {
400         for_lookup_table_entry(w->lookup_table, 
401                                do_print_lookup_table_entry,
402                                NULL);
403 }
404
405 /* 
406  * Looks up an entry in the lookup table.
407  */
408 struct lookup_table_entry *
409 __lookup_resource(const struct lookup_table *table, const u8 hash[])
410 {
411         size_t i;
412         struct lookup_table_entry *lte;
413         struct hlist_node *pos;
414
415         i = *(size_t*)hash % table->capacity;
416         hlist_for_each_entry(lte, pos, &table->array[i], hash_list)
417                 if (hashes_equal(hash, lte->hash))
418                         return lte;
419         return NULL;
420 }
421
422 /* 
423  * Finds the dentry, lookup table entry, and stream index for a WIM file stream,
424  * given a path name.
425  *
426  * This is only for pre-resolved inodes.
427  */
428 int lookup_resource(WIMStruct *w, const char *path,
429                     int lookup_flags,
430                     struct dentry **dentry_ret,
431                     struct lookup_table_entry **lte_ret,
432                     unsigned *stream_idx_ret)
433 {
434         struct dentry *dentry;
435         struct lookup_table_entry *lte;
436         unsigned stream_idx;
437         const char *stream_name = NULL;
438         struct inode *inode;
439         char *p = NULL;
440
441         if (lookup_flags & LOOKUP_FLAG_ADS_OK) {
442                 stream_name = path_stream_name(path);
443                 if (stream_name) {
444                         p = (char*)stream_name - 1;
445                         *p = '\0';
446                 }
447         }
448
449         dentry = get_dentry(w, path);
450         if (p)
451                 *p = ':';
452         if (!dentry)
453                 return -ENOENT;
454
455         inode = dentry->inode;
456
457         wimlib_assert(inode->resolved);
458
459         if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK)
460               && inode_is_directory(inode))
461                 return -EISDIR;
462
463         lte = inode->lte;
464         stream_idx = 0;
465         if (stream_name) {
466                 size_t stream_name_len = strlen(stream_name);
467                 for (u16 i = 0; i < inode->num_ads; i++) {
468                         if (ads_entry_has_name(inode->ads_entries[i],
469                                                stream_name,
470                                                stream_name_len))
471                         {
472                                 stream_idx = i + 1;
473                                 lte = inode->ads_entries[i]->lte;
474                                 goto out;
475                         }
476                 }
477                 return -ENOENT;
478         }
479 out:
480         if (dentry_ret)
481                 *dentry_ret = dentry;
482         if (lte_ret)
483                 *lte_ret = lte;
484         if (stream_idx_ret)
485                 *stream_idx_ret = stream_idx;
486         return 0;
487 }
488
489 static int inode_resolve_ltes(struct inode *inode, struct lookup_table *table)
490 {
491         struct lookup_table_entry *lte;
492
493         /* Resolve the default file stream */
494         lte = __lookup_resource(table, inode->hash);
495         if (lte)
496                 list_add(&inode->lte_group_list.list, &lte->lte_group_list);
497         else
498                 INIT_LIST_HEAD(&inode->lte_group_list.list);
499         inode->lte = lte;
500         inode->lte_group_list.type = STREAM_TYPE_NORMAL;
501         inode->resolved = true;
502
503         /* Resolve the alternate data streams */
504         for (u16 i = 0; i < inode->num_ads; i++) {
505                 struct ads_entry *cur_entry = inode->ads_entries[i];
506
507                 lte = __lookup_resource(table, cur_entry->hash);
508                 if (lte)
509                         list_add(&cur_entry->lte_group_list.list,
510                                  &lte->lte_group_list);
511                 else
512                         INIT_LIST_HEAD(&cur_entry->lte_group_list.list);
513                 cur_entry->lte = lte;
514                 cur_entry->lte_group_list.type = STREAM_TYPE_ADS;
515         }
516         return 0;
517 }
518
519 /* Resolve a dentry's lookup table entries 
520  *
521  * This replaces the SHA1 hash fields (which are used to lookup an entry in the
522  * lookup table) with pointers directly to the lookup table entries.  A circular
523  * linked list of streams sharing the same lookup table entry is created.
524  *
525  * This function always succeeds; unresolved lookup table entries are given a
526  * NULL pointer.
527  */
528 int dentry_resolve_ltes(struct dentry *dentry, void *table)
529 {
530         if (dentry->inode->resolved)
531                 return 0;
532         else
533                 return inode_resolve_ltes(dentry->inode, table);
534 }
535
536
537
538
539 /* Return the lookup table entry for the unnamed data stream of a inode, or
540  * NULL if there is none.
541  *
542  * You'd think this would be easier than it actually is, since the unnamed data
543  * stream should be the one referenced from the inode itself.  Alas, if there
544  * are named data streams, Microsoft's "imagex.exe" program will put the unnamed
545  * data stream in one of the alternate data streams instead of inside the
546  * inode.  So we need to check the alternate data streams too.
547  *
548  * Also, note that a inode may appear to have than one unnamed stream, but if
549  * the SHA1 message digest is all 0's then the corresponding stream does not
550  * really "count" (this is the case for the inode's own file stream when the
551  * file stream that should be there is actually in one of the alternate stream
552  * entries.).  This is despite the fact that we may need to extract such a
553  * missing entry as an empty file or empty named data stream.
554  */
555 struct lookup_table_entry *
556 inode_unnamed_lte(const struct inode *inode,
557                    const struct lookup_table *table)
558 {
559         if (inode->resolved)
560                 return inode_unnamed_lte_resolved(inode);
561         else
562                 return inode_unnamed_lte_unresolved(inode, table);
563 }
564