]> wimlib.net Git - wimlib/blob - src/solid.c
mount_image.c: add fallback definitions of RENAME_* constants
[wimlib] / src / solid.c
1 /*
2  * solid.c
3  *
4  * Heuristic sorting of blobs to optimize solid compression.
5  */
6
7 /*
8  * Copyright (C) 2015 Eric Biggers
9  *
10  * This file is free software; you can redistribute it and/or modify it under
11  * the terms of the GNU Lesser General Public License as published by the Free
12  * Software Foundation; either version 3 of the License, or (at your option) any
13  * later version.
14  *
15  * This file is distributed in the hope that it will be useful, but WITHOUT
16  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU Lesser General Public License
21  * along with this file; if not, see https://www.gnu.org/licenses/.
22  */
23
24 #ifdef HAVE_CONFIG_H
25 #  include "config.h"
26 #endif
27
28 #include "wimlib/blob_table.h"
29 #include "wimlib/dentry.h"
30 #include "wimlib/encoding.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/metadata.h"
33 #include "wimlib/paths.h"
34 #include "wimlib/solid.h"
35 #include "wimlib/unaligned.h"
36
37 static const utf16lechar *
38 get_extension(const utf16lechar *name, size_t nbytes)
39 {
40         const utf16lechar *p = name + (nbytes / sizeof(utf16lechar));
41         for (;;) {
42                 if (p == name)
43                         return NULL;
44                 if (*(p - 1) == cpu_to_le16('/') || *(p - 1) == cpu_to_le16('\\'))
45                         return NULL;
46                 if (*(p - 1) == cpu_to_le16('.'))
47                         return p;
48                 p--;
49         }
50 }
51
52 /*
53  * Sort order for solid compression:
54  *
55  * 1. Blobs without sort names
56  *      - sorted by sequential order
57  * 2. Blobs with sort names:
58  *    a. Blobs whose sort name does not have an extension
59  *        - sorted by sort name
60  *    b. Blobs whose sort name has an extension
61  *        - sorted primarily by extension (case insensitive),
62  *          secondarily by sort name (case insensitive)
63  */
64 static int
65 cmp_blobs_by_solid_sort_name(const void *p1, const void *p2)
66 {
67         const struct blob_descriptor *blob1, *blob2;
68
69         blob1 = *(const struct blob_descriptor **)p1;
70         blob2 = *(const struct blob_descriptor **)p2;
71
72         if (blob1->solid_sort_name) {
73                 if (!blob2->solid_sort_name)
74                         return 1;
75                 const utf16lechar *extension1 = get_extension(blob1->solid_sort_name,
76                                                               blob1->solid_sort_name_nbytes);
77                 const utf16lechar *extension2 = get_extension(blob2->solid_sort_name,
78                                                               blob2->solid_sort_name_nbytes);
79                 if (extension1) {
80                         if (!extension2)
81                                 return 1;
82                         int res = cmp_utf16le_strings_z(extension1,
83                                                         extension2,
84                                                         true); /* case insensitive */
85                         if (res)
86                                 return res;
87                 } else {
88                         if (extension2)
89                                 return -1;
90                 }
91                 int res = cmp_utf16le_strings(blob1->solid_sort_name,
92                                               blob1->solid_sort_name_nbytes / sizeof(utf16lechar),
93                                               blob2->solid_sort_name,
94                                               blob2->solid_sort_name_nbytes / sizeof(utf16lechar),
95                                               true); /* case insensitive */
96                 if (res)
97                         return res;
98         } else {
99                 if (blob2->solid_sort_name)
100                         return -1;
101         }
102         return cmp_blobs_by_sequential_order(p1, p2);
103 }
104
105 static void
106 blob_set_solid_sort_name_from_inode(struct blob_descriptor *blob,
107                                     const struct wim_inode *inode)
108 {
109         const struct wim_dentry *dentry;
110         const utf16lechar *best_name = NULL;
111         size_t best_name_nbytes = SIZE_MAX;
112
113         if (blob->solid_sort_name) /* Sort name already set?  */
114                 return;
115
116         /* If this file has multiple names, choose the shortest one.  */
117         inode_for_each_dentry(dentry, inode) {
118                 if (dentry->d_name_nbytes < best_name_nbytes) {
119                         best_name = dentry->d_name;
120                         best_name_nbytes = dentry->d_name_nbytes;
121                 }
122         }
123         blob->solid_sort_name = utf16le_dupz(best_name, best_name_nbytes);
124         blob->solid_sort_name_nbytes = best_name_nbytes;
125 }
126
127 struct temp_blob_table {
128         struct hlist_head *table;
129         size_t capacity;
130 };
131
132 static int
133 dentry_fill_in_solid_sort_names(struct wim_dentry *dentry, void *_blob_table)
134 {
135         const struct temp_blob_table *blob_table = _blob_table;
136         const struct wim_inode *inode = dentry->d_inode;
137         const u8 *hash;
138         struct hlist_head *head;
139         struct blob_descriptor *blob;
140
141         hash = inode_get_hash_of_unnamed_data_stream(inode);
142         if (!hash) /* unhashed? */
143                 return 0;
144         head = &blob_table->table[load_size_t_unaligned(hash) %
145                                   blob_table->capacity];
146         hlist_for_each_entry(blob, head, hash_list_2) {
147                 if (hashes_equal(hash, blob->hash)) {
148                         blob_set_solid_sort_name_from_inode(blob, inode);
149                         break;
150                 }
151         }
152         return 0;
153 }
154
155 static int
156 image_fill_in_solid_sort_names(WIMStruct *wim)
157 {
158         return for_dentry_in_tree(wim_get_current_root_dentry(wim),
159                                   dentry_fill_in_solid_sort_names,
160                                   wim->private);
161 }
162
163 int
164 sort_blob_list_for_solid_compression(struct list_head *blob_list)
165 {
166         size_t num_blobs = 0;
167         struct temp_blob_table blob_table;
168         WIMStruct *wims[128];
169         int num_wims = 0;
170         struct blob_descriptor *blob;
171         int ret;
172
173         /* Count the number of blobs to be written.  */
174         list_for_each_entry(blob, blob_list, write_blobs_list)
175                 num_blobs++;
176
177         /* Allocate a temporary hash table for mapping blob hash => blob  */
178         blob_table.capacity = num_blobs;
179         blob_table.table = CALLOC(blob_table.capacity,
180                                   sizeof(blob_table.table[0]));
181         if (!blob_table.table)
182                 return WIMLIB_ERR_NOMEM;
183
184         /*
185          * For each blob to be written:
186          * - Reset the sort name
187          * - If it's in non-solid WIM resource, then save the WIMStruct.
188          * - If it's in a file on disk, then set its sort name from that.
189          */
190         list_for_each_entry(blob, blob_list, write_blobs_list) {
191                 blob->solid_sort_name = NULL;
192                 blob->solid_sort_name_nbytes = 0;
193                 switch (blob->blob_location) {
194                 case BLOB_IN_WIM:
195                         if (blob->size != blob->rdesc->uncompressed_size)
196                                 continue;
197                         for (int i = 0; i < num_wims; i++)
198                                 if (blob->rdesc->wim == wims[i])
199                                         goto found_wim;
200                         if (num_wims >= ARRAY_LEN(wims))
201                                 continue;
202                         wims[num_wims++] = blob->rdesc->wim;
203                 found_wim:
204                         hlist_add_head(&blob->hash_list_2,
205                                        &blob_table.table[load_size_t_unaligned(blob->hash) %
206                                                          blob_table.capacity]);
207                         break;
208                 case BLOB_IN_FILE_ON_DISK:
209         #ifdef _WIN32
210                 case BLOB_IN_WINDOWS_FILE:
211         #endif
212                         blob_set_solid_sort_name_from_inode(blob, blob->file_inode);
213                         break;
214                 default:
215                         break;
216                 }
217         }
218
219         /* For each WIMStruct that was found, search for dentry references to
220          * each blob and fill in the sort name this way.  This is useful e.g.
221          * when exporting a solid WIM file from a non-solid WIM file.  */
222         for (int i = 0; i < num_wims; i++) {
223                 if (!wim_has_metadata(wims[i]))
224                         continue;
225                 wims[i]->private = &blob_table;
226                 ret = for_image(wims[i], WIMLIB_ALL_IMAGES,
227                                 image_fill_in_solid_sort_names);
228                 if (ret)
229                         goto out;
230                 deselect_current_wim_image(wims[i]);
231         }
232
233         ret = sort_blob_list(blob_list,
234                              offsetof(struct blob_descriptor, write_blobs_list),
235                              cmp_blobs_by_solid_sort_name);
236
237 out:
238         list_for_each_entry(blob, blob_list, write_blobs_list)
239                 FREE(blob->solid_sort_name);
240         FREE(blob_table.table);
241         return ret;
242 }