4 * Heuristic sorting of blobs to optimize solid compression.
8 * Copyright (C) 2015 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/blob_table.h"
29 #include "wimlib/dentry.h"
30 #include "wimlib/encoding.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/metadata.h"
33 #include "wimlib/paths.h"
34 #include "wimlib/solid.h"
35 #include "wimlib/unaligned.h"
37 static const utf16lechar *
38 get_extension(const utf16lechar *name, size_t nbytes)
40 const utf16lechar *p = name + (nbytes / sizeof(utf16lechar));
44 if (*(p - 1) == cpu_to_le16('/') || *(p - 1) == cpu_to_le16('\\'))
46 if (*(p - 1) == cpu_to_le16('.'))
53 * Sort order for solid compression:
55 * 1. Blobs without sort names
56 * - sorted by sequential order
57 * 2. Blobs with sort names:
58 * a. Blobs whose sort name does not have an extension
59 * - sorted by sort name
60 * b. Blobs whose sort name has an extension
61 * - sorted primarily by extension (case insensitive),
62 * secondarily by sort name (case insensitive)
65 cmp_blobs_by_solid_sort_name(const void *p1, const void *p2)
67 const struct blob_descriptor *blob1, *blob2;
69 blob1 = *(const struct blob_descriptor **)p1;
70 blob2 = *(const struct blob_descriptor **)p2;
72 if (blob1->solid_sort_name) {
73 if (!blob2->solid_sort_name)
75 const utf16lechar *extension1 = get_extension(blob1->solid_sort_name,
76 blob1->solid_sort_name_nbytes);
77 const utf16lechar *extension2 = get_extension(blob2->solid_sort_name,
78 blob2->solid_sort_name_nbytes);
82 int res = cmp_utf16le_strings_z(extension1,
84 true); /* case insensitive */
91 int res = cmp_utf16le_strings(blob1->solid_sort_name,
92 blob1->solid_sort_name_nbytes / sizeof(utf16lechar),
93 blob2->solid_sort_name,
94 blob2->solid_sort_name_nbytes / sizeof(utf16lechar),
95 true); /* case insensitive */
99 if (blob2->solid_sort_name)
102 return cmp_blobs_by_sequential_order(p1, p2);
106 blob_set_solid_sort_name_from_inode(struct blob_descriptor *blob,
107 const struct wim_inode *inode)
109 const struct wim_dentry *dentry;
110 const utf16lechar *best_name = NULL;
111 size_t best_name_nbytes = SIZE_MAX;
113 if (blob->solid_sort_name) /* Sort name already set? */
116 /* If this file has multiple names, choose the shortest one. */
117 inode_for_each_dentry(dentry, inode) {
118 if (dentry->file_name_nbytes < best_name_nbytes) {
119 best_name = dentry->file_name;
120 best_name_nbytes = dentry->file_name_nbytes;
123 blob->solid_sort_name = utf16le_dupz(best_name, best_name_nbytes);
124 blob->solid_sort_name_nbytes = best_name_nbytes;
127 struct temp_blob_table {
128 struct hlist_head *table;
133 dentry_fill_in_solid_sort_names(struct wim_dentry *dentry, void *_blob_table)
135 const struct temp_blob_table *blob_table = _blob_table;
136 const struct wim_inode *inode = dentry->d_inode;
138 struct hlist_head *head;
139 struct hlist_node *cur;
140 struct blob_descriptor *blob;
142 hash = inode_get_hash_of_unnamed_data_stream(inode);
143 head = &blob_table->table[load_size_t_unaligned(hash) %
144 blob_table->capacity];
145 hlist_for_each_entry(blob, cur, head, hash_list_2) {
146 if (hashes_equal(hash, blob->hash)) {
147 blob_set_solid_sort_name_from_inode(blob, inode);
155 image_fill_in_solid_sort_names(WIMStruct *wim)
157 return for_dentry_in_tree(wim_get_current_root_dentry(wim),
158 dentry_fill_in_solid_sort_names,
163 sort_blob_list_for_solid_compression(struct list_head *blob_list)
165 size_t num_blobs = 0;
166 struct temp_blob_table blob_table;
167 WIMStruct *wims[128];
169 struct blob_descriptor *blob;
172 /* Count the number of blobs to be written. */
173 list_for_each_entry(blob, blob_list, write_blobs_list)
176 /* Allocate a temporary hash table for mapping blob hash => blob */
177 blob_table.capacity = num_blobs;
178 blob_table.table = CALLOC(blob_table.capacity,
179 sizeof(blob_table.table[0]));
180 if (!blob_table.table)
181 return WIMLIB_ERR_NOMEM;
184 * For each blob to be written:
185 * - Reset the sort name
186 * - If it's in non-solid WIM resource, then save the WIMStruct.
187 * - If it's in a file on disk, then set its sort name from that.
189 list_for_each_entry(blob, blob_list, write_blobs_list) {
190 blob->solid_sort_name = NULL;
191 blob->solid_sort_name_nbytes = 0;
192 switch (blob->blob_location) {
194 if (blob->size != blob->rdesc->uncompressed_size)
196 for (int i = 0; i < num_wims; i++)
197 if (blob->rdesc->wim == wims[i])
199 if (num_wims >= ARRAY_LEN(wims))
201 wims[num_wims++] = blob->rdesc->wim;
203 hlist_add_head(&blob->hash_list_2,
204 &blob_table.table[load_size_t_unaligned(blob->hash) %
205 blob_table.capacity]);
207 case BLOB_IN_FILE_ON_DISK:
209 case BLOB_IN_WINNT_FILE_ON_DISK:
211 blob_set_solid_sort_name_from_inode(blob, blob->file_inode);
218 /* For each WIMStruct that was found, search for dentry references to
219 * each blob and fill in the sort name this way. This is useful e.g.
220 * when exporting a solid WIM file from a non-solid WIM file. */
221 for (int i = 0; i < num_wims; i++) {
222 if (!wim_has_metadata(wims[i]))
224 wims[i]->private = &blob_table;
225 ret = for_image(wims[i], WIMLIB_ALL_IMAGES,
226 image_fill_in_solid_sort_names);
229 deselect_current_wim_image(wims[i]);
232 ret = sort_blob_list(blob_list,
233 offsetof(struct blob_descriptor, write_blobs_list),
234 cmp_blobs_by_solid_sort_name);
237 list_for_each_entry(blob, blob_list, write_blobs_list)
238 FREE(blob->solid_sort_name);
239 FREE(blob_table.table);