4 * Heuristic sorting of streams to optimize solid compression.
8 * Copyright (C) 2015 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
28 #include "wimlib/dentry.h"
29 #include "wimlib/encoding.h"
30 #include "wimlib/endianness.h"
31 #include "wimlib/lookup_table.h"
32 #include "wimlib/metadata.h"
33 #include "wimlib/paths.h"
34 #include "wimlib/solid.h"
35 #include "wimlib/unaligned.h"
37 static const utf16lechar *
38 get_extension(const utf16lechar *name, size_t nbytes)
40 const utf16lechar *p = name + (nbytes / sizeof(utf16lechar));
44 if (*(p - 1) == cpu_to_le16('/') || *(p - 1) == cpu_to_le16('\\'))
46 if (*(p - 1) == cpu_to_le16('.'))
53 * Sort order for solid compression:
55 * 1. Streams without sort names
56 * - sorted by sequential order
57 * 2. Streams with sort names:
58 * a. Streams whose sort name does not have an extension
59 * - sorted by sort name
60 * b. Streams whose sort name has an extension
61 * - sorted primarily by extension (case insensitive),
62 * secondarily by sort name (case insensitive)
65 cmp_streams_by_solid_sort_name(const void *p1, const void *p2)
67 const struct wim_lookup_table_entry *lte1, *lte2;
69 lte1 = *(const struct wim_lookup_table_entry **)p1;
70 lte2 = *(const struct wim_lookup_table_entry **)p2;
72 if (lte1->solid_sort_name) {
73 if (!lte2->solid_sort_name)
75 const utf16lechar *extension1 = get_extension(lte1->solid_sort_name,
76 lte1->solid_sort_name_nbytes);
77 const utf16lechar *extension2 = get_extension(lte2->solid_sort_name,
78 lte2->solid_sort_name_nbytes);
82 int res = cmp_utf16le_strings(extension1,
83 utf16le_strlen(extension1) / sizeof(utf16lechar),
85 utf16le_strlen(extension2) / sizeof(utf16lechar),
86 true); /* case insensitive */
93 int res = cmp_utf16le_strings(lte1->solid_sort_name,
94 lte1->solid_sort_name_nbytes / sizeof(utf16lechar),
95 lte2->solid_sort_name,
96 lte2->solid_sort_name_nbytes / sizeof(utf16lechar),
97 true); /* case insensitive */
101 if (lte2->solid_sort_name)
104 return cmp_streams_by_sequential_order(p1, p2);
108 lte_set_solid_sort_name_from_inode(struct wim_lookup_table_entry *lte,
109 const struct wim_inode *inode)
111 const struct wim_dentry *dentry;
112 const utf16lechar *best_name = NULL;
113 size_t best_name_nbytes = SIZE_MAX;
115 if (lte->solid_sort_name) /* Sort name already set? */
118 /* If this file has multiple names, choose the shortest one. */
119 inode_for_each_dentry(dentry, inode) {
120 if (dentry->file_name_nbytes < best_name_nbytes) {
121 best_name = dentry->file_name;
122 best_name_nbytes = dentry->file_name_nbytes;
125 lte->solid_sort_name = utf16le_dupz(best_name, best_name_nbytes);
126 lte->solid_sort_name_nbytes = best_name_nbytes;
129 struct temp_lookup_table {
130 struct hlist_head *table;
135 dentry_fill_in_solid_sort_names(struct wim_dentry *dentry, void *_lookup_table)
137 const struct temp_lookup_table *lookup_table = _lookup_table;
138 const struct wim_inode *inode = dentry->d_inode;
140 struct hlist_head *head;
141 struct hlist_node *cur;
142 struct wim_lookup_table_entry *lte;
144 hash = inode_unnamed_stream_hash(inode);
145 head = &lookup_table->table[load_size_t_unaligned(hash) %
146 lookup_table->capacity];
147 hlist_for_each_entry(lte, cur, head, hash_list_2) {
148 if (hashes_equal(hash, lte->hash)) {
149 lte_set_solid_sort_name_from_inode(lte, inode);
157 image_fill_in_solid_sort_names(WIMStruct *wim)
159 return for_dentry_in_tree(wim_get_current_root_dentry(wim),
160 dentry_fill_in_solid_sort_names,
165 sort_stream_list_for_solid_compression(struct list_head *stream_list)
167 size_t num_streams = 0;
168 struct temp_lookup_table lookup_table;
169 WIMStruct *wims[128];
171 struct wim_lookup_table_entry *lte;
174 /* Count the number of streams to be written. */
175 list_for_each_entry(lte, stream_list, write_streams_list)
178 /* Allocate a temporary hash table for mapping stream hash => stream */
179 lookup_table.capacity = num_streams;
180 lookup_table.table = CALLOC(lookup_table.capacity,
181 sizeof(lookup_table.table[0]));
182 if (!lookup_table.table)
183 return WIMLIB_ERR_NOMEM;
186 * For each stream to be written:
187 * - Reset the sort name
188 * - If it's in non-solid WIM resource, then save the WIMStruct.
189 * - If it's in a file on disk, then set its sort name from that.
191 list_for_each_entry(lte, stream_list, write_streams_list) {
192 lte->solid_sort_name = NULL;
193 lte->solid_sort_name_nbytes = 0;
194 switch (lte->resource_location) {
195 case RESOURCE_IN_WIM:
196 if (lte->size != lte->rspec->uncompressed_size)
198 for (int i = 0; i < num_wims; i++)
199 if (lte->rspec->wim == wims[i])
201 if (num_wims >= ARRAY_LEN(wims))
203 wims[num_wims++] = lte->rspec->wim;
205 hlist_add_head(<e->hash_list_2,
206 &lookup_table.table[load_size_t_unaligned(lte->hash) %
207 lookup_table.capacity]);
209 case RESOURCE_IN_FILE_ON_DISK:
211 case RESOURCE_IN_WINNT_FILE_ON_DISK:
213 lte_set_solid_sort_name_from_inode(lte, lte->file_inode);
220 /* For each WIMStruct that was found, search for dentry references to
221 * each stream and fill in the sort name this way. This is useful e.g.
222 * when exporting a solid WIM file from a non-solid WIM file. */
223 for (int i = 0; i < num_wims; i++) {
224 if (!wim_has_metadata(wims[i]))
226 wims[i]->private = &lookup_table;
227 ret = for_image(wims[i], WIMLIB_ALL_IMAGES,
228 image_fill_in_solid_sort_names);
230 goto out_free_lookup_table;
231 deselect_current_wim_image(wims[i]);
234 ret = sort_stream_list(stream_list,
235 offsetof(struct wim_lookup_table_entry,
237 cmp_streams_by_solid_sort_name);
239 list_for_each_entry(lte, stream_list, write_streams_list)
240 FREE(lte->solid_sort_name);
241 out_free_lookup_table:
242 FREE(lookup_table.table);