Lots of changes
[wimlib] / src / join.c
1 /*
2  * join.c
3  *
4  * Join split WIMs (sometimes named as .swm files) together into one WIM.
5  */
6
7 /*
8  * Copyright (C) 2012 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #include "wimlib_internal.h"
27 #include "lookup_table.h"
28 #include "xml.h"
29 #include <stdlib.h>
30
31 static int move_lte_to_table(struct lookup_table_entry *lte,
32                              void *other_tab)
33 {
34         hlist_del(&lte->hash_list);
35         lookup_table_insert((struct lookup_table*)other_tab, lte);
36         return 0;
37 }
38
39 static int lookup_table_join(struct lookup_table *table,
40                              struct lookup_table *new)
41 {
42         return for_lookup_table_entry(new, move_lte_to_table, table);
43 }
44
45 /*
46  * Sanity checks to make sure a set of WIMs correctly correspond to a spanned
47  * set.
48  *
49  * @w:
50  *      Part 1 of the set.
51  *
52  * @additional_swms:
53  *      All parts of the set other than part 1.
54  *
55  * @num_additional_swms:
56  *      Number of WIMStructs in @additional_swms.  Or, the total number of parts
57  *      in the set minus 1.
58  *
59  * @return:
60  *      0 on success; WIMLIB_ERR_SPLIT_INVALID if the set is not valid.
61  */
62 int verify_swm_set(WIMStruct *w, WIMStruct **additional_swms,
63                    unsigned num_additional_swms)
64 {
65         unsigned total_parts = w->hdr.total_parts;
66         int ctype;
67         const u8 *guid;
68
69         if (total_parts != num_additional_swms + 1) {
70                 ERROR("`%s' says there are %u parts in the spanned set, "
71                       "but %s%u part%s provided",
72                       w->filename, total_parts,
73                       (num_additional_swms + 1 < total_parts) ? "only " : "",
74                       num_additional_swms + 1,
75                       (num_additional_swms) ? "s were" : " was");
76                 return WIMLIB_ERR_SPLIT_INVALID;
77         }
78         if (w->hdr.part_number != 1) {
79                 ERROR("WIM `%s' is not the first part of the split WIM.",
80                       w->filename);
81                 return WIMLIB_ERR_SPLIT_INVALID;
82         }
83         for (unsigned i = 0; i < num_additional_swms; i++) {
84                 if (additional_swms[i]->hdr.total_parts != total_parts) {
85                         ERROR("WIM `%s' says there are %u parts in the spanned set, "
86                               "but %u parts were provided",
87                               additional_swms[i]->filename,
88                               additional_swms[i]->hdr.total_parts,
89                               total_parts);
90                         return WIMLIB_ERR_SPLIT_INVALID;
91                 }
92         }
93
94         /* keep track of ctype and guid just to make sure they are the same for
95          * all the WIMs. */
96         ctype = wimlib_get_compression_type(w);
97         guid = w->hdr.guid;
98
99         WIMStruct *parts_to_swms[num_additional_swms];
100         ZERO_ARRAY(parts_to_swms);
101         for (unsigned i = 0; i < num_additional_swms; i++) {
102
103                 WIMStruct *swm = additional_swms[i];
104
105                 if (wimlib_get_compression_type(swm) != ctype) {
106                         ERROR("The split WIMs do not all have the same "
107                               "compression type");
108                         return WIMLIB_ERR_SPLIT_INVALID;
109                 }
110                 if (memcmp(guid, swm->hdr.guid, WIM_GID_LEN) != 0) {
111                         ERROR("The split WIMs do not all have the same "
112                               "GUID");
113                         return WIMLIB_ERR_SPLIT_INVALID;
114                 }
115                 if (swm->hdr.part_number == 1) {
116                         ERROR("WIMs `%s' and `%s' both are marked as the "
117                               "first WIM in the spanned set",
118                               w->filename, swm->filename);
119                         return WIMLIB_ERR_SPLIT_INVALID;
120                 }
121                 if (swm->hdr.part_number == 0 ||
122                     swm->hdr.part_number > total_parts)
123                 {
124                         ERROR("WIM `%s' says it is part %u in the spanned set, "
125                               "but the part number must be in the range "
126                               "[1, %u]",
127                               swm->filename, swm->hdr.part_number, total_parts);
128                         return WIMLIB_ERR_SPLIT_INVALID;
129                 }
130                 if (parts_to_swms[swm->hdr.part_number - 2])
131                 {
132                         ERROR("`%s' and `%s' are both marked as part %u of %u "
133                               "in the spanned set",
134                               parts_to_swms[swm->hdr.part_number - 2]->filename,
135                               swm->filename,
136                               swm->hdr.part_number,
137                               total_parts);
138                         return WIMLIB_ERR_SPLIT_INVALID;
139                 } else {
140                         parts_to_swms[swm->hdr.part_number - 2] = swm;
141                 }
142         }
143         return 0;
144 }
145
146 /*
147  * Joins lookup tables from the parts of a split WIM.
148  *
149  * @w specifies the first part, while @additional_swms and @num_additional_swms
150  * specify an array of pointers to the WIMStruct's for additional split WIM parts.
151  *
152  * The lookup table entries are *moved* to the new table.
153  *
154  * On success, 0 is returned on a pointer to the joined lookup table is returned
155  * in @table_ret.
156  *
157  * The reason we join the lookup tables is so:
158  *      - We only have to search one lookup table to find the location of a
159  *      resource in the entire split WIM.
160  *      - Each lookup table entry will have a pointer to its split WIM part (and
161  *      a part number field, although we don't really use it).
162  */
163 int new_joined_lookup_table(WIMStruct *w,
164                             WIMStruct **additional_swms,
165                             unsigned num_additional_swms,
166                             struct lookup_table **table_ret)
167 {
168         struct lookup_table *table;
169         int ret;
170         unsigned i;
171
172         table = new_lookup_table(9001);
173         if (!table)
174                 return WIMLIB_ERR_NOMEM;
175
176         if (w)
177                 lookup_table_join(table, w->lookup_table);
178
179         for (i = 0; i < num_additional_swms; i++) {
180                 ret = lookup_table_join(table, additional_swms[i]->lookup_table);
181                 if (ret != 0)
182                         goto out_free_table;
183         }
184         *table_ret = table;
185         return 0;
186 out_free_table:
187         free_lookup_table(table);
188         return ret;
189 }
190
191
192 static int join_wims(WIMStruct **swms, unsigned num_swms,
193                      WIMStruct *joined_wim, int write_flags,
194                      wimlib_progress_func_t progress_func)
195 {
196         int ret;
197         unsigned i;
198         union wimlib_progress_info progress;
199         u64 total_bytes = 0;
200         u64 part_bytes;
201         u64 swm_part_sizes[num_swms];
202
203         /* Calculate total size of the streams in the split WIM parts. */
204         for (i = 0; i < num_swms; i++) {
205                 part_bytes = lookup_table_total_stream_size(swms[i]->lookup_table);
206                 swm_part_sizes[i] = part_bytes;
207                 total_bytes += part_bytes;
208         }
209
210         if (progress_func) {
211                 progress.join.total_bytes        = total_bytes;
212                 progress.join.total_parts        = swms[0]->hdr.total_parts;
213                 progress.join.completed_bytes    = 0;
214                 progress.join.completed_parts    = 0;
215                 progress_func(WIMLIB_PROGRESS_MSG_JOIN_STREAMS, &progress);
216         }
217
218         /* Write the resources (streams and metadata resources) from each SWM
219          * part */
220         swms[0]->write_metadata = true;
221         for (i = 0; i < num_swms; i++) {
222                 swms[i]->fp = fopen(swms[i]->filename, "rb");
223                 if (!swms[i]->fp) {
224                         ERROR_WITH_ERRNO("Failed to reopen `%s'",
225                                          swms[i]->filename);
226                         return WIMLIB_ERR_OPEN;
227                 }
228                 swms[i]->out_fp = joined_wim->out_fp;
229                 swms[i]->hdr.part_number = 1;
230                 ret = for_lookup_table_entry(swms[i]->lookup_table,
231                                              copy_resource, swms[i]);
232                 swms[i]->out_fp = NULL;
233                 fclose(swms[i]->fp);
234                 swms[i]->fp = NULL;
235
236                 if (ret != 0)
237                         return ret;
238
239                 if (progress_func) {
240                         progress.join.completed_bytes += swm_part_sizes[i];
241                         progress.join.completed_parts++;
242                         progress_func(WIMLIB_PROGRESS_MSG_JOIN_STREAMS, &progress);
243                 }
244         }
245
246         joined_wim->hdr.image_count = swms[0]->hdr.image_count;
247         for (i = 0; i < num_swms; i++)
248                 lookup_table_join(joined_wim->lookup_table, swms[i]->lookup_table);
249
250         free_wim_info(joined_wim->wim_info);
251         joined_wim->wim_info = swms[0]->wim_info;
252         ret = finish_write(joined_wim, WIMLIB_ALL_IMAGES, write_flags, progress_func);
253         joined_wim->wim_info = NULL;
254         return ret;
255 }
256
257 static int cmp_swms_by_part_number(const void *swm1, const void *swm2)
258 {
259         u16 partno_1 = (*(const WIMStruct**)swm1)->hdr.part_number;
260         u16 partno_2 = (*(const WIMStruct**)swm2)->hdr.part_number;
261         return (int)partno_1 - (int)partno_2;
262 }
263
264 /*
265  * Join a set of split WIMs into a stand-alone WIM.
266  */
267 WIMLIBAPI int wimlib_join(const char **swm_names, unsigned num_swms,
268                           const char *output_path, int swm_open_flags,
269                           int wim_write_flags,
270                           wimlib_progress_func_t progress_func)
271 {
272         int ret;
273         WIMStruct *joined_wim = NULL;
274         unsigned i;
275
276         swm_open_flags |= WIMLIB_OPEN_FLAG_SPLIT_OK;
277         wim_write_flags &= WIMLIB_WRITE_MASK_PUBLIC;
278
279         if (num_swms < 1 || num_swms > 0xffff)
280                 return WIMLIB_ERR_INVALID_PARAM;
281
282         WIMStruct *swms[num_swms];
283         ZERO_ARRAY(swms);
284
285         for (i = 0; i < num_swms; i++) {
286                 ret = wimlib_open_wim(swm_names[i], swm_open_flags, &swms[i],
287                                       progress_func);
288                 if (ret != 0)
289                         goto out;
290
291                 /* Don't open all the parts at the same time, in case there are
292                  * a lot of them */
293                 fclose(swms[i]->fp);
294                 swms[i]->fp = NULL;
295         }
296
297         qsort(swms, num_swms, sizeof(swms[0]), cmp_swms_by_part_number);
298
299         ret = verify_swm_set(swms[0], &swms[1], num_swms - 1);
300         if (ret != 0)
301                 goto out;
302
303         ret = wimlib_create_new_wim(wimlib_get_compression_type(swms[0]),
304                                     &joined_wim);
305         if (ret != 0)
306                 goto out;
307
308         ret = begin_write(joined_wim, output_path, wim_write_flags);
309         if (ret != 0)
310                 goto out;
311         ret = join_wims(swms, num_swms, joined_wim, wim_write_flags,
312                         progress_func);
313 out:
314         for (i = 0; i < num_swms; i++)
315                 wimlib_free(swms[i]);
316         wimlib_free(joined_wim);
317         return ret;
318 }