d32fc1fe936c2f364965fa7a6fa1a31eb87c22e2
[wimlib] / src / join.c
1 /*
2  * join.c
3  *
4  * Join split WIMs (sometimes named as .swm files) together into one WIM.
5  */
6
7 /*
8  * Copyright (C) 2012 Eric Biggers
9  *
10  * This file is part of wimlib, a library for working with WIM files.
11  *
12  * wimlib is free software; you can redistribute it and/or modify it under the
13  * terms of the GNU General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option)
15  * any later version.
16  *
17  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
18  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
19  * A PARTICULAR PURPOSE. See the GNU General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with wimlib; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #include "wimlib_internal.h"
27 #include "lookup_table.h"
28 #include "xml.h"
29 #include <stdlib.h>
30
31 static int copy_lte_to_table(struct lookup_table_entry *lte, void *table)
32 {
33         struct lookup_table_entry *copy;
34         copy = MALLOC(sizeof(struct lookup_table_entry));
35         if (!copy)
36                 return WIMLIB_ERR_NOMEM;
37         memcpy(copy, lte, sizeof(struct lookup_table_entry));
38         lookup_table_insert(table, copy);
39         return 0;
40 }
41
42 static int lookup_table_join(struct lookup_table *table,
43                              struct lookup_table *new)
44 {
45         return for_lookup_table_entry(new, copy_lte_to_table, table);
46 }
47
48
49 static int cmp_swms_by_part_number(const void *swm1, const void *swm2)
50 {
51         u16 partno_1 = (*(WIMStruct**)swm1)->hdr.part_number;
52         u16 partno_2 = (*(WIMStruct**)swm2)->hdr.part_number;
53         return (int)partno_1 - (int)partno_2;
54 }
55
56 /*
57  * Sanity checks to make sure a set of WIMs correctly correspond to a spanned
58  * set.
59  *
60  * @w:
61  *      Part 1 of the set.
62  *
63  * @additional_swms:
64  *      All parts of the set other than part 1.
65  *
66  * @num_additional_swms:
67  *      Number of WIMStructs in @additional_swms.  Or, the total number of parts
68  *      in the set minus 1.
69  *
70  * @return:
71  *      0 on success; WIMLIB_ERR_SPLIT_INVALID if the set is not valid.
72  */
73 int verify_swm_set(WIMStruct *w, WIMStruct **additional_swms,
74                    unsigned num_additional_swms)
75 {
76         unsigned total_parts = w->hdr.total_parts;
77         int ctype;
78         const u8 *guid;
79
80         if (total_parts != num_additional_swms + 1) {
81                 ERROR("`%s' says there are %u parts in the spanned set, "
82                       "but %s%u part%s provided",
83                       w->filename, total_parts,
84                       (num_additional_swms + 1 < total_parts) ? "only " : "",
85                       num_additional_swms + 1,
86                       (num_additional_swms) ? "s were" : " was");
87                 return WIMLIB_ERR_SPLIT_INVALID;
88         }
89         if (w->hdr.part_number != 1) {
90                 ERROR("WIM `%s' is not the first part of the split WIM.",
91                       w->filename);
92                 return WIMLIB_ERR_SPLIT_INVALID;
93         }
94         for (unsigned i = 0; i < num_additional_swms; i++) {
95                 if (additional_swms[i]->hdr.total_parts != total_parts) {
96                         ERROR("WIM `%s' says there are %u parts in the spanned set, "
97                               "but %u parts were provided",
98                               additional_swms[i]->filename,
99                               additional_swms[i]->hdr.total_parts,
100                               total_parts);
101                         return WIMLIB_ERR_SPLIT_INVALID;
102                 }
103         }
104
105         /* keep track of ctype and guid just to make sure they are the same for
106          * all the WIMs. */
107         ctype = wimlib_get_compression_type(w);
108         guid = w->hdr.guid;
109
110         WIMStruct *parts_to_swms[num_additional_swms];
111         ZERO_ARRAY(parts_to_swms);
112         for (unsigned i = 0; i < num_additional_swms; i++) {
113
114                 WIMStruct *swm = additional_swms[i];
115
116                 if (wimlib_get_compression_type(swm) != ctype) {
117                         ERROR("The split WIMs do not all have the same "
118                               "compression type");
119                         return WIMLIB_ERR_SPLIT_INVALID;
120                 }
121                 if (memcmp(guid, swm->hdr.guid, WIM_GID_LEN) != 0) {
122                         ERROR("The split WIMs do not all have the same "
123                               "GUID");
124                         return WIMLIB_ERR_SPLIT_INVALID;
125                 }
126                 if (swm->hdr.part_number == 1) {
127                         ERROR("WIMs `%s' and `%s' both are marked as the "
128                               "first WIM in the spanned set",
129                               w->filename, swm->filename);
130                         return WIMLIB_ERR_SPLIT_INVALID;
131                 }
132                 if (swm->hdr.part_number == 0 ||
133                     swm->hdr.part_number > total_parts)
134                 {
135                         ERROR("WIM `%s' says it is part %u in the spanned set, "
136                               "but the part number must be in the range "
137                               "[1, %u]",
138                               swm->filename, swm->hdr.part_number, total_parts);
139                         return WIMLIB_ERR_SPLIT_INVALID;
140                 }
141                 if (parts_to_swms[swm->hdr.part_number - 2])
142                 {
143                         ERROR("`%s' and `%s' are both marked as part %u of %u "
144                               "in the spanned set",
145                               parts_to_swms[swm->hdr.part_number - 2]->filename,
146                               swm->filename,
147                               swm->hdr.part_number,
148                               total_parts);
149                         return WIMLIB_ERR_SPLIT_INVALID;
150                 } else {
151                         parts_to_swms[swm->hdr.part_number - 2] = swm;
152                 }
153         }
154         return 0;
155 }
156
157 /*
158  * Joins lookup tables from the parts of a split WIM.
159  *
160  * @w specifies the first part, while @additional_swms and @num_additional_swms
161  * specify an array of points to the WIMStruct's for additional split WIM parts.
162  *
163  * On success, 0 is returned on a pointer to the joined lookup table is returned
164  * in @table_ret.
165  *
166  * The reason we join the lookup tables is so:
167  *      - We only have to search one lookup table to find the location of a
168  *      resource in the entire split WIM.
169  *      - Each lookup table entry will have a pointer to its split WIM part (and
170  *      a part number field, although we don't really use it).
171  */
172 int new_joined_lookup_table(WIMStruct *w,
173                             WIMStruct **additional_swms,
174                             unsigned num_additional_swms,
175                             struct lookup_table **table_ret)
176 {
177         struct lookup_table *table;
178         int ret;
179         unsigned i;
180
181
182         table = new_lookup_table(9001);
183         if (!table)
184                 return WIMLIB_ERR_NOMEM;
185         ret = lookup_table_join(table, w->lookup_table);
186         if (ret != 0)
187                 goto out_free_table;
188         for (i = 0; i < num_additional_swms; i++) {
189                 ret = lookup_table_join(table, additional_swms[i]->lookup_table);
190                 if (ret != 0)
191                         goto out_free_table;
192         }
193         *table_ret = table;
194         return 0;
195 out_free_table:
196         free_lookup_table(table);
197         return ret;
198 }
199
200
201 static int join_wims(WIMStruct **swms, uint num_swms, WIMStruct *joined_wim,
202                      int write_flags)
203 {
204         uint i;
205         int ret;
206         FILE *out_fp = joined_wim->out_fp;
207         u64 total_bytes = wim_info_get_total_bytes(swms[0]->wim_info);
208
209         swms[0]->write_metadata = false;
210         for (i = 0; i < num_swms; i++) {
211                 if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS) {
212                         off_t cur_offset = ftello(out_fp);
213                         printf("Writing resources from part %u of %u "
214                                "(%"PRIu64" of %"PRIu64" bytes, %.0f%% done)\n",
215                                i + 1, num_swms, cur_offset, total_bytes,
216                                (double)cur_offset / total_bytes * 100.0);
217                 }
218                 swms[i]->fp = fopen(swms[i]->filename, "rb");
219                 if (!swms[i]->fp) {
220                         ERROR_WITH_ERRNO("Failed to reopen `%s'",
221                                          swms[i]->filename);
222                         return WIMLIB_ERR_OPEN;
223                 }
224                 swms[i]->out_fp = out_fp;
225                 swms[i]->hdr.part_number = 1;
226                 ret = for_lookup_table_entry(swms[i]->lookup_table,
227                                              copy_resource, swms[i]);
228                 if (ret != 0)
229                         return ret;
230                 if (i != 0) {
231                         fclose(swms[i]->fp);
232                         swms[i]->fp = NULL;
233                 }
234         }
235         swms[0]->write_metadata = true;
236         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
237                 printf("Writing %d metadata resources\n",
238                         swms[0]->hdr.image_count);
239
240         for (i = 0; i < swms[0]->hdr.image_count; i++) {
241                 ret = copy_resource(swms[0]->image_metadata[i].metadata_lte,
242                                     swms[0]);
243                 if (ret != 0)
244                         return ret;
245         }
246
247         off_t lookup_table_offset = ftello(out_fp);
248
249         if (write_flags & WIMLIB_WRITE_FLAG_SHOW_PROGRESS)
250                 printf("Writing lookup tables, XML data, and header\n");
251         /* Now write the lookup table for the joined wim.  Since the lookup
252          * table has no header, we can just concatenate the lookup tables of all
253          * the SWM parts. */
254         for (i = 0; i < num_swms; i++) {
255                 ret = write_lookup_table(swms[i]->lookup_table, out_fp);
256                 if (ret != 0)
257                         return ret;
258         }
259         off_t xml_data_offset = ftello(out_fp);
260
261         if (lookup_table_offset == -1 || xml_data_offset == -1) {
262                 ERROR_WITH_ERRNO("Failed to get file offset");
263                 return WIMLIB_ERR_WRITE;
264         }
265         swms[0]->hdr.lookup_table_res_entry.offset = lookup_table_offset;
266         swms[0]->hdr.lookup_table_res_entry.size =
267                                         xml_data_offset - lookup_table_offset;
268
269
270         /* finish_write is called on the first swm, not the joined_wim, because
271          * the first swm is the one that has the image metadata and XML data
272          * attached to it.  */
273         swms[0]->hdr.flags &= ~WIM_HDR_FLAG_SPANNED;
274         swms[0]->hdr.total_parts = 1;
275         return finish_write(swms[0], WIM_ALL_IMAGES,
276                             write_flags | WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE);
277 }
278
279
280 WIMLIBAPI int wimlib_join(const char **swm_names, unsigned num_swms,
281                           const char *output_path, int flags)
282 {
283         int i;
284         int ret;
285         int part_idx;
286         int write_flags = 0;
287         WIMStruct *joined_wim = NULL;
288         WIMStruct *swms[num_swms];
289
290         int ctype;
291         u8 *guid;
292
293         if (num_swms < 1)
294                 return WIMLIB_ERR_INVALID_PARAM;
295
296         ZERO_ARRAY(swms);
297
298         for (i = 0; i < num_swms; i++) {
299                 ret = wimlib_open_wim(swm_names[i],
300                                       flags | WIMLIB_OPEN_FLAG_SPLIT_OK, &swms[i]);
301                 if (ret != 0)
302                         goto out;
303
304                 /* don't open all the parts at the same time, in case there are
305                  * a lot of them */
306                 fclose(swms[i]->fp);
307                 swms[i]->fp = NULL;
308         }
309
310         qsort(swms, num_swms, sizeof(swms[0]), cmp_swms_by_part_number);
311
312         ret = verify_swm_set(swms[0], &swms[1], num_swms - 1);
313         if (ret != 0)
314                 goto out;
315
316         joined_wim = new_wim_struct();
317         if (!joined_wim) {
318                 ret = WIMLIB_ERR_NOMEM;
319                 goto out;
320         }
321
322         if (flags & WIMLIB_OPEN_FLAG_CHECK_INTEGRITY)
323                 write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
324         if (flags & WIMLIB_OPEN_FLAG_SHOW_PROGRESS)
325                 write_flags |= WIMLIB_WRITE_FLAG_SHOW_PROGRESS;
326
327         ret = begin_write(joined_wim, output_path, write_flags);
328         if (ret != 0)
329                 goto out;
330         ret = join_wims(swms, num_swms, joined_wim, write_flags);
331 out:
332         for (i = 0; i < num_swms; i++) {
333                 /* out_fp is the same in all the swms and joined_wim; only close
334                  * it one time, when freeing joined_wim. */
335                 if (swms[i]) {
336                         swms[i]->out_fp = NULL;
337                         wimlib_free(swms[i]);
338                 }
339         }
340         wimlib_free(joined_wim);
341         return ret;
342 }