]> wimlib.net Git - wimlib/blob - src/xml.c
mount_image.c: add fallback definitions of RENAME_* constants
[wimlib] / src / xml.c
1 /*
2  * xml.c - deals with the XML information in WIM files
3  */
4
5 /*
6  * Copyright 2012-2023 Eric Biggers
7  *
8  * This file is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU Lesser General Public License as published by the Free
10  * Software Foundation; either version 3 of the License, or (at your option) any
11  * later version.
12  *
13  * This file is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this file; if not, see https://www.gnu.org/licenses/.
20  */
21
22 #ifdef HAVE_CONFIG_H
23 #  include "config.h"
24 #endif
25
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "wimlib/blob_table.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/timestamp.h"
37 #include "wimlib/xml.h"
38 #include "wimlib/xmlproc.h"
39 #include "wimlib/write.h"
40
/*
 * A wrapper around a WIM file's XML document.  The XML document contains
 * metadata about each image in the WIM file as well as metadata about the WIM
 * file itself.
 *
 * Within this file, instances are allocated by xml_new_info_struct() and
 * released by xml_free_info_struct(), which frees both the document tree and
 * the 'images' array.
 */
struct wim_xml_info {

        /* The XML document in tree form.  xml_new_info_struct() creates this
         * as an element named "WIM"; IMAGE elements are added as its
         * children.  */
        struct xml_node *root;

        /* A malloc()ed array containing a pointer to the IMAGE element for each
         * WIM image.  The image with 1-based index 'i' is at index 'i - 1' in
         * this array.  Note: these pointers are cached values, since they could
         * also be found by searching the document.  The pointed-to nodes belong
         * to the tree under 'root'; only the array itself is freed
         * separately.  */
        struct xml_node **images;

        /* The number of WIM images (the length of 'images')  */
        int image_count;
};
60
61 static u64
62 parse_number(const tchar *str, int base)
63 {
64         tchar *end;
65         unsigned long long v;
66
67         if (!str)
68                 return 0;
69         v = tstrtoull(str, &end, base);
70         if (end == str || *end || v >= UINT64_MAX)
71                 return 0;
72         return v;
73 }
74
75 /*
76  * Retrieve an unsigned integer from the contents of the specified element,
77  * decoding it using the specified base.  If the element has no contents or does
78  * not contain a valid number, returns 0.
79  */
80 static u64
81 xml_element_get_number(const struct xml_node *element, int base)
82 {
83         return parse_number(xml_element_get_text(element), base);
84 }
85
86 /*
87  * Retrieve the timestamp from a time element.  This element should have child
88  * elements HIGHPART and LOWPART; these elements will be used to construct a
89  * Windows-style timestamp.
90  */
91 static u64
92 xml_element_get_timestamp(const struct xml_node *element)
93 {
94         u64 timestamp = 0;
95         const struct xml_node *child;
96
97         xml_node_for_each_child(element, child) {
98                 if (xml_node_is_element(child, T("HIGHPART")))
99                         timestamp |= xml_element_get_number(child, 16) << 32;
100                 else if (xml_node_is_element(child, T("LOWPART")))
101                         timestamp |= xml_element_get_number(child, 16);
102         }
103         return timestamp;
104 }
105
106 /* Create a new timestamp element and optionally link it into a tree.  */
107 static struct xml_node *
108 xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
109                                u64 timestamp)
110 {
111         struct xml_node *element;
112         tchar buf[32];
113
114         element = xml_new_element(NULL, name);
115         if (!element)
116                 goto err;
117
118         tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
119         if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
120                 goto err;
121
122         tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
123         if (!xml_new_element_with_text(element, T("LOWPART"), buf))
124                 goto err;
125
126         if (parent)
127                 xml_add_child(parent, element);
128         return element;
129
130 err:
131         xml_free_node(element);
132         return NULL;
133 }
134
135 /* Create a new number element and optionally link it into a tree.  */
136 static struct xml_node *
137 xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
138 {
139         tchar buf[32];
140
141         tsprintf(buf, T("%"PRIu64), value);
142         return xml_new_element_with_text(parent, name, buf);
143 }
144
145 static bool
146 parse_index(tchar **pp, u32 *index_ret)
147 {
148         tchar *p = *pp;
149         u32 index = 0;
150
151         *p++ = '\0'; /* overwrite '[' */
152         while (*p >= '0' && *p <= '9') {
153                 u32 n = (index * 10) + (*p++ - '0');
154                 if (n < index)
155                         return false;
156                 index = n;
157         }
158         if (index == 0)
159                 return false;
160         if (*p != ']')
161                 return false;
162         p++;
163         if (*p != '/' && *p != '\0')
164                 return false;
165
166         *pp = p;
167         *index_ret = index;
168         return true;
169 }
170
/*
 * Walk down the XML tree from 'element', following the '/'-separated 'path'.
 *
 * Each path component is an element name, optionally followed by a 1-based
 * index in square brackets (e.g. "NAME[2]") that selects the Nth child element
 * having that name.  Without brackets, the first matching child is selected.
 * A leading '/' or an empty component is a syntax error.  An empty path
 * selects 'element' itself.
 *
 * If 'create' is false and a component has no match, the walk stops and 0 is
 * returned with *result_ret set to NULL.  If 'create' is true, a missing
 * component is created as a new child, but only when every lower-numbered
 * child of that name already exists.
 *
 * Returns 0 on success, when nothing was found, or when 'element' is NULL;
 * WIMLIB_ERR_INVALID_PARAM on bad syntax or an index that cannot be created;
 * WIMLIB_ERR_NOMEM if a new element could not be allocated.  *result_ret is
 * NULL in every case except a successful walk.
 *
 * Note: elements already created for earlier components are not removed when a
 * later component fails.
 */
static int
do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
                 struct xml_node **result_ret)
{
        /* The working copy of the path lives on the stack and is sized by the
         * path length (including the null terminator).  */
        size_t n = tstrlen(path) + 1;
        tchar buf[n];
        tchar *p;
        tchar c;

        *result_ret = NULL;

        if (!element)
                return 0;

        /* Copy the path to a temporary buffer.  The copy is modified in place:
         * separators are overwritten with null terminators so that each
         * component name can be used as a standalone string.  */
        tmemcpy(buf, path, n);
        p = buf;

        if (*p == '/')
                goto bad_syntax;
        /* 'c' always holds the character that ended the previous component
         * (before it was overwritten); '\0' means the path is exhausted.  */
        c = *p;

        while (c != '\0') {
                const tchar *name;
                struct xml_node *child;
                u32 index = 1;

                /* We have another path component.  */

                /* Parse the element name.  */
                name = p;
                while (*p != '/' && *p != '\0' && *p != '[')
                        p++;
                if (p == name) /* empty name?  */
                        goto bad_syntax;

                /* Handle a bracketed index, if one was specified.
                 * parse_index() terminates the name at the '[' and leaves 'p'
                 * just past the ']'.  */
                if (*p == '[' && !parse_index(&p, &index))
                        goto bad_syntax;

                /* Remember the separator, then terminate the name.  */
                c = *p;
                *p = '\0';

                /* Look for a matching child.  'index' counts down once per
                 * child with a matching name; the child that brings it to 0 is
                 * the one requested.  */
                xml_node_for_each_child(element, child)
                        if (xml_node_is_element(child, name) && !--index)
                                goto next_step;

                /* No child matched the path.  If create=false, the lookup
                 * failed.  If create=true, create the needed element.  */
                if (!create)
                        return 0;

                /* We can't create an element at index 'n' if indices 1...n-1
                 * didn't already exist.  After the countdown above, 'index' is
                 * exactly 1 only when all lower-numbered matches exist.  */
                if (index != 1)
                        return WIMLIB_ERR_INVALID_PARAM;

                child = xml_new_element(element, name);
                if (!child)
                        return WIMLIB_ERR_NOMEM;
        next_step:
                /* Continue to the next path component, if there is one.  The
                 * increment steps over the separator that was overwritten.  */
                element = child;
                p++;
        }

        *result_ret = element;
        return 0;

bad_syntax:
        ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
        return WIMLIB_ERR_INVALID_PARAM;
}
245
246 /* Retrieve the XML element, if any, at the specified 'path'.  This supports a
247  * simple filesystem-like syntax.  If the element was found, returns a pointer
248  * to it; otherwise returns NULL.  */
249 static struct xml_node *
250 xml_get_element_by_path(struct xml_node *root, const tchar *path)
251 {
252         struct xml_node *element;
253
254         do_xml_path_walk(root, path, false, &element);
255         return element;
256 }
257
258 /*
259  * Similar to xml_get_element_by_path(), but creates the element and any
260  * requisite ancestor elements as needed.   If successful, 0 is returned and
261  * *element_ret is set to a pointer to the resulting element.  If unsuccessful,
262  * an error code is returned and *element_ret is set to NULL.
263  */
264 static int
265 xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
266                            struct xml_node **element_ret)
267 {
268         return do_xml_path_walk(root, path, true, element_ret);
269 }
270
271 static u64
272 xml_get_number_by_path(struct xml_node *root, const tchar *path)
273 {
274         return xml_element_get_number(xml_get_element_by_path(root, path), 10);
275 }
276
277 static u64
278 xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
279 {
280         return xml_element_get_timestamp(xml_get_element_by_path(root, path));
281 }
282
283 static const tchar *
284 xml_get_text_by_path(struct xml_node *root, const tchar *path)
285 {
286         return xml_element_get_text(xml_get_element_by_path(root, path));
287 }
288
289 /*
290  * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
291  * or empty) an element containing text.
292  */
293 static int
294 xml_set_text_by_path(struct xml_node *root, const tchar *path,
295                      const tchar *text)
296 {
297         int ret;
298         struct xml_node *element;
299
300         if (text && *text) {
301                 /* Create or replace  */
302                 ret = xml_ensure_element_by_path(root, path, &element);
303                 if (ret)
304                         return ret;
305                 return xml_element_set_text(element, text);
306         } else {
307                 /* Remove  */
308                 xml_free_node(xml_get_element_by_path(root, path));
309                 return 0;
310         }
311 }
312
/* Detach the INDEX attribute node from the given IMAGE element and hand it
 * back to the caller.  */
static struct xml_node *
unlink_index_attribute(struct xml_node *image_node)
{
        struct xml_node *index_attr;

        index_attr = xml_get_attrib(image_node, T("INDEX"));
        xml_unlink_node(index_attr);
        return index_attr;
}
323
324 /* Compute the total uncompressed size of the streams of the specified inode. */
325 static u64
326 inode_sum_stream_sizes(const struct wim_inode *inode,
327                        const struct blob_table *blob_table)
328 {
329         u64 total_size = 0;
330
331         for (unsigned i = 0; i < inode->i_num_streams; i++) {
332                 const struct blob_descriptor *blob;
333
334                 blob = stream_blob(&inode->i_streams[i], blob_table);
335                 if (blob)
336                         total_size += blob->size;
337         }
338         return total_size;
339 }
340
341 static int
342 append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
343 {
344         tchar buf[32];
345         struct xml_node **images;
346         int ret;
347
348         /* Limit exceeded?  */
349         if (unlikely(info->image_count >= MAX_IMAGES))
350                 return WIMLIB_ERR_IMAGE_COUNT;
351
352         /* Set the INDEX attribute. */
353         tsprintf(buf, T("%d"), info->image_count + 1);
354         ret = xml_set_attrib(image_node, T("INDEX"), buf);
355         if (ret)
356                 return ret;
357
358         /* Append the IMAGE element to the 'images' array.  */
359         images = REALLOC(info->images,
360                          (info->image_count + 1) * sizeof(info->images[0]));
361         if (unlikely(!images))
362                 return WIMLIB_ERR_NOMEM;
363         info->images = images;
364         images[info->image_count++] = image_node;
365
366         /* Add the IMAGE element to the document.  */
367         xml_add_child(info->root, image_node);
368         return 0;
369 }
370
371 /*----------------------------------------------------------------------------*
372  *                     Functions for internal library use                     *
373  *----------------------------------------------------------------------------*/
374
375 /* Allocate an empty 'struct wim_xml_info', containing no images.  */
376 struct wim_xml_info *
377 xml_new_info_struct(void)
378 {
379         struct wim_xml_info *info = CALLOC(1, sizeof(*info));
380
381         if (!info)
382                 return NULL;
383
384         info->root = xml_new_element(NULL, T("WIM"));
385         if (!info->root) {
386                 FREE(info);
387                 return NULL;
388         }
389         return info;
390 }
391
392 /* Free a 'struct wim_xml_info'.  */
393 void
394 xml_free_info_struct(struct wim_xml_info *info)
395 {
396         if (info) {
397                 xml_free_node(info->root);
398                 FREE(info->images);
399                 FREE(info);
400         }
401 }
402
403 /* Retrieve the number of images for which there exist IMAGE elements in the XML
404  * document.  */
405 int
406 xml_get_image_count(const struct wim_xml_info *info)
407 {
408         return info->image_count;
409 }
410
411 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
412  * unavailable.  */
413 u64
414 xml_get_total_bytes(const struct wim_xml_info *info)
415 {
416         return xml_get_number_by_path(info->root, T("TOTALBYTES"));
417 }
418
419 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
420  * unavailable.  */
421 u64
422 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
423 {
424         return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
425 }
426
427 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
428  * is unavailable.  */
429 u64
430 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
431 {
432         return xml_get_number_by_path(info->images[image - 1],
433                                       T("HARDLINKBYTES"));
434 }
435
436 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
437  * unavailable.  */
438 bool
439 xml_get_wimboot(const struct wim_xml_info *info, int image)
440 {
441         return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
442 }
443
444 /* Retrieve the Windows build number for the specified image, or 0 if this
445  * information is not available.  */
446 u64
447 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
448 {
449         return xml_get_number_by_path(info->images[image - 1],
450                                       T("WINDOWS/VERSION/BUILD"));
451 }
452
453 /* Set the WIMBOOT value for the specified image.  */
454 int
455 xml_set_wimboot(struct wim_xml_info *info, int image)
456 {
457         return xml_set_text_by_path(info->images[image - 1],
458                                     T("WIMBOOT"), T("1"));
459 }
460
461 /*
462  * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
463  * LASTMODIFICATIONTIME elements for the specified WIM image.
464  *
465  * Note: since these stats are likely to be used for display purposes only, we
466  * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
467  */
468 int
469 xml_update_image_info(WIMStruct *wim, int image)
470 {
471         const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
472         struct xml_node *image_node = wim->xml_info->images[image - 1];
473         const struct wim_inode *inode;
474         u64 dir_count = 0;
475         u64 file_count = 0;
476         u64 total_bytes = 0;
477         u64 hard_link_bytes = 0;
478         u64 size;
479         struct xml_node *dircount_node;
480         struct xml_node *filecount_node;
481         struct xml_node *totalbytes_node;
482         struct xml_node *hardlinkbytes_node;
483         struct xml_node *lastmodificationtime_node;
484
485         image_for_each_inode(inode, imd) {
486                 if (inode_is_directory(inode))
487                         dir_count += inode->i_nlink;
488                 else
489                         file_count += inode->i_nlink;
490                 size = inode_sum_stream_sizes(inode, wim->blob_table);
491                 total_bytes += size * inode->i_nlink;
492                 hard_link_bytes += size * (inode->i_nlink - 1);
493         }
494
495         dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
496                                                  dir_count);
497         filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
498                                                   file_count);
499         totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
500                                                    total_bytes);
501         hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
502                                                       hard_link_bytes);
503         lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
504                         T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
505
506         if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
507                      !hardlinkbytes_node || !lastmodificationtime_node)) {
508                 xml_free_node(dircount_node);
509                 xml_free_node(filecount_node);
510                 xml_free_node(totalbytes_node);
511                 xml_free_node(hardlinkbytes_node);
512                 xml_free_node(lastmodificationtime_node);
513                 return WIMLIB_ERR_NOMEM;
514         }
515
516         xml_replace_child(image_node, dircount_node);
517         xml_replace_child(image_node, filecount_node);
518         xml_replace_child(image_node, totalbytes_node);
519         xml_replace_child(image_node, hardlinkbytes_node);
520         xml_replace_child(image_node, lastmodificationtime_node);
521         return 0;
522 }
523
524 /* Add an image to the XML information. */
525 int
526 xml_add_image(struct wim_xml_info *info, const tchar *name)
527 {
528         const u64 now = now_as_wim_timestamp();
529         struct xml_node *image_node;
530         int ret;
531
532         if (name && !xml_legal_value(name)) {
533                 ERROR("Name of new image contains illegal characters");
534                 return WIMLIB_ERR_INVALID_PARAM;
535         }
536
537         ret = WIMLIB_ERR_NOMEM;
538         image_node = xml_new_element(NULL, T("IMAGE"));
539         if (!image_node)
540                 goto err;
541         if (name && *name &&
542             !xml_new_element_with_text(image_node, T("NAME"), name))
543                 goto err;
544         if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
545                 goto err;
546         if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
547                 goto err;
548         if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
549                 goto err;
550         if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
551                 goto err;
552         if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
553                 goto err;
554         if (!xml_new_element_with_timestamp(image_node,
555                                             T("LASTMODIFICATIONTIME"), now))
556                 goto err;
557         ret = append_image_node(info, image_node);
558         if (ret)
559                 goto err;
560         return 0;
561
562 err:
563         xml_free_node(image_node);
564         return ret;
565 }
566
567 /*
568  * Make a copy of the XML information for the image with index @src_image in the
569  * @src_info XML document and append it to the @dest_info XML document.
570  *
571  * In the process, change the image's name and description to the values
572  * specified by @dest_image_name and @dest_image_description.  Either or both
573  * may be NULL, which indicates that the corresponding element will not be
574  * included in the destination image.
575  */
576 int
577 xml_export_image(const struct wim_xml_info *src_info, int src_image,
578                  struct wim_xml_info *dest_info, const tchar *dest_image_name,
579                  const tchar *dest_image_description, bool wimboot)
580 {
581         struct xml_node *dest_node;
582         int ret;
583
584         if (dest_image_name && !xml_legal_value(dest_image_name)) {
585                 ERROR("Destination image name contains illegal characters");
586                 return WIMLIB_ERR_INVALID_PARAM;
587         }
588         if (dest_image_description &&
589             !xml_legal_value(dest_image_description)) {
590                 ERROR("Destination image description contains illegal characters");
591                 return WIMLIB_ERR_INVALID_PARAM;
592         }
593
594         ret = WIMLIB_ERR_NOMEM;
595         dest_node = xml_clone_tree(src_info->images[src_image - 1]);
596         if (!dest_node)
597                 goto err;
598
599         ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
600         if (ret)
601                 goto err;
602
603         ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
604                                    dest_image_description);
605         if (ret)
606                 goto err;
607
608         if (wimboot) {
609                 ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
610                 if (ret)
611                         goto err;
612         }
613
614         ret = append_image_node(dest_info, dest_node);
615         if (ret)
616                 goto err;
617         return 0;
618
619 err:
620         xml_free_node(dest_node);
621         return ret;
622 }
623
624 /* Remove the specified image from the XML document.  */
625 void
626 xml_delete_image(struct wim_xml_info *info, int image)
627 {
628         struct xml_node *next_image;
629         struct xml_node *index_attr, *next_index_attr;
630
631         /* Free the IMAGE element for the deleted image.  Then, shift all
632          * higher-indexed IMAGE elements down by 1, in the process re-assigning
633          * their INDEX attributes.  */
634
635         next_image = info->images[image - 1];
636         next_index_attr = unlink_index_attribute(next_image);
637         xml_free_node(next_image);
638
639         while (image < info->image_count) {
640                 index_attr = next_index_attr;
641                 next_image = info->images[image];
642                 next_index_attr = unlink_index_attribute(next_image);
643                 xml_add_child(next_image, index_attr);
644                 info->images[image - 1] = next_image;
645                 image++;
646         }
647
648         xml_free_node(next_index_attr);
649         info->image_count--;
650 }
651
/* Architecture constants are from w64 mingw winnt.h.  describe_arch() maps
 * these values to display names.  No constant is defined here for the
 * value 11.  */
#define PROCESSOR_ARCHITECTURE_INTEL            0
#define PROCESSOR_ARCHITECTURE_MIPS             1
#define PROCESSOR_ARCHITECTURE_ALPHA            2
#define PROCESSOR_ARCHITECTURE_PPC              3
#define PROCESSOR_ARCHITECTURE_SHX              4
#define PROCESSOR_ARCHITECTURE_ARM              5
#define PROCESSOR_ARCHITECTURE_IA64             6
#define PROCESSOR_ARCHITECTURE_ALPHA64          7
#define PROCESSOR_ARCHITECTURE_MSIL             8
#define PROCESSOR_ARCHITECTURE_AMD64            9
#define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64    10
#define PROCESSOR_ARCHITECTURE_ARM64            12
665
666 static const tchar *
667 describe_arch(u64 arch)
668 {
669         static const tchar * const descriptions[] = {
670                 [PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
671                 [PROCESSOR_ARCHITECTURE_MIPS]  = T("MIPS"),
672                 [PROCESSOR_ARCHITECTURE_ARM]   = T("ARM"),
673                 [PROCESSOR_ARCHITECTURE_IA64]  = T("ia64"),
674                 [PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
675                 [PROCESSOR_ARCHITECTURE_ARM64] = T("ARM64"),
676         };
677
678         if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
679                 return descriptions[arch];
680
681         return T("unknown");
682 }
683
684 /* Print information from the WINDOWS element, if present.  */
685 static void
686 print_windows_info(struct xml_node *image_node)
687 {
688         struct xml_node *windows_node;
689         struct xml_node *langs_node;
690         struct xml_node *version_node;
691         const tchar *text;
692
693         windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
694         if (!windows_node)
695                 return;
696
697         tprintf(T("Architecture:           %"TS"\n"),
698                 describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
699
700         text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
701         if (text)
702                 tprintf(T("Product Name:           %"TS"\n"), text);
703
704         text = xml_get_text_by_path(windows_node, T("EDITIONID"));
705         if (text)
706                 tprintf(T("Edition ID:             %"TS"\n"), text);
707
708         text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
709         if (text)
710                 tprintf(T("Installation Type:      %"TS"\n"), text);
711
712         text = xml_get_text_by_path(windows_node, T("HAL"));
713         if (text)
714                 tprintf(T("HAL:                    %"TS"\n"), text);
715
716         text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
717         if (text)
718                 tprintf(T("Product Type:           %"TS"\n"), text);
719
720         text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
721         if (text)
722                 tprintf(T("Product Suite:          %"TS"\n"), text);
723
724         langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
725         if (langs_node) {
726                 struct xml_node *lang_node;
727
728                 tprintf(T("Languages:              "));
729                 xml_node_for_each_child(langs_node, lang_node) {
730                         if (!xml_node_is_element(lang_node, T("LANGUAGE")))
731                                 continue;
732                         text = xml_element_get_text(lang_node);
733                         if (!text)
734                                 continue;
735                         tprintf(T("%"TS" "), text);
736                 }
737                 tputchar(T('\n'));
738
739                 text = xml_get_text_by_path(langs_node, T("DEFAULT"));
740                 if (text)
741                         tprintf(T("Default Language:       %"TS"\n"), text);
742         }
743
744         text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
745         if (text)
746                 tprintf(T("System Root:            %"TS"\n"), text);
747
748         version_node = xml_get_element_by_path(windows_node, T("VERSION"));
749         if (version_node) {
750                 tprintf(T("Major Version:          %"PRIu64"\n"),
751                         xml_get_number_by_path(version_node, T("MAJOR")));
752                 tprintf(T("Minor Version:          %"PRIu64"\n"),
753                         xml_get_number_by_path(version_node, T("MINOR")));
754                 tprintf(T("Build:                  %"PRIu64"\n"),
755                         xml_get_number_by_path(version_node, T("BUILD")));
756                 tprintf(T("Service Pack Build:     %"PRIu64"\n"),
757                         xml_get_number_by_path(version_node, T("SPBUILD")));
758                 tprintf(T("Service Pack Level:     %"PRIu64"\n"),
759                         xml_get_number_by_path(version_node, T("SPLEVEL")));
760         }
761 }
762
763 /* Prints information about the specified image.  */
764 void
765 xml_print_image_info(struct wim_xml_info *info, int image)
766 {
767         struct xml_node * const image_node = info->images[image - 1];
768         const tchar *text;
769         tchar timebuf[64];
770
771         tprintf(T("Index:                  %d\n"), image);
772
773         /* Always print the Name and Description, even if the corresponding XML
774          * elements are not present.  */
775         text = xml_get_text_by_path(image_node, T("NAME"));
776         tprintf(T("Name:                   %"TS"\n"), text ? text : T(""));
777         text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
778         tprintf(T("Description:            %"TS"\n"), text ? text : T(""));
779
780         text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
781         if (text)
782                 tprintf(T("Display Name:           %"TS"\n"), text);
783
784         text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
785         if (text)
786                 tprintf(T("Display Description:    %"TS"\n"), text);
787
788         tprintf(T("Directory Count:        %"PRIu64"\n"),
789                 xml_get_number_by_path(image_node, T("DIRCOUNT")));
790
791         tprintf(T("File Count:             %"PRIu64"\n"),
792                 xml_get_number_by_path(image_node, T("FILECOUNT")));
793
794         tprintf(T("Total Bytes:            %"PRIu64"\n"),
795                 xml_get_number_by_path(image_node, T("TOTALBYTES")));
796
797         tprintf(T("Hard Link Bytes:        %"PRIu64"\n"),
798                 xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
799
800         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
801                                                        T("CREATIONTIME")),
802                              timebuf, ARRAY_LEN(timebuf));
803         tprintf(T("Creation Time:          %"TS"\n"), timebuf);
804
805         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
806                                         T("LASTMODIFICATIONTIME")),
807                                         timebuf, ARRAY_LEN(timebuf));
808         tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
809
810         print_windows_info(image_node);
811
812         text = xml_get_text_by_path(image_node, T("FLAGS"));
813         if (text)
814                 tprintf(T("Flags:                  %"TS"\n"), text);
815
816         tprintf(T("WIMBoot compatible:     %"TS"\n"),
817                 xml_get_number_by_path(image_node, T("WIMBOOT")) ?
818                         T("yes") : T("no"));
819
820         tputchar('\n');
821 }
822
823 /*----------------------------------------------------------------------------*
824  *                      Reading and writing the XML data                      *
825  *----------------------------------------------------------------------------*/
826
827 static int
828 image_element_get_index(struct xml_node *element)
829 {
830         struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
831
832         if (!attrib)
833                 return 0;
834         return min(INT_MAX, parse_number(attrib->value, 10));
835 }
836
837 /* Prepare the 'images' array from the XML document tree.  */
838 static int
839 setup_images(struct wim_xml_info *info, struct xml_node *root)
840 {
841         struct xml_node *child;
842         int index;
843         int max_index = 0;
844         int ret;
845
846         xml_node_for_each_child(root, child) {
847                 if (!xml_node_is_element(child, T("IMAGE")))
848                         continue;
849                 index = image_element_get_index(child);
850                 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
851                         goto err_indices;
852                 max_index = max(max_index, index);
853                 info->image_count++;
854         }
855         if (unlikely(max_index != info->image_count))
856                 goto err_indices;
857         ret = WIMLIB_ERR_NOMEM;
858         info->images = CALLOC(info->image_count, sizeof(info->images[0]));
859         if (unlikely(!info->images))
860                 goto err;
861         xml_node_for_each_child(root, child) {
862                 if (!xml_node_is_element(child, T("IMAGE")))
863                         continue;
864                 index = image_element_get_index(child);
865                 if (unlikely(info->images[index - 1]))
866                         goto err_indices;
867                 info->images[index - 1] = child;
868         }
869         return 0;
870
871 err_indices:
872         ERROR("The WIM file's XML document does not contain exactly one IMAGE "
873               "element per image!");
874         ret = WIMLIB_ERR_XML;
875 err:
876         FREE(info->images);
877         return ret;
878 }
879
880 static int
881 parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
882                        struct xml_node **root_ret)
883 {
884         tchar *doc;
885         int ret;
886
887         ret = utf16le_to_tstr(raw_doc, raw_doc_size, &doc, NULL);
888         if (ret)
889                 return ret;
890         ret = xml_parse_document(doc, root_ret);
891         FREE(doc);
892         return ret;
893 }
894
895 /* Reads the XML data from a WIM file.  */
896 int
897 read_wim_xml_data(WIMStruct *wim)
898 {
899         struct wim_xml_info *info;
900         void *raw_doc;
901         size_t raw_doc_size;
902         struct xml_node *root;
903         int ret;
904
905         /* Allocate the 'struct wim_xml_info'.  */
906         ret = WIMLIB_ERR_NOMEM;
907         info = CALLOC(1, sizeof(*info));
908         if (!info)
909                 goto err;
910
911         /* Read the raw UTF-16LE XML document.  */
912         ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
913         if (ret)
914                 goto err;
915
916         /* Parse the document, creating the document tree.  */
917         ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
918         FREE(raw_doc);
919         raw_doc = NULL;
920         if (ret) {
921                 if (ret != WIMLIB_ERR_NOMEM)
922                         ret = WIMLIB_ERR_XML;
923                 ERROR("Unable to parse the WIM file's XML document!");
924                 goto err;
925         }
926         root = info->root;
927
928         /* Verify the root element.  */
929         if (!xml_node_is_element(root, T("WIM"))) {
930                 ERROR("The WIM file's XML document has an unexpected format!");
931                 ret = WIMLIB_ERR_XML;
932                 goto err;
933         }
934
935         /* Verify the WIM file is not encrypted.  */
936         if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
937                 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
938                 goto err;
939         }
940
941         /* Validate the image elements and set up the images[] array.  */
942         ret = setup_images(info, root);
943         if (ret)
944                 goto err;
945
946         /* Success!  */
947         wim->xml_info = info;
948         return 0;
949
950 err:
951         xml_free_info_struct(info);
952         return ret;
953 }
954
/*
 * Swap the INDEX attributes of two IMAGE elements.  Does nothing if both
 * arguments refer to the same element.
 */
static void
swap_index_attributes(struct xml_node *image_element_1,
                      struct xml_node *image_element_2)
{
        struct xml_node *index_1, *index_2;

        if (image_element_1 == image_element_2)
                return;

        /* Detach both attributes first, then attach each to the other
         * element.  */
        index_1 = unlink_index_attribute(image_element_1);
        index_2 = unlink_index_attribute(image_element_2);
        xml_add_child(image_element_1, index_2);
        xml_add_child(image_element_2, index_1);
}
969
970 static int
971 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
972                            struct xml_node **orig_totalbytes_element_ret)
973 {
974         struct xml_node *totalbytes_element = NULL;
975
976         /* Allocate the new TOTALBYTES element if needed.  */
977         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
978             total_bytes != WIM_TOTALBYTES_OMIT) {
979                 totalbytes_element = xml_new_element_with_u64(
980                                         NULL, T("TOTALBYTES"), total_bytes);
981                 if (!totalbytes_element)
982                         return WIMLIB_ERR_NOMEM;
983         }
984
985         /* Adjust the IMAGE elements if needed.  */
986         if (image != WIMLIB_ALL_IMAGES) {
987                 /* We're writing a single image only.  Temporarily unlink all
988                  * other IMAGE elements from the document.  */
989                 for (int i = 0; i < info->image_count; i++)
990                         if (i + 1 != image)
991                                 xml_unlink_node(info->images[i]);
992
993                 /* Temporarily set the INDEX attribute of the needed IMAGE
994                  * element to 1.  */
995                 swap_index_attributes(info->images[0], info->images[image - 1]);
996         }
997
998         /* Adjust (add, change, or remove) the TOTALBYTES element if needed.  */
999         *orig_totalbytes_element_ret = NULL;
1000         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1001                 /* Unlink the previous TOTALBYTES element, if any.  */
1002                 *orig_totalbytes_element_ret = xml_get_element_by_path(
1003                                                 info->root, T("TOTALBYTES"));
1004                 if (*orig_totalbytes_element_ret)
1005                         xml_unlink_node(*orig_totalbytes_element_ret);
1006
1007                 /* Link in the new TOTALBYTES element, if any.  */
1008                 if (totalbytes_element)
1009                         xml_add_child(info->root, totalbytes_element);
1010         }
1011         return 0;
1012 }
1013
1014 static void
1015 restore_document_after_write(struct wim_xml_info *info, int image,
1016                              struct xml_node *orig_totalbytes_element)
1017 {
1018         /* Restore the IMAGE elements if needed.  */
1019         if (image != WIMLIB_ALL_IMAGES) {
1020                 /* We wrote a single image only.  Re-link all other IMAGE
1021                  * elements to the document.  */
1022                 for (int i = 0; i < info->image_count; i++)
1023                         if (i + 1 != image)
1024                                 xml_add_child(info->root, info->images[i]);
1025
1026                 /* Restore the original INDEX attributes.  */
1027                 swap_index_attributes(info->images[0], info->images[image - 1]);
1028         }
1029
1030         /* Restore the original TOTALBYTES element if needed.  */
1031         if (orig_totalbytes_element)
1032                 xml_replace_child(info->root, orig_totalbytes_element);
1033 }
1034
1035 /*
1036  * Writes the XML data to a WIM file.
1037  *
1038  * 'image' specifies the image(s) to include in the XML data.  Normally it is
1039  * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1040  *
1041  * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1042  * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1043  * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
1044  */
1045 int
1046 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1047                    struct wim_reshdr *out_reshdr, int write_resource_flags)
1048 {
1049         struct wim_xml_info *info = wim->xml_info;
1050         int ret;
1051         struct xml_node *orig_totalbytes_element;
1052         struct xml_out_buf buf = {};
1053         const utf16lechar *raw_doc;
1054         size_t raw_doc_size;
1055
1056         /* Make any needed temporary changes to the document.  */
1057         ret = prepare_document_for_write(info, image, total_bytes,
1058                                          &orig_totalbytes_element);
1059         if (ret)
1060                 goto out;
1061
1062         ret = xml_write_document(info->root, &buf);
1063         if (ret)
1064                 goto out_restore_document;
1065
1066         ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
1067         if (ret)
1068                 goto out_restore_document;
1069
1070         /* Write the XML data uncompressed.  Although wimlib can handle
1071          * compressed XML data, some other WIM software cannot.  */
1072         ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
1073                                              true,
1074                                              &wim->out_fd,
1075                                              WIMLIB_COMPRESSION_TYPE_NONE,
1076                                              0,
1077                                              out_reshdr,
1078                                              NULL,
1079                                              write_resource_flags);
1080         tstr_put_utf16le(raw_doc);
1081 out_restore_document:
1082         /* Revert any temporary changes we made to the document.  */
1083         restore_document_after_write(info, image, orig_totalbytes_element);
1084         FREE(buf.buf);
1085 out:
1086         return ret;
1087 }
1088
1089 /*----------------------------------------------------------------------------*
1090  *                           Library API functions                            *
1091  *----------------------------------------------------------------------------*/
1092
1093 WIMLIBAPI int
1094 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1095 {
1096         const struct wim_reshdr *xml_reshdr;
1097
1098         if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1099                 return WIMLIB_ERR_NO_FILENAME;
1100
1101         if (buf_ret == NULL || bufsize_ret == NULL)
1102                 return WIMLIB_ERR_INVALID_PARAM;
1103
1104         xml_reshdr = &wim->hdr.xml_data_reshdr;
1105
1106         *bufsize_ret = xml_reshdr->uncompressed_size;
1107         return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
1108 }
1109
1110 WIMLIBAPI int
1111 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1112 {
1113         int ret;
1114         void *buf;
1115         size_t bufsize;
1116
1117         ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1118         if (ret)
1119                 return ret;
1120
1121         if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1122                 ERROR_WITH_ERRNO("Failed to extract XML data");
1123                 ret = WIMLIB_ERR_WRITE;
1124         }
1125         FREE(buf);
1126         return ret;
1127 }
1128
1129 static bool
1130 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1131 {
1132         const struct wim_xml_info *info = wim->xml_info;
1133         const tchar *existing_name;
1134
1135         /* Any number of images can have "no name".  */
1136         if (!name || !*name)
1137                 return false;
1138
1139         /* Check for images that have the specified name.  */
1140         for (int i = 0; i < info->image_count; i++) {
1141                 if (i + 1 == excluded_image)
1142                         continue;
1143                 existing_name = xml_get_text_by_path(info->images[i],
1144                                                      T("NAME"));
1145                 if (existing_name && !tstrcmp(existing_name, name))
1146                         return true;
1147         }
1148         return false;
1149 }
1150
1151 WIMLIBAPI bool
1152 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1153 {
1154         return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
1155 }
1156
1157 WIMLIBAPI const tchar *
1158 wimlib_get_image_name(const WIMStruct *wim, int image)
1159 {
1160         const struct wim_xml_info *info = wim->xml_info;
1161         const tchar *name;
1162
1163         if (image < 1 || image > info->image_count)
1164                 return NULL;
1165         name = wimlib_get_image_property(wim, image, T("NAME"));
1166         return name ? name : T("");
1167 }
1168
1169 WIMLIBAPI const tchar *
1170 wimlib_get_image_description(const WIMStruct *wim, int image)
1171 {
1172         return wimlib_get_image_property(wim, image, T("DESCRIPTION"));
1173 }
1174
1175 WIMLIBAPI const tchar *
1176 wimlib_get_image_property(const WIMStruct *wim, int image,
1177                           const tchar *property_name)
1178 {
1179         const struct wim_xml_info *info = wim->xml_info;
1180
1181         if (!property_name || !*property_name)
1182                 return NULL;
1183         if (image < 1 || image > info->image_count)
1184                 return NULL;
1185         return xml_get_text_by_path(info->images[image - 1], property_name);
1186 }
1187
1188 WIMLIBAPI int
1189 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1190 {
1191         return wimlib_set_image_property(wim, image, T("NAME"), name);
1192 }
1193
1194 WIMLIBAPI int
1195 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1196 {
1197         return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
1198                                          description);
1199 }
1200
1201 WIMLIBAPI int
1202 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1203 {
1204         return wimlib_set_image_property(wim, image, T("FLAGS"), flags);
1205 }
1206
1207 WIMLIBAPI int
1208 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1209                           const tchar *property_value)
1210 {
1211         struct wim_xml_info *info = wim->xml_info;
1212
1213         if (!property_name || !*property_name)
1214                 return WIMLIB_ERR_INVALID_PARAM;
1215
1216         if (!xml_legal_path(property_name)) {
1217                 ERROR("Property name '%"TS"' is illegal in XML", property_name);
1218                 return WIMLIB_ERR_INVALID_PARAM;
1219         }
1220
1221         if (property_value && !xml_legal_value(property_value)) {
1222                 WARNING("Value of property '%"TS"' contains illegal characters",
1223                         property_name);
1224                 return WIMLIB_ERR_INVALID_PARAM;
1225         }
1226
1227         if (image < 1 || image > info->image_count)
1228                 return WIMLIB_ERR_INVALID_IMAGE;
1229
1230         if (!tstrcmp(property_name, T("NAME")) &&
1231             image_name_in_use(wim, property_value, image))
1232                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1233
1234         return xml_set_text_by_path(info->images[image - 1], property_name,
1235                                     property_value);
1236 }