]> wimlib.net Git - wimlib/blob - src/xml.c
3194978b015289df871daf5af8b828ae0dbe7d41
[wimlib] / src / xml.c
1 /*
2  * xml.c - deals with the XML information in WIM files
3  */
4
5 /*
6  * Copyright 2012-2023 Eric Biggers
7  *
8  * This file is free software; you can redistribute it and/or modify it under
9  * the terms of the GNU Lesser General Public License as published by the Free
10  * Software Foundation; either version 3 of the License, or (at your option) any
11  * later version.
12  *
13  * This file is distributed in the hope that it will be useful, but WITHOUT
14  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public License
19  * along with this file; if not, see http://www.gnu.org/licenses/.
20  */
21
22 #ifdef HAVE_CONFIG_H
23 #  include "config.h"
24 #endif
25
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "wimlib/blob_table.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/timestamp.h"
37 #include "wimlib/xml.h"
38 #include "wimlib/xmlproc.h"
39 #include "wimlib/write.h"
40
/*
 * A wrapper around a WIM file's XML document.  The XML document contains
 * metadata about each image in the WIM file as well as metadata about the WIM
 * file itself.
 *
 * Instances are released with xml_free_info_struct(), which frees the tree
 * rooted at 'root' and the 'images' array itself.
 */
struct wim_xml_info {

        /* The XML document in tree form */
        struct xml_node *root;

        /* A malloc()ed array containing a pointer to the IMAGE element for each
         * WIM image.  The image with 1-based index 'i' is at index 'i - 1' in
         * this array.  Note: these pointers are cached values, since they could
         * also be found by searching the document.  */
        struct xml_node **images;

        /* The number of WIM images (the length of 'images')  */
        int image_count;
};
60
61 static u64
62 parse_number(const tchar *str, int base)
63 {
64         tchar *end;
65         unsigned long long v;
66
67         if (!str)
68                 return 0;
69         v = tstrtoull(str, &end, base);
70         if (end == str || *end || v >= UINT64_MAX)
71                 return 0;
72         return v;
73 }
74
75 /*
76  * Retrieve an unsigned integer from the contents of the specified element,
77  * decoding it using the specified base.  If the element has no contents or does
78  * not contain a valid number, returns 0.
79  */
80 static u64
81 xml_element_get_number(const struct xml_node *element, int base)
82 {
83         return parse_number(xml_element_get_text(element), base);
84 }
85
86 /*
87  * Retrieve the timestamp from a time element.  This element should have child
88  * elements HIGHPART and LOWPART; these elements will be used to construct a
89  * Windows-style timestamp.
90  */
91 static u64
92 xml_element_get_timestamp(const struct xml_node *element)
93 {
94         u64 timestamp = 0;
95         const struct xml_node *child;
96
97         xml_node_for_each_child(element, child) {
98                 if (xml_node_is_element(child, T("HIGHPART")))
99                         timestamp |= xml_element_get_number(child, 16) << 32;
100                 else if (xml_node_is_element(child, T("LOWPART")))
101                         timestamp |= xml_element_get_number(child, 16);
102         }
103         return timestamp;
104 }
105
106 /* Create a new timestamp element and optionally link it into a tree.  */
107 static struct xml_node *
108 xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
109                                u64 timestamp)
110 {
111         struct xml_node *element;
112         tchar buf[32];
113
114         element = xml_new_element(NULL, name);
115         if (!element)
116                 goto err;
117
118         tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
119         if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
120                 goto err;
121
122         tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
123         if (!xml_new_element_with_text(element, T("LOWPART"), buf))
124                 goto err;
125
126         if (parent)
127                 xml_add_child(parent, element);
128         return element;
129
130 err:
131         xml_free_node(element);
132         return NULL;
133 }
134
135 /* Create a new number element and optionally link it into a tree.  */
136 static struct xml_node *
137 xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
138 {
139         tchar buf[32];
140
141         tsprintf(buf, T("%"PRIu64), value);
142         return xml_new_element_with_text(parent, name, buf);
143 }
144
145 static bool
146 parse_index(tchar **pp, u32 *index_ret)
147 {
148         tchar *p = *pp;
149         u32 index = 0;
150
151         *p++ = '\0'; /* overwrite '[' */
152         while (*p >= '0' && *p <= '9') {
153                 u32 n = (index * 10) + (*p++ - '0');
154                 if (n < index)
155                         return false;
156                 index = n;
157         }
158         if (index == 0)
159                 return false;
160         if (*p != ']')
161                 return false;
162         p++;
163         if (*p != '/' && *p != '\0')
164                 return false;
165
166         *pp = p;
167         *index_ret = index;
168         return true;
169 }
170
171 static int
172 do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
173                  struct xml_node **result_ret)
174 {
175         size_t n = tstrlen(path) + 1;
176         tchar buf[n];
177         tchar *p;
178         tchar c;
179
180         *result_ret = NULL;
181
182         if (!element)
183                 return 0;
184
185         /* Copy the path to a temporary buffer.  */
186         tmemcpy(buf, path, n);
187         p = buf;
188
189         if (*p == '/')
190                 goto bad_syntax;
191         c = *p;
192
193         while (c != '\0') {
194                 const tchar *name;
195                 struct xml_node *child;
196                 u32 index = 1;
197
198                 /* We have another path component.  */
199
200                 /* Parse the element name.  */
201                 name = p;
202                 while (*p != '/' && *p != '\0' && *p != '[')
203                         p++;
204                 if (p == name) /* empty name?  */
205                         goto bad_syntax;
206
207                 /* Handle a bracketed index, if one was specified.  */
208                 if (*p == '[' && !parse_index(&p, &index))
209                         goto bad_syntax;
210
211                 c = *p;
212                 *p = '\0';
213
214                 /* Look for a matching child.  */
215                 xml_node_for_each_child(element, child)
216                         if (xml_node_is_element(child, name) && !--index)
217                                 goto next_step;
218
219                 /* No child matched the path.  If create=false, the lookup
220                  * failed.  If create=true, create the needed element.  */
221                 if (!create)
222                         return 0;
223
224                 /* We can't create an element at index 'n' if indices 1...n-1
225                  * didn't already exist.  */
226                 if (index != 1)
227                         return WIMLIB_ERR_INVALID_PARAM;
228
229                 child = xml_new_element(element, name);
230                 if (!child)
231                         return WIMLIB_ERR_NOMEM;
232         next_step:
233                 /* Continue to the next path component, if there is one.  */
234                 element = child;
235                 p++;
236         }
237
238         *result_ret = element;
239         return 0;
240
241 bad_syntax:
242         ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
243         return WIMLIB_ERR_INVALID_PARAM;
244 }
245
246 /* Retrieve the XML element, if any, at the specified 'path'.  This supports a
247  * simple filesystem-like syntax.  If the element was found, returns a pointer
248  * to it; otherwise returns NULL.  */
249 static struct xml_node *
250 xml_get_element_by_path(struct xml_node *root, const tchar *path)
251 {
252         struct xml_node *element;
253
254         do_xml_path_walk(root, path, false, &element);
255         return element;
256 }
257
258 /*
259  * Similar to xml_get_element_by_path(), but creates the element and any
260  * requisite ancestor elements as needed.   If successful, 0 is returned and
261  * *element_ret is set to a pointer to the resulting element.  If unsuccessful,
262  * an error code is returned and *element_ret is set to NULL.
263  */
264 static int
265 xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
266                            struct xml_node **element_ret)
267 {
268         return do_xml_path_walk(root, path, true, element_ret);
269 }
270
271 static u64
272 xml_get_number_by_path(struct xml_node *root, const tchar *path)
273 {
274         return xml_element_get_number(xml_get_element_by_path(root, path), 10);
275 }
276
277 static u64
278 xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
279 {
280         return xml_element_get_timestamp(xml_get_element_by_path(root, path));
281 }
282
283 static const tchar *
284 xml_get_text_by_path(struct xml_node *root, const tchar *path)
285 {
286         return xml_element_get_text(xml_get_element_by_path(root, path));
287 }
288
289 /*
290  * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
291  * or empty) an element containing text.
292  */
293 static int
294 xml_set_text_by_path(struct xml_node *root, const tchar *path, const tchar *text)
295 {
296         int ret;
297         struct xml_node *element;
298
299         if (text && *text) {
300                 /* Create or replace  */
301                 ret = xml_ensure_element_by_path(root, path, &element);
302                 if (ret)
303                         return ret;
304                 return xml_element_set_text(element, text);
305         } else {
306                 /* Remove  */
307                 element = xml_get_element_by_path(root, path);
308                 if (element)
309                         xml_free_node(element);
310                 return 0;
311         }
312 }
313
/* Find the INDEX attribute node of the given IMAGE element, unlink it, and
 * hand it back to the caller.  */
static struct xml_node *
unlink_index_attribute(struct xml_node *image_node)
{
        struct xml_node *index_attr;

        index_attr = xml_get_attrib(image_node, T("INDEX"));
        xml_unlink_node(index_attr);

        return index_attr;
}
324
325 /* Compute the total uncompressed size of the streams of the specified inode. */
326 static u64
327 inode_sum_stream_sizes(const struct wim_inode *inode,
328                        const struct blob_table *blob_table)
329 {
330         u64 total_size = 0;
331
332         for (unsigned i = 0; i < inode->i_num_streams; i++) {
333                 const struct blob_descriptor *blob;
334
335                 blob = stream_blob(&inode->i_streams[i], blob_table);
336                 if (blob)
337                         total_size += blob->size;
338         }
339         return total_size;
340 }
341
342 static int
343 append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
344 {
345         tchar buf[32];
346         struct xml_node **images;
347         int ret;
348
349         /* Limit exceeded?  */
350         if (unlikely(info->image_count >= MAX_IMAGES))
351                 return WIMLIB_ERR_IMAGE_COUNT;
352
353         /* Set the INDEX attribute. */
354         tsprintf(buf, T("%d"), info->image_count + 1);
355         ret = xml_set_attrib(image_node, T("INDEX"), buf);
356         if (ret)
357                 return ret;
358
359         /* Append the IMAGE element to the 'images' array.  */
360         images = REALLOC(info->images,
361                          (info->image_count + 1) * sizeof(info->images[0]));
362         if (unlikely(!images))
363                 return WIMLIB_ERR_NOMEM;
364         info->images = images;
365         images[info->image_count++] = image_node;
366
367         /* Add the IMAGE element to the document.  */
368         xml_add_child(info->root, image_node);
369         return 0;
370 }
371
372 /*----------------------------------------------------------------------------*
373  *                     Functions for internal library use                     *
374  *----------------------------------------------------------------------------*/
375
376 /* Allocate an empty 'struct wim_xml_info', containing no images.  */
377 struct wim_xml_info *
378 xml_new_info_struct(void)
379 {
380         struct wim_xml_info *info = CALLOC(1, sizeof(*info));
381
382         if (!info)
383                 return NULL;
384
385         info->root = xml_new_element(NULL, T("WIM"));
386         if (!info->root) {
387                 FREE(info);
388                 return NULL;
389         }
390         return info;
391 }
392
393 /* Free a 'struct wim_xml_info'.  */
394 void
395 xml_free_info_struct(struct wim_xml_info *info)
396 {
397         if (info) {
398                 xml_free_node(info->root);
399                 FREE(info->images);
400                 FREE(info);
401         }
402 }
403
404 /* Retrieve the number of images for which there exist IMAGE elements in the XML
405  * document.  */
406 int
407 xml_get_image_count(const struct wim_xml_info *info)
408 {
409         return info->image_count;
410 }
411
412 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
413  * unavailable.  */
414 u64
415 xml_get_total_bytes(const struct wim_xml_info *info)
416 {
417         return xml_get_number_by_path(info->root, T("TOTALBYTES"));
418 }
419
420 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
421  * unavailable.  */
422 u64
423 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
424 {
425         return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
426 }
427
428 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
429  * is unavailable.  */
430 u64
431 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
432 {
433         return xml_get_number_by_path(info->images[image - 1],
434                                       T("HARDLINKBYTES"));
435 }
436
437 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
438  * unavailable.  */
439 bool
440 xml_get_wimboot(const struct wim_xml_info *info, int image)
441 {
442         return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
443 }
444
445 /* Retrieve the Windows build number for the specified image, or 0 if this
446  * information is not available.  */
447 u64
448 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
449 {
450         return xml_get_number_by_path(info->images[image - 1],
451                                       T("WINDOWS/VERSION/BUILD"));
452 }
453
454 /* Set the WIMBOOT value for the specified image.  */
455 int
456 xml_set_wimboot(struct wim_xml_info *info, int image)
457 {
458         return xml_set_text_by_path(info->images[image - 1],
459                                     T("WIMBOOT"), T("1"));
460 }
461
462 /*
463  * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
464  * LASTMODIFICATIONTIME elements for the specified WIM image.
465  *
466  * Note: since these stats are likely to be used for display purposes only, we
467  * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
468  */
469 int
470 xml_update_image_info(WIMStruct *wim, int image)
471 {
472         const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
473         struct xml_node *image_node = wim->xml_info->images[image - 1];
474         const struct wim_inode *inode;
475         u64 dir_count = 0;
476         u64 file_count = 0;
477         u64 total_bytes = 0;
478         u64 hard_link_bytes = 0;
479         u64 size;
480         struct xml_node *dircount_node;
481         struct xml_node *filecount_node;
482         struct xml_node *totalbytes_node;
483         struct xml_node *hardlinkbytes_node;
484         struct xml_node *lastmodificationtime_node;
485
486         image_for_each_inode(inode, imd) {
487                 if (inode_is_directory(inode))
488                         dir_count += inode->i_nlink;
489                 else
490                         file_count += inode->i_nlink;
491                 size = inode_sum_stream_sizes(inode, wim->blob_table);
492                 total_bytes += size * inode->i_nlink;
493                 hard_link_bytes += size * (inode->i_nlink - 1);
494         }
495
496         dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
497                                                  dir_count);
498         filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
499                                                   file_count);
500         totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
501                                                    total_bytes);
502         hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
503                                                       hard_link_bytes);
504         lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
505                         T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
506
507         if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
508                      !hardlinkbytes_node || !lastmodificationtime_node)) {
509                 xml_free_node(dircount_node);
510                 xml_free_node(filecount_node);
511                 xml_free_node(totalbytes_node);
512                 xml_free_node(hardlinkbytes_node);
513                 xml_free_node(lastmodificationtime_node);
514                 return WIMLIB_ERR_NOMEM;
515         }
516
517         xml_replace_child(image_node, dircount_node);
518         xml_replace_child(image_node, filecount_node);
519         xml_replace_child(image_node, totalbytes_node);
520         xml_replace_child(image_node, hardlinkbytes_node);
521         xml_replace_child(image_node, lastmodificationtime_node);
522         return 0;
523 }
524
525 /* Add an image to the XML information. */
526 int
527 xml_add_image(struct wim_xml_info *info, const tchar *name)
528 {
529         const u64 now = now_as_wim_timestamp();
530         struct xml_node *image_node;
531         int ret;
532
533         if (name && !xml_legal_value(name)) {
534                 ERROR("Name of new image contains illegal characters");
535                 return WIMLIB_ERR_INVALID_PARAM;
536         }
537
538         ret = WIMLIB_ERR_NOMEM;
539         image_node = xml_new_element(NULL, T("IMAGE"));
540         if (!image_node)
541                 goto err;
542         if (name && *name &&
543             !xml_new_element_with_text(image_node, T("NAME"), name))
544                 goto err;
545         if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
546                 goto err;
547         if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
548                 goto err;
549         if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
550                 goto err;
551         if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
552                 goto err;
553         if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
554                 goto err;
555         if (!xml_new_element_with_timestamp(image_node,
556                                             T("LASTMODIFICATIONTIME"), now))
557                 goto err;
558         ret = append_image_node(info, image_node);
559         if (ret)
560                 goto err;
561         return 0;
562
563 err:
564         xml_free_node(image_node);
565         return ret;
566 }
567
568 /*
569  * Make a copy of the XML information for the image with index @src_image in the
570  * @src_info XML document and append it to the @dest_info XML document.
571  *
572  * In the process, change the image's name and description to the values
573  * specified by @dest_image_name and @dest_image_description.  Either or both
574  * may be NULL, which indicates that the corresponding element will not be
575  * included in the destination image.
576  */
577 int
578 xml_export_image(const struct wim_xml_info *src_info, int src_image,
579                  struct wim_xml_info *dest_info, const tchar *dest_image_name,
580                  const tchar *dest_image_description, bool wimboot)
581 {
582         struct xml_node *dest_node;
583         int ret;
584
585         if (dest_image_name && !xml_legal_value(dest_image_name)) {
586                 ERROR("Destination image name contains illegal characters");
587                 return WIMLIB_ERR_INVALID_PARAM;
588         }
589         if (dest_image_description &&
590             !xml_legal_value(dest_image_description)) {
591                 ERROR("Destination image description contains illegal characters");
592                 return WIMLIB_ERR_INVALID_PARAM;
593         }
594
595         ret = WIMLIB_ERR_NOMEM;
596         dest_node = xml_clone_tree(src_info->images[src_image - 1]);
597         if (!dest_node)
598                 goto err;
599
600         ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
601         if (ret)
602                 goto err;
603
604         ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
605                                    dest_image_description);
606         if (ret)
607                 goto err;
608
609         if (wimboot) {
610                 ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
611                 if (ret)
612                         goto err;
613         }
614
615         ret = append_image_node(dest_info, dest_node);
616         if (ret)
617                 goto err;
618         return 0;
619
620 err:
621         xml_free_node(dest_node);
622         return ret;
623 }
624
625 /* Remove the specified image from the XML document.  */
626 void
627 xml_delete_image(struct wim_xml_info *info, int image)
628 {
629         struct xml_node *next_image;
630         struct xml_node *index_attr, *next_index_attr;
631
632         /* Free the IMAGE element for the deleted image.  Then, shift all
633          * higher-indexed IMAGE elements down by 1, in the process re-assigning
634          * their INDEX attributes.  */
635
636         next_image = info->images[image - 1];
637         next_index_attr = unlink_index_attribute(next_image);
638         xml_free_node(next_image);
639
640         while (image < info->image_count) {
641                 index_attr = next_index_attr;
642                 next_image = info->images[image];
643                 next_index_attr = unlink_index_attribute(next_image);
644                 xml_add_child(next_image, index_attr);
645                 info->images[image - 1] = next_image;
646                 image++;
647         }
648
649         xml_free_node(next_index_attr);
650         info->image_count--;
651 }
652
653 /* Architecture constants are from w64 mingw winnt.h  */
654 #define PROCESSOR_ARCHITECTURE_INTEL            0
655 #define PROCESSOR_ARCHITECTURE_MIPS             1
656 #define PROCESSOR_ARCHITECTURE_ALPHA            2
657 #define PROCESSOR_ARCHITECTURE_PPC              3
658 #define PROCESSOR_ARCHITECTURE_SHX              4
659 #define PROCESSOR_ARCHITECTURE_ARM              5
660 #define PROCESSOR_ARCHITECTURE_IA64             6
661 #define PROCESSOR_ARCHITECTURE_ALPHA64          7
662 #define PROCESSOR_ARCHITECTURE_MSIL             8
663 #define PROCESSOR_ARCHITECTURE_AMD64            9
664 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64    10
665 #define PROCESSOR_ARCHITECTURE_ARM64            12
666
667 static const tchar *
668 describe_arch(u64 arch)
669 {
670         static const tchar * const descriptions[] = {
671                 [PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
672                 [PROCESSOR_ARCHITECTURE_MIPS]  = T("MIPS"),
673                 [PROCESSOR_ARCHITECTURE_ARM]   = T("ARM"),
674                 [PROCESSOR_ARCHITECTURE_IA64]  = T("ia64"),
675                 [PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
676                 [PROCESSOR_ARCHITECTURE_ARM64] = T("ARM64"),
677         };
678
679         if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
680                 return descriptions[arch];
681
682         return T("unknown");
683 }
684
685 /* Print information from the WINDOWS element, if present.  */
686 static void
687 print_windows_info(struct xml_node *image_node)
688 {
689         struct xml_node *windows_node;
690         struct xml_node *langs_node;
691         struct xml_node *version_node;
692         const tchar *text;
693
694         windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
695         if (!windows_node)
696                 return;
697
698         tprintf(T("Architecture:           %"TS"\n"),
699                 describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
700
701         text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
702         if (text)
703                 tprintf(T("Product Name:           %"TS"\n"), text);
704
705         text = xml_get_text_by_path(windows_node, T("EDITIONID"));
706         if (text)
707                 tprintf(T("Edition ID:             %"TS"\n"), text);
708
709         text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
710         if (text)
711                 tprintf(T("Installation Type:      %"TS"\n"), text);
712
713         text = xml_get_text_by_path(windows_node, T("HAL"));
714         if (text)
715                 tprintf(T("HAL:                    %"TS"\n"), text);
716
717         text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
718         if (text)
719                 tprintf(T("Product Type:           %"TS"\n"), text);
720
721         text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
722         if (text)
723                 tprintf(T("Product Suite:          %"TS"\n"), text);
724
725         langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
726         if (langs_node) {
727                 struct xml_node *lang_node;
728
729                 tprintf(T("Languages:              "));
730                 xml_node_for_each_child(langs_node, lang_node) {
731                         if (!xml_node_is_element(lang_node, T("LANGUAGE")))
732                                 continue;
733                         text = xml_element_get_text(lang_node);
734                         if (!text)
735                                 continue;
736                         tprintf(T("%"TS" "), text);
737                 }
738                 tputchar(T('\n'));
739
740                 text = xml_get_text_by_path(langs_node, T("DEFAULT"));
741                 if (text)
742                         tprintf(T("Default Language:       %"TS"\n"), text);
743         }
744
745         text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
746         if (text)
747                 tprintf(T("System Root:            %"TS"\n"), text);
748
749         version_node = xml_get_element_by_path(windows_node, T("VERSION"));
750         if (version_node) {
751                 tprintf(T("Major Version:          %"PRIu64"\n"),
752                         xml_get_number_by_path(version_node, T("MAJOR")));
753                 tprintf(T("Minor Version:          %"PRIu64"\n"),
754                         xml_get_number_by_path(version_node, T("MINOR")));
755                 tprintf(T("Build:                  %"PRIu64"\n"),
756                         xml_get_number_by_path(version_node, T("BUILD")));
757                 tprintf(T("Service Pack Build:     %"PRIu64"\n"),
758                         xml_get_number_by_path(version_node, T("SPBUILD")));
759                 tprintf(T("Service Pack Level:     %"PRIu64"\n"),
760                         xml_get_number_by_path(version_node, T("SPLEVEL")));
761         }
762 }
763
764 /* Prints information about the specified image.  */
765 void
766 xml_print_image_info(struct wim_xml_info *info, int image)
767 {
768         struct xml_node * const image_node = info->images[image - 1];
769         const tchar *text;
770         tchar timebuf[64];
771
772         tprintf(T("Index:                  %d\n"), image);
773
774         /* Always print the Name and Description, even if the corresponding XML
775          * elements are not present.  */
776         text = xml_get_text_by_path(image_node, T("NAME"));
777         tprintf(T("Name:                   %"TS"\n"), text ? text : T(""));
778         text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
779         tprintf(T("Description:            %"TS"\n"), text ? text : T(""));
780
781         text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
782         if (text)
783                 tprintf(T("Display Name:           %"TS"\n"), text);
784
785         text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
786         if (text)
787                 tprintf(T("Display Description:    %"TS"\n"), text);
788
789         tprintf(T("Directory Count:        %"PRIu64"\n"),
790                 xml_get_number_by_path(image_node, T("DIRCOUNT")));
791
792         tprintf(T("File Count:             %"PRIu64"\n"),
793                 xml_get_number_by_path(image_node, T("FILECOUNT")));
794
795         tprintf(T("Total Bytes:            %"PRIu64"\n"),
796                 xml_get_number_by_path(image_node, T("TOTALBYTES")));
797
798         tprintf(T("Hard Link Bytes:        %"PRIu64"\n"),
799                 xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
800
801         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
802                                                        T("CREATIONTIME")),
803                              timebuf, ARRAY_LEN(timebuf));
804         tprintf(T("Creation Time:          %"TS"\n"), timebuf);
805
806         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
807                                         T("LASTMODIFICATIONTIME")),
808                                         timebuf, ARRAY_LEN(timebuf));
809         tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
810
811         print_windows_info(image_node);
812
813         text = xml_get_text_by_path(image_node, T("FLAGS"));
814         if (text)
815                 tprintf(T("Flags:                  %"TS"\n"), text);
816
817         tprintf(T("WIMBoot compatible:     %"TS"\n"),
818                 xml_get_number_by_path(image_node, T("WIMBOOT")) ?
819                         T("yes") : T("no"));
820
821         tputchar('\n');
822 }
823
824 /*----------------------------------------------------------------------------*
825  *                      Reading and writing the XML data                      *
826  *----------------------------------------------------------------------------*/
827
828 static int
829 image_element_get_index(struct xml_node *element)
830 {
831         struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
832
833         if (!attrib)
834                 return 0;
835         return min(INT_MAX, parse_number(attrib->value, 10));
836 }
837
838 /* Prepare the 'images' array from the XML document tree.  */
839 static int
840 setup_images(struct wim_xml_info *info, struct xml_node *root)
841 {
842         struct xml_node *child;
843         int index;
844         int max_index = 0;
845         int ret;
846
847         xml_node_for_each_child(root, child) {
848                 if (!xml_node_is_element(child, T("IMAGE")))
849                         continue;
850                 index = image_element_get_index(child);
851                 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
852                         goto err_indices;
853                 max_index = max(max_index, index);
854                 info->image_count++;
855         }
856         if (unlikely(max_index != info->image_count))
857                 goto err_indices;
858         ret = WIMLIB_ERR_NOMEM;
859         info->images = CALLOC(info->image_count, sizeof(info->images[0]));
860         if (unlikely(!info->images))
861                 goto err;
862         xml_node_for_each_child(root, child) {
863                 if (!xml_node_is_element(child, T("IMAGE")))
864                         continue;
865                 index = image_element_get_index(child);
866                 if (unlikely(info->images[index - 1]))
867                         goto err_indices;
868                 info->images[index - 1] = child;
869         }
870         return 0;
871
872 err_indices:
873         ERROR("The WIM file's XML document does not contain exactly one IMAGE "
874               "element per image!");
875         ret = WIMLIB_ERR_XML;
876 err:
877         FREE(info->images);
878         return ret;
879 }
880
881 static int
882 parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
883                        struct xml_node **root_ret)
884 {
885         tchar *doc;
886         int ret;
887
888         ret = utf16le_to_tstr(raw_doc, raw_doc_size, &doc, NULL);
889         if (ret)
890                 return ret;
891         ret = xml_parse_document(doc, root_ret);
892         FREE(doc);
893         return ret;
894 }
895
896 /* Reads the XML data from a WIM file.  */
897 int
898 read_wim_xml_data(WIMStruct *wim)
899 {
900         struct wim_xml_info *info;
901         void *raw_doc;
902         size_t raw_doc_size;
903         struct xml_node *root;
904         int ret;
905
906         /* Allocate the 'struct wim_xml_info'.  */
907         ret = WIMLIB_ERR_NOMEM;
908         info = CALLOC(1, sizeof(*info));
909         if (!info)
910                 goto err;
911
912         /* Read the raw UTF-16LE XML document.  */
913         ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
914         if (ret)
915                 goto err;
916
917         /* Parse the document, creating the document tree.  */
918         ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
919         FREE(raw_doc);
920         raw_doc = NULL;
921         if (ret) {
922                 if (ret != WIMLIB_ERR_NOMEM)
923                         ret = WIMLIB_ERR_XML;
924                 ERROR("Unable to parse the WIM file's XML document!");
925                 goto err;
926         }
927         root = info->root;
928
929         /* Verify the root element.  */
930         if (!xml_node_is_element(root, T("WIM"))) {
931                 ERROR("The WIM file's XML document has an unexpected format!");
932                 ret = WIMLIB_ERR_XML;
933                 goto err;
934         }
935
936         /* Verify the WIM file is not encrypted.  */
937         if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
938                 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
939                 goto err;
940         }
941
942         /* Validate the image elements and set up the images[] array.  */
943         ret = setup_images(info, root);
944         if (ret)
945                 goto err;
946
947         /* Success!  */
948         wim->xml_info = info;
949         return 0;
950
951 err:
952         xml_free_info_struct(info);
953         return ret;
954 }
955
/* Exchange the INDEX attributes of two IMAGE elements.  Does nothing if both
 * arguments are the same element.  */
static void
swap_index_attributes(struct xml_node *image_element_1,
                      struct xml_node *image_element_2)
{
        struct xml_node *index_of_1;
        struct xml_node *index_of_2;

        if (image_element_1 == image_element_2)
                return;

        /* Detach both attributes first, then attach each to the other
         * element.  */
        index_of_1 = unlink_index_attribute(image_element_1);
        index_of_2 = unlink_index_attribute(image_element_2);
        xml_add_child(image_element_1, index_of_2);
        xml_add_child(image_element_2, index_of_1);
}
970
971 static int
972 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
973                            struct xml_node **orig_totalbytes_element_ret)
974 {
975         struct xml_node *totalbytes_element = NULL;
976
977         /* Allocate the new TOTALBYTES element if needed.  */
978         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
979             total_bytes != WIM_TOTALBYTES_OMIT) {
980                 totalbytes_element = xml_new_element_with_u64(
981                                         NULL, T("TOTALBYTES"), total_bytes);
982                 if (!totalbytes_element)
983                         return WIMLIB_ERR_NOMEM;
984         }
985
986         /* Adjust the IMAGE elements if needed.  */
987         if (image != WIMLIB_ALL_IMAGES) {
988                 /* We're writing a single image only.  Temporarily unlink all
989                  * other IMAGE elements from the document.  */
990                 for (int i = 0; i < info->image_count; i++)
991                         if (i + 1 != image)
992                                 xml_unlink_node(info->images[i]);
993
994                 /* Temporarily set the INDEX attribute of the needed IMAGE
995                  * element to 1.  */
996                 swap_index_attributes(info->images[0], info->images[image - 1]);
997         }
998
999         /* Adjust (add, change, or remove) the TOTALBYTES element if needed.  */
1000         *orig_totalbytes_element_ret = NULL;
1001         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1002                 /* Unlink the previous TOTALBYTES element, if any.  */
1003                 *orig_totalbytes_element_ret = xml_get_element_by_path(
1004                                                 info->root, T("TOTALBYTES"));
1005                 if (*orig_totalbytes_element_ret)
1006                         xml_unlink_node(*orig_totalbytes_element_ret);
1007
1008                 /* Link in the new TOTALBYTES element, if any.  */
1009                 if (totalbytes_element)
1010                         xml_add_child(info->root, totalbytes_element);
1011         }
1012         return 0;
1013 }
1014
1015 static void
1016 restore_document_after_write(struct wim_xml_info *info, int image,
1017                              struct xml_node *orig_totalbytes_element)
1018 {
1019         /* Restore the IMAGE elements if needed.  */
1020         if (image != WIMLIB_ALL_IMAGES) {
1021                 /* We wrote a single image only.  Re-link all other IMAGE
1022                  * elements to the document.  */
1023                 for (int i = 0; i < info->image_count; i++)
1024                         if (i + 1 != image)
1025                                 xml_add_child(info->root, info->images[i]);
1026
1027                 /* Restore the original INDEX attributes.  */
1028                 swap_index_attributes(info->images[0], info->images[image - 1]);
1029         }
1030
1031         /* Restore the original TOTALBYTES element if needed.  */
1032         if (orig_totalbytes_element)
1033                 xml_replace_child(info->root, orig_totalbytes_element);
1034 }
1035
1036 /*
1037  * Writes the XML data to a WIM file.
1038  *
1039  * 'image' specifies the image(s) to include in the XML data.  Normally it is
1040  * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1041  *
1042  * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1043  * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1044  * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
1045  */
1046 int
1047 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1048                    struct wim_reshdr *out_reshdr, int write_resource_flags)
1049 {
1050         struct wim_xml_info *info = wim->xml_info;
1051         int ret;
1052         struct xml_node *orig_totalbytes_element;
1053         struct xml_out_buf buf = {};
1054         const utf16lechar *raw_doc;
1055         size_t raw_doc_size;
1056
1057         /* Make any needed temporary changes to the document.  */
1058         ret = prepare_document_for_write(info, image, total_bytes,
1059                                          &orig_totalbytes_element);
1060         if (ret)
1061                 goto out;
1062
1063         ret = xml_write_document(info->root, &buf);
1064         if (ret)
1065                 goto out_restore_document;
1066
1067         ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
1068         if (ret)
1069                 goto out_restore_document;
1070
1071         /* Write the XML data uncompressed.  Although wimlib can handle
1072          * compressed XML data, some other WIM software cannot.  */
1073         ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
1074                                              true,
1075                                              &wim->out_fd,
1076                                              WIMLIB_COMPRESSION_TYPE_NONE,
1077                                              0,
1078                                              out_reshdr,
1079                                              NULL,
1080                                              write_resource_flags);
1081         tstr_put_utf16le(raw_doc);
1082 out_restore_document:
1083         /* Revert any temporary changes we made to the document.  */
1084         restore_document_after_write(info, image, orig_totalbytes_element);
1085         FREE(buf.buf);
1086 out:
1087         return ret;
1088 }
1089
1090 /*----------------------------------------------------------------------------*
1091  *                           Library API functions                            *
1092  *----------------------------------------------------------------------------*/
1093
1094 WIMLIBAPI int
1095 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1096 {
1097         const struct wim_reshdr *xml_reshdr;
1098
1099         if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1100                 return WIMLIB_ERR_NO_FILENAME;
1101
1102         if (buf_ret == NULL || bufsize_ret == NULL)
1103                 return WIMLIB_ERR_INVALID_PARAM;
1104
1105         xml_reshdr = &wim->hdr.xml_data_reshdr;
1106
1107         *bufsize_ret = xml_reshdr->uncompressed_size;
1108         return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
1109 }
1110
1111 WIMLIBAPI int
1112 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1113 {
1114         int ret;
1115         void *buf;
1116         size_t bufsize;
1117
1118         ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1119         if (ret)
1120                 return ret;
1121
1122         if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1123                 ERROR_WITH_ERRNO("Failed to extract XML data");
1124                 ret = WIMLIB_ERR_WRITE;
1125         }
1126         FREE(buf);
1127         return ret;
1128 }
1129
1130 static bool
1131 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1132 {
1133         const struct wim_xml_info *info = wim->xml_info;
1134         const tchar *existing_name;
1135
1136         /* Any number of images can have "no name".  */
1137         if (!name || !*name)
1138                 return false;
1139
1140         /* Check for images that have the specified name.  */
1141         for (int i = 0; i < info->image_count; i++) {
1142                 if (i + 1 == excluded_image)
1143                         continue;
1144                 existing_name = xml_get_text_by_path(info->images[i],
1145                                                      T("NAME"));
1146                 if (existing_name && !tstrcmp(existing_name, name))
1147                         return true;
1148         }
1149         return false;
1150 }
1151
1152 WIMLIBAPI bool
1153 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1154 {
1155         return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
1156 }
1157
1158 WIMLIBAPI const tchar *
1159 wimlib_get_image_name(const WIMStruct *wim, int image)
1160 {
1161         const struct wim_xml_info *info = wim->xml_info;
1162         const tchar *name;
1163
1164         if (image < 1 || image > info->image_count)
1165                 return NULL;
1166         name = wimlib_get_image_property(wim, image, T("NAME"));
1167         return name ? name : T("");
1168 }
1169
1170 WIMLIBAPI const tchar *
1171 wimlib_get_image_description(const WIMStruct *wim, int image)
1172 {
1173         return wimlib_get_image_property(wim, image, T("DESCRIPTION"));
1174 }
1175
1176 WIMLIBAPI const tchar *
1177 wimlib_get_image_property(const WIMStruct *wim, int image,
1178                           const tchar *property_name)
1179 {
1180         const struct wim_xml_info *info = wim->xml_info;
1181
1182         if (!property_name || !*property_name)
1183                 return NULL;
1184         if (image < 1 || image > info->image_count)
1185                 return NULL;
1186         return xml_get_text_by_path(info->images[image - 1], property_name);
1187 }
1188
1189 WIMLIBAPI int
1190 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1191 {
1192         return wimlib_set_image_property(wim, image, T("NAME"), name);
1193 }
1194
1195 WIMLIBAPI int
1196 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1197 {
1198         return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
1199                                          description);
1200 }
1201
1202 WIMLIBAPI int
1203 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1204 {
1205         return wimlib_set_image_property(wim, image, T("FLAGS"), flags);
1206 }
1207
1208 WIMLIBAPI int
1209 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1210                           const tchar *property_value)
1211 {
1212         struct wim_xml_info *info = wim->xml_info;
1213
1214         if (!property_name || !*property_name)
1215                 return WIMLIB_ERR_INVALID_PARAM;
1216
1217         if (!xml_legal_name(property_name)) {
1218                 ERROR("Property name '%"TS"' is illegal in XML", property_name);
1219                 return WIMLIB_ERR_INVALID_PARAM;
1220         }
1221
1222         if (property_value && !xml_legal_value(property_value)) {
1223                 WARNING("Value of property '%"TS"' contains illegal characters",
1224                         property_name);
1225                 return WIMLIB_ERR_INVALID_PARAM;
1226         }
1227
1228         if (image < 1 || image > info->image_count)
1229                 return WIMLIB_ERR_INVALID_IMAGE;
1230
1231         if (!tstrcmp(property_name, T("NAME")) &&
1232             image_name_in_use(wim, property_value, image))
1233                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1234
1235         return xml_set_text_by_path(info->images[image - 1], property_name,
1236                                     property_value);
1237 }