2 * xml.c - deals with the XML information in WIM files
6 * Copyright 2012-2023 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
29 #include "wimlib/blob_table.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/timestamp.h"
37 #include "wimlib/xml.h"
38 #include "wimlib/xmlproc.h"
39 #include "wimlib/write.h"
42 * A wrapper around a WIM file's XML document. The XML document contains
43 * metadata about each image in the WIM file as well as metadata about the WIM
48 /* The XML document in tree form */
49 struct xml_node *root;
51 /* A malloc()ed array containing a pointer to the IMAGE element for each
52 * WIM image. The image with 1-based index 'i' is at index 'i - 1' in
53 * this array. Note: these pointers are cached values, since they could
54 * also be found by searching the document. */
55 struct xml_node **images;
57 /* The number of WIM images (the length of 'images') */
62 parse_number(const tchar *str, int base)
69 v = tstrtoull(str, &end, base);
70 if (end == str || *end || v >= UINT64_MAX)
76 * Retrieve an unsigned integer from the contents of the specified element,
77 * decoding it using the specified base. If the element has no contents or does
78 * not contain a valid number, returns 0.
81 xml_element_get_number(const struct xml_node *element, int base)
83 return parse_number(xml_element_get_text(element), base);
87 * Retrieve the timestamp from a time element. This element should have child
88 * elements HIGHPART and LOWPART; these elements will be used to construct a
89 * Windows-style timestamp.
92 xml_element_get_timestamp(const struct xml_node *element)
95 const struct xml_node *child;
97 xml_node_for_each_child(element, child) {
98 if (xml_node_is_element(child, T("HIGHPART")))
99 timestamp |= xml_element_get_number(child, 16) << 32;
100 else if (xml_node_is_element(child, T("LOWPART")))
101 timestamp |= xml_element_get_number(child, 16);
106 /* Create a new timestamp element, with HIGHPART and LOWPART children holding
 * the upper and lower 32 bits in hexadecimal, and optionally link it into a
 * tree. */
107 static struct xml_node *
108 xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
111 struct xml_node *element;
114 element = xml_new_element(NULL, name);
118 tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
119 if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
122 tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
123 if (!xml_new_element_with_text(element, T("LOWPART"), buf))
127 xml_add_child(parent, element);
131 xml_free_node(element);
135 /* Create a new element whose text is 'value' written in decimal, and
 * optionally link it into a tree. */
136 static struct xml_node *
137 xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
141 tsprintf(buf, T("%"PRIu64), value);
142 return xml_new_element_with_text(parent, name, buf);
146 parse_index(tchar **pp, u32 *index_ret)
151 *p++ = '\0'; /* overwrite '[' */
152 while (*p >= '0' && *p <= '9') {
153 u32 n = (index * 10) + (*p++ - '0');
163 if (*p != '/' && *p != '\0')
172 do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
173 struct xml_node **result_ret)
175 size_t n = tstrlen(path) + 1;
185 /* Copy the path to a temporary buffer. */
186 tmemcpy(buf, path, n);
195 struct xml_node *child;
198 /* We have another path component. */
200 /* Parse the element name. */
202 while (*p != '/' && *p != '\0' && *p != '[')
204 if (p == name) /* empty name? */
207 /* Handle a bracketed index, if one was specified. */
208 if (*p == '[' && !parse_index(&p, &index))
214 /* Look for a matching child. */
215 xml_node_for_each_child(element, child)
216 if (xml_node_is_element(child, name) && !--index)
219 /* No child matched the path. If create=false, the lookup
220 * failed. If create=true, create the needed element. */
224 /* We can't create an element at index 'n' if indices 1...n-1
225 * didn't already exist. */
227 return WIMLIB_ERR_INVALID_PARAM;
229 child = xml_new_element(element, name);
231 return WIMLIB_ERR_NOMEM;
233 /* Continue to the next path component, if there is one. */
238 *result_ret = element;
242 ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
243 return WIMLIB_ERR_INVALID_PARAM;
246 /* Retrieve the XML element, if any, at the specified 'path'. This supports a
247 * simple filesystem-like syntax. If the element was found, returns a pointer
248 * to it; otherwise returns NULL. */
249 static struct xml_node *
250 xml_get_element_by_path(struct xml_node *root, const tchar *path)
252 struct xml_node *element;
254 do_xml_path_walk(root, path, false, &element);
259 * Similar to xml_get_element_by_path(), but creates the element and any
260 * requisite ancestor elements as needed. If successful, 0 is returned and
261 * *element_ret is set to a pointer to the resulting element. If unsuccessful,
262 * an error code is returned and *element_ret is set to NULL.
265 xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
266 struct xml_node **element_ret)
268 return do_xml_path_walk(root, path, true, element_ret);
272 xml_get_number_by_path(struct xml_node *root, const tchar *path)
274 return xml_element_get_number(xml_get_element_by_path(root, path), 10);
278 xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
280 return xml_element_get_timestamp(xml_get_element_by_path(root, path));
284 xml_get_text_by_path(struct xml_node *root, const tchar *path)
286 return xml_element_get_text(xml_get_element_by_path(root, path));
290 * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
291 * or empty) an element containing text.
294 xml_set_text_by_path(struct xml_node *root, const tchar *path, const tchar *text)
297 struct xml_node *element;
300 /* Create or replace */
301 ret = xml_ensure_element_by_path(root, path, &element);
304 return xml_element_set_text(element, text);
307 element = xml_get_element_by_path(root, path);
309 xml_free_node(element);
314 /* Unlink and return the node which represents the INDEX attribute of the
315 * specified IMAGE element. */
316 static struct xml_node *
317 unlink_index_attribute(struct xml_node *image_node)
319 struct xml_node *attr = xml_get_attrib(image_node, T("INDEX"));
321 xml_unlink_node(attr);
325 /* Compute the total uncompressed size of the streams of the specified inode. */
327 inode_sum_stream_sizes(const struct wim_inode *inode,
328 const struct blob_table *blob_table)
332 for (unsigned i = 0; i < inode->i_num_streams; i++) {
333 const struct blob_descriptor *blob;
335 blob = stream_blob(&inode->i_streams[i], blob_table);
337 total_size += blob->size;
343 append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
346 struct xml_node **images;
349 /* Limit exceeded? */
350 if (unlikely(info->image_count >= MAX_IMAGES))
351 return WIMLIB_ERR_IMAGE_COUNT;
353 /* Set the INDEX attribute. */
354 tsprintf(buf, T("%d"), info->image_count + 1);
355 ret = xml_set_attrib(image_node, T("INDEX"), buf);
359 /* Append the IMAGE element to the 'images' array. */
360 images = REALLOC(info->images,
361 (info->image_count + 1) * sizeof(info->images[0]));
362 if (unlikely(!images))
363 return WIMLIB_ERR_NOMEM;
364 info->images = images;
365 images[info->image_count++] = image_node;
367 /* Add the IMAGE element to the document. */
368 xml_add_child(info->root, image_node);
372 /*----------------------------------------------------------------------------*
373 * Functions for internal library use *
374 *----------------------------------------------------------------------------*/
376 /* Allocate an empty 'struct wim_xml_info', containing no images. */
377 struct wim_xml_info *
378 xml_new_info_struct(void)
380 struct wim_xml_info *info = CALLOC(1, sizeof(*info));
385 info->root = xml_new_element(NULL, T("WIM"));
393 /* Free a 'struct wim_xml_info'. */
395 xml_free_info_struct(struct wim_xml_info *info)
398 xml_free_node(info->root);
404 /* Retrieve the number of images for which there exist IMAGE elements in the XML
407 xml_get_image_count(const struct wim_xml_info *info)
409 return info->image_count;
412 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
415 xml_get_total_bytes(const struct wim_xml_info *info)
417 return xml_get_number_by_path(info->root, T("TOTALBYTES"));
420 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
423 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
425 return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
428 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
431 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
433 return xml_get_number_by_path(info->images[image - 1],
437 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
440 xml_get_wimboot(const struct wim_xml_info *info, int image)
442 return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
445 /* Retrieve the Windows build number for the specified image, or 0 if this
446 * information is not available. */
448 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
450 return xml_get_number_by_path(info->images[image - 1],
451 T("WINDOWS/VERSION/BUILD"));
454 /* Set the WIMBOOT value for the specified image. */
456 xml_set_wimboot(struct wim_xml_info *info, int image)
458 return xml_set_text_by_path(info->images[image - 1],
459 T("WIMBOOT"), T("1"));
463 * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
464 * LASTMODIFICATIONTIME elements for the specified WIM image.
466 * Note: since these stats are likely to be used for display purposes only, we
467 * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
470 xml_update_image_info(WIMStruct *wim, int image)
472 const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
473 struct xml_node *image_node = wim->xml_info->images[image - 1];
474 const struct wim_inode *inode;
478 u64 hard_link_bytes = 0;
480 struct xml_node *dircount_node;
481 struct xml_node *filecount_node;
482 struct xml_node *totalbytes_node;
483 struct xml_node *hardlinkbytes_node;
484 struct xml_node *lastmodificationtime_node;
486 image_for_each_inode(inode, imd) {
487 if (inode_is_directory(inode))
488 dir_count += inode->i_nlink;
490 file_count += inode->i_nlink;
491 size = inode_sum_stream_sizes(inode, wim->blob_table);
492 total_bytes += size * inode->i_nlink;
493 hard_link_bytes += size * (inode->i_nlink - 1);
496 dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
498 filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
500 totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
502 hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
504 lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
505 T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
507 if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
508 !hardlinkbytes_node || !lastmodificationtime_node)) {
509 xml_free_node(dircount_node);
510 xml_free_node(filecount_node);
511 xml_free_node(totalbytes_node);
512 xml_free_node(hardlinkbytes_node);
513 xml_free_node(lastmodificationtime_node);
514 return WIMLIB_ERR_NOMEM;
517 xml_replace_child(image_node, dircount_node);
518 xml_replace_child(image_node, filecount_node);
519 xml_replace_child(image_node, totalbytes_node);
520 xml_replace_child(image_node, hardlinkbytes_node);
521 xml_replace_child(image_node, lastmodificationtime_node);
525 /* Add an image to the XML information. */
527 xml_add_image(struct wim_xml_info *info, const tchar *name)
529 const u64 now = now_as_wim_timestamp();
530 struct xml_node *image_node;
533 if (name && !xml_legal_value(name)) {
534 ERROR("Name of new image contains illegal characters");
535 return WIMLIB_ERR_INVALID_PARAM;
538 ret = WIMLIB_ERR_NOMEM;
539 image_node = xml_new_element(NULL, T("IMAGE"));
543 !xml_new_element_with_text(image_node, T("NAME"), name))
545 if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
547 if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
549 if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
551 if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
553 if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
555 if (!xml_new_element_with_timestamp(image_node,
556 T("LASTMODIFICATIONTIME"), now))
558 ret = append_image_node(info, image_node);
564 xml_free_node(image_node);
569 * Make a copy of the XML information for the image with index @src_image in the
570 * @src_info XML document and append it to the @dest_info XML document.
572 * In the process, change the image's name and description to the values
573 * specified by @dest_image_name and @dest_image_description. Either or both
574 * may be NULL, which indicates that the corresponding element will not be
575 * included in the destination image.
578 xml_export_image(const struct wim_xml_info *src_info, int src_image,
579 struct wim_xml_info *dest_info, const tchar *dest_image_name,
580 const tchar *dest_image_description, bool wimboot)
582 struct xml_node *dest_node;
585 if (dest_image_name && !xml_legal_value(dest_image_name)) {
586 ERROR("Destination image name contains illegal characters");
587 return WIMLIB_ERR_INVALID_PARAM;
589 if (dest_image_description &&
590 !xml_legal_value(dest_image_description)) {
591 ERROR("Destination image description contains illegal characters");
592 return WIMLIB_ERR_INVALID_PARAM;
595 ret = WIMLIB_ERR_NOMEM;
596 dest_node = xml_clone_tree(src_info->images[src_image - 1]);
600 ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
604 ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
605 dest_image_description);
610 ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
615 ret = append_image_node(dest_info, dest_node);
621 xml_free_node(dest_node);
625 /* Remove the specified image from the XML document. */
627 xml_delete_image(struct wim_xml_info *info, int image)
629 struct xml_node *next_image;
630 struct xml_node *index_attr, *next_index_attr;
632 /* Free the IMAGE element for the deleted image. Then, shift all
633 * higher-indexed IMAGE elements down by 1, in the process re-assigning
634 * their INDEX attributes. */
636 next_image = info->images[image - 1];
637 next_index_attr = unlink_index_attribute(next_image);
638 xml_free_node(next_image);
640 while (image < info->image_count) {
641 index_attr = next_index_attr;
642 next_image = info->images[image];
643 next_index_attr = unlink_index_attribute(next_image);
644 xml_add_child(next_image, index_attr);
645 info->images[image - 1] = next_image;
649 xml_free_node(next_index_attr);
653 /*
 * Processor architecture codes, taken from mingw-w64's winnt.h.  describe_arch()
 * uses some of these as indices into its table of human-readable names.  The
 * list skips from 10 to 12; no constant is defined here for the value 11.
 */
654 #define PROCESSOR_ARCHITECTURE_INTEL 0
655 #define PROCESSOR_ARCHITECTURE_MIPS 1
656 #define PROCESSOR_ARCHITECTURE_ALPHA 2
657 #define PROCESSOR_ARCHITECTURE_PPC 3
658 #define PROCESSOR_ARCHITECTURE_SHX 4
659 #define PROCESSOR_ARCHITECTURE_ARM 5
660 #define PROCESSOR_ARCHITECTURE_IA64 6
661 #define PROCESSOR_ARCHITECTURE_ALPHA64 7
662 #define PROCESSOR_ARCHITECTURE_MSIL 8
663 #define PROCESSOR_ARCHITECTURE_AMD64 9
664 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10
665 #define PROCESSOR_ARCHITECTURE_ARM64 12
668 describe_arch(u64 arch)
670 static const tchar * const descriptions[] = {
671 [PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
672 [PROCESSOR_ARCHITECTURE_MIPS] = T("MIPS"),
673 [PROCESSOR_ARCHITECTURE_ARM] = T("ARM"),
674 [PROCESSOR_ARCHITECTURE_IA64] = T("ia64"),
675 [PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
676 [PROCESSOR_ARCHITECTURE_ARM64] = T("ARM64"),
679 if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
680 return descriptions[arch];
685 /* Print information from the WINDOWS element, if present. */
687 print_windows_info(struct xml_node *image_node)
689 struct xml_node *windows_node;
690 struct xml_node *langs_node;
691 struct xml_node *version_node;
694 windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
698 tprintf(T("Architecture: %"TS"\n"),
699 describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
701 text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
703 tprintf(T("Product Name: %"TS"\n"), text);
705 text = xml_get_text_by_path(windows_node, T("EDITIONID"));
707 tprintf(T("Edition ID: %"TS"\n"), text);
709 text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
711 tprintf(T("Installation Type: %"TS"\n"), text);
713 text = xml_get_text_by_path(windows_node, T("HAL"));
715 tprintf(T("HAL: %"TS"\n"), text);
717 text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
719 tprintf(T("Product Type: %"TS"\n"), text);
721 text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
723 tprintf(T("Product Suite: %"TS"\n"), text);
725 langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
727 struct xml_node *lang_node;
729 tprintf(T("Languages: "));
730 xml_node_for_each_child(langs_node, lang_node) {
731 if (!xml_node_is_element(lang_node, T("LANGUAGE")))
733 text = xml_element_get_text(lang_node);
736 tprintf(T("%"TS" "), text);
740 text = xml_get_text_by_path(langs_node, T("DEFAULT"));
742 tprintf(T("Default Language: %"TS"\n"), text);
745 text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
747 tprintf(T("System Root: %"TS"\n"), text);
749 version_node = xml_get_element_by_path(windows_node, T("VERSION"));
751 tprintf(T("Major Version: %"PRIu64"\n"),
752 xml_get_number_by_path(version_node, T("MAJOR")));
753 tprintf(T("Minor Version: %"PRIu64"\n"),
754 xml_get_number_by_path(version_node, T("MINOR")));
755 tprintf(T("Build: %"PRIu64"\n"),
756 xml_get_number_by_path(version_node, T("BUILD")));
757 tprintf(T("Service Pack Build: %"PRIu64"\n"),
758 xml_get_number_by_path(version_node, T("SPBUILD")));
759 tprintf(T("Service Pack Level: %"PRIu64"\n"),
760 xml_get_number_by_path(version_node, T("SPLEVEL")));
764 /* Prints information about the specified image. */
766 xml_print_image_info(struct wim_xml_info *info, int image)
768 struct xml_node * const image_node = info->images[image - 1];
772 tprintf(T("Index: %d\n"), image);
774 /* Always print the Name and Description, even if the corresponding XML
775 * elements are not present. */
776 text = xml_get_text_by_path(image_node, T("NAME"));
777 tprintf(T("Name: %"TS"\n"), text ? text : T(""));
778 text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
779 tprintf(T("Description: %"TS"\n"), text ? text : T(""));
781 text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
783 tprintf(T("Display Name: %"TS"\n"), text);
785 text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
787 tprintf(T("Display Description: %"TS"\n"), text);
789 tprintf(T("Directory Count: %"PRIu64"\n"),
790 xml_get_number_by_path(image_node, T("DIRCOUNT")));
792 tprintf(T("File Count: %"PRIu64"\n"),
793 xml_get_number_by_path(image_node, T("FILECOUNT")));
795 tprintf(T("Total Bytes: %"PRIu64"\n"),
796 xml_get_number_by_path(image_node, T("TOTALBYTES")));
798 tprintf(T("Hard Link Bytes: %"PRIu64"\n"),
799 xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
801 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
803 timebuf, ARRAY_LEN(timebuf));
804 tprintf(T("Creation Time: %"TS"\n"), timebuf);
806 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
807 T("LASTMODIFICATIONTIME")),
808 timebuf, ARRAY_LEN(timebuf));
809 tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
811 print_windows_info(image_node);
813 text = xml_get_text_by_path(image_node, T("FLAGS"));
815 tprintf(T("Flags: %"TS"\n"), text);
817 tprintf(T("WIMBoot compatible: %"TS"\n"),
818 xml_get_number_by_path(image_node, T("WIMBOOT")) ?
824 /*----------------------------------------------------------------------------*
825 * Reading and writing the XML data *
826 *----------------------------------------------------------------------------*/
829 image_element_get_index(struct xml_node *element)
831 struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
835 return min(INT_MAX, parse_number(attrib->value, 10));
838 /* Prepare the 'images' array from the XML document tree. */
840 setup_images(struct wim_xml_info *info, struct xml_node *root)
842 struct xml_node *child;
847 xml_node_for_each_child(root, child) {
848 if (!xml_node_is_element(child, T("IMAGE")))
850 index = image_element_get_index(child);
851 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
853 max_index = max(max_index, index);
856 if (unlikely(max_index != info->image_count))
858 ret = WIMLIB_ERR_NOMEM;
859 info->images = CALLOC(info->image_count, sizeof(info->images[0]));
860 if (unlikely(!info->images))
862 xml_node_for_each_child(root, child) {
863 if (!xml_node_is_element(child, T("IMAGE")))
865 index = image_element_get_index(child);
866 if (unlikely(info->images[index - 1]))
868 info->images[index - 1] = child;
873 ERROR("The WIM file's XML document does not contain exactly one IMAGE "
874 "element per image!");
875 ret = WIMLIB_ERR_XML;
882 parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
883 struct xml_node **root_ret)
888 ret = utf16le_to_tstr(raw_doc, raw_doc_size, &doc, NULL);
891 ret = xml_parse_document(doc, root_ret);
896 /* Reads the XML data from a WIM file. */
898 read_wim_xml_data(WIMStruct *wim)
900 struct wim_xml_info *info;
903 struct xml_node *root;
906 /* Allocate the 'struct wim_xml_info'. */
907 ret = WIMLIB_ERR_NOMEM;
908 info = CALLOC(1, sizeof(*info));
912 /* Read the raw UTF-16LE XML document. */
913 ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
917 /* Parse the document, creating the document tree. */
918 ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
922 if (ret != WIMLIB_ERR_NOMEM)
923 ret = WIMLIB_ERR_XML;
924 ERROR("Unable to parse the WIM file's XML document!");
929 /* Verify the root element. */
930 if (!xml_node_is_element(root, T("WIM"))) {
931 ERROR("The WIM file's XML document has an unexpected format!");
932 ret = WIMLIB_ERR_XML;
936 /* Verify the WIM file is not encrypted. */
937 if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
938 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
942 /* Validate the image elements and set up the images[] array. */
943 ret = setup_images(info, root);
948 wim->xml_info = info;
952 xml_free_info_struct(info);
956 /* Swap the INDEX attributes of two IMAGE elements. */
958 swap_index_attributes(struct xml_node *image_element_1,
959 struct xml_node *image_element_2)
961 struct xml_node *attr_1, *attr_2;
963 if (image_element_1 != image_element_2) {
964 attr_1 = unlink_index_attribute(image_element_1);
965 attr_2 = unlink_index_attribute(image_element_2);
966 xml_add_child(image_element_1, attr_2);
967 xml_add_child(image_element_2, attr_1);
972 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
973 struct xml_node **orig_totalbytes_element_ret)
975 struct xml_node *totalbytes_element = NULL;
977 /* Allocate the new TOTALBYTES element if needed. */
978 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
979 total_bytes != WIM_TOTALBYTES_OMIT) {
980 totalbytes_element = xml_new_element_with_u64(
981 NULL, T("TOTALBYTES"), total_bytes);
982 if (!totalbytes_element)
983 return WIMLIB_ERR_NOMEM;
986 /* Adjust the IMAGE elements if needed. */
987 if (image != WIMLIB_ALL_IMAGES) {
988 /* We're writing a single image only. Temporarily unlink all
989 * other IMAGE elements from the document. */
990 for (int i = 0; i < info->image_count; i++)
992 xml_unlink_node(info->images[i]);
994 /* Temporarily set the INDEX attribute of the needed IMAGE
996 swap_index_attributes(info->images[0], info->images[image - 1]);
999 /* Adjust (add, change, or remove) the TOTALBYTES element if needed. */
1000 *orig_totalbytes_element_ret = NULL;
1001 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1002 /* Unlink the previous TOTALBYTES element, if any. */
1003 *orig_totalbytes_element_ret = xml_get_element_by_path(
1004 info->root, T("TOTALBYTES"));
1005 if (*orig_totalbytes_element_ret)
1006 xml_unlink_node(*orig_totalbytes_element_ret);
1008 /* Link in the new TOTALBYTES element, if any. */
1009 if (totalbytes_element)
1010 xml_add_child(info->root, totalbytes_element);
1016 restore_document_after_write(struct wim_xml_info *info, int image,
1017 struct xml_node *orig_totalbytes_element)
1019 /* Restore the IMAGE elements if needed. */
1020 if (image != WIMLIB_ALL_IMAGES) {
1021 /* We wrote a single image only. Re-link all other IMAGE
1022 * elements to the document. */
1023 for (int i = 0; i < info->image_count; i++)
1025 xml_add_child(info->root, info->images[i]);
1027 /* Restore the original INDEX attributes. */
1028 swap_index_attributes(info->images[0], info->images[image - 1]);
1031 /* Restore the original TOTALBYTES element if needed. */
1032 if (orig_totalbytes_element)
1033 xml_replace_child(info->root, orig_totalbytes_element);
1037 * Writes the XML data to a WIM file.
1039 * 'image' specifies the image(s) to include in the XML data. Normally it is
1040 * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1042 * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1043 * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1044 * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
1047 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1048 struct wim_reshdr *out_reshdr, int write_resource_flags)
1050 struct wim_xml_info *info = wim->xml_info;
1052 struct xml_node *orig_totalbytes_element;
1053 struct xml_out_buf buf = {};
1054 const utf16lechar *raw_doc;
1055 size_t raw_doc_size;
1057 /* Make any needed temporary changes to the document. */
1058 ret = prepare_document_for_write(info, image, total_bytes,
1059 &orig_totalbytes_element);
1063 ret = xml_write_document(info->root, &buf);
1065 goto out_restore_document;
1067 ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
1069 goto out_restore_document;
1071 /* Write the XML data uncompressed. Although wimlib can handle
1072 * compressed XML data, some other WIM software cannot. */
1073 ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
1076 WIMLIB_COMPRESSION_TYPE_NONE,
1080 write_resource_flags);
1081 tstr_put_utf16le(raw_doc);
1082 out_restore_document:
1083 /* Revert any temporary changes we made to the document. */
1084 restore_document_after_write(info, image, orig_totalbytes_element);
1090 /*----------------------------------------------------------------------------*
1091 * Library API functions *
1092 *----------------------------------------------------------------------------*/
1095 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1097 const struct wim_reshdr *xml_reshdr;
1099 if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1100 return WIMLIB_ERR_NO_FILENAME;
1102 if (buf_ret == NULL || bufsize_ret == NULL)
1103 return WIMLIB_ERR_INVALID_PARAM;
1105 xml_reshdr = &wim->hdr.xml_data_reshdr;
1107 *bufsize_ret = xml_reshdr->uncompressed_size;
1108 return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
1112 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1118 ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1122 if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1123 ERROR_WITH_ERRNO("Failed to extract XML data");
1124 ret = WIMLIB_ERR_WRITE;
1131 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1133 const struct wim_xml_info *info = wim->xml_info;
1134 const tchar *existing_name;
1136 /* Any number of images can have "no name". */
1137 if (!name || !*name)
1140 /* Check for images that have the specified name. */
1141 for (int i = 0; i < info->image_count; i++) {
1142 if (i + 1 == excluded_image)
1144 existing_name = xml_get_text_by_path(info->images[i],
1146 if (existing_name && !tstrcmp(existing_name, name))
1153 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1155 return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
1158 WIMLIBAPI const tchar *
1159 wimlib_get_image_name(const WIMStruct *wim, int image)
1161 const struct wim_xml_info *info = wim->xml_info;
1164 if (image < 1 || image > info->image_count)
1166 name = wimlib_get_image_property(wim, image, T("NAME"));
1167 return name ? name : T("");
1170 WIMLIBAPI const tchar *
1171 wimlib_get_image_description(const WIMStruct *wim, int image)
1173 return wimlib_get_image_property(wim, image, T("DESCRIPTION"));
1176 WIMLIBAPI const tchar *
1177 wimlib_get_image_property(const WIMStruct *wim, int image,
1178 const tchar *property_name)
1180 const struct wim_xml_info *info = wim->xml_info;
1182 if (!property_name || !*property_name)
1184 if (image < 1 || image > info->image_count)
1186 return xml_get_text_by_path(info->images[image - 1], property_name);
1190 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1192 return wimlib_set_image_property(wim, image, T("NAME"), name);
1196 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1198 return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
1203 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1205 return wimlib_set_image_property(wim, image, T("FLAGS"), flags);
1209 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1210 const tchar *property_value)
1212 struct wim_xml_info *info = wim->xml_info;
1214 if (!property_name || !*property_name)
1215 return WIMLIB_ERR_INVALID_PARAM;
1217 if (!xml_legal_name(property_name)) {
1218 ERROR("Property name '%"TS"' is illegal in XML", property_name);
1219 return WIMLIB_ERR_INVALID_PARAM;
1222 if (property_value && !xml_legal_value(property_value)) {
1223 WARNING("Value of property '%"TS"' contains illegal characters",
1225 return WIMLIB_ERR_INVALID_PARAM;
1228 if (image < 1 || image > info->image_count)
1229 return WIMLIB_ERR_INVALID_IMAGE;
1231 if (!tstrcmp(property_name, T("NAME")) &&
1232 image_name_in_use(wim, property_value, image))
1233 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1235 return xml_set_text_by_path(info->images[image - 1], property_name,