2 * xml.c - deals with the XML information in WIM files
6 * Copyright 2012-2023 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
29 #include "wimlib/blob_table.h"
30 #include "wimlib/dentry.h"
31 #include "wimlib/encoding.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/metadata.h"
35 #include "wimlib/resource.h"
36 #include "wimlib/timestamp.h"
37 #include "wimlib/xml.h"
38 #include "wimlib/xmlproc.h"
39 #include "wimlib/write.h"
42 * A wrapper around a WIM file's XML document. The XML document contains
43 * metadata about each image in the WIM file as well as metadata about the WIM
/* Members of the XML-info wrapper: the document tree root plus a cached
 * per-image lookup array.
 * NOTE(review): the struct's opening line and the image-count member that the
 * last comment below describes are not visible in this view. */
48 /* The XML document in tree form */
49 struct xml_node *root;
51 /* A malloc()ed array containing a pointer to the IMAGE element for each
52 * WIM image. The image with 1-based index 'i' is at index 'i - 1' in
53 * this array. Note: these pointers are cached values, since they could
54 * also be found by searching the document. */
55 struct xml_node **images;
57 /* The number of WIM images (the length of 'images') */
/*
 * Convert the string 'str' to an unsigned integer in the given 'base' using
 * tstrtoull().  The visible validity check rejects input where no characters
 * were consumed (end == str), where unparsed characters remain (*end), or
 * where the parsed value is >= UINT64_MAX.
 * NOTE(review): the declarations and return statements are not visible in
 * this view; the existing comment on xml_element_get_number() indicates that
 * 0 is the result for invalid input -- confirm against the full file.
 */
62 parse_number(const tchar *str, int base)
69 v = tstrtoull(str, &end, base);
70 if (end == str || *end || v >= UINT64_MAX)
76 * Retrieve an unsigned integer from the contents of the specified element,
77 * decoding it using the specified base. If the element has no contents or does
78 * not contain a valid number, returns 0.
/* Thin wrapper: obtains the element's text with xml_element_get_text() and
 * passes it, together with the caller's 'base', to parse_number(). */
81 xml_element_get_number(const struct xml_node *element, int base)
83 return parse_number(xml_element_get_text(element), base);
87 * Retrieve the timestamp from a time element. This element should have child
88 * elements HIGHPART and LOWPART; these elements will be used to construct a
89 * Windows-style timestamp.
/*
 * Combine the HIGHPART and LOWPART children of 'element' into one value.
 * Both children are parsed as base-16 numbers; the HIGHPART value is shifted
 * left by 32 bits before being OR-ed in, the LOWPART value is OR-ed in as is.
 * NOTE(review): the declaration/initialisation of 'timestamp' and the return
 * statement are not visible in this view.
 */
92 xml_element_get_timestamp(const struct xml_node *element)
95 const struct xml_node *child;
97 xml_node_for_each_child(element, child) {
98 if (xml_node_is_element(child, T("HIGHPART")))
99 timestamp |= xml_element_get_number(child, 16) << 32;
100 else if (xml_node_is_element(child, T("LOWPART")))
101 timestamp |= xml_element_get_number(child, 16);
106 /* Create a new timestamp element and optionally link it into a tree. */
/*
 * Build an element called 'name' that carries 'timestamp' as two text
 * children: HIGHPART holds the upper 32 bits and LOWPART the lower 32 bits,
 * each formatted as "0x" followed by at least 8 uppercase hex digits.
 * The element is first created detached (parent NULL) and only handed to
 * xml_add_child(parent, ...) after both children were created.
 * NOTE(review): the conditions and gotos are not visible in this view; the
 * xml_free_node(element) call is presumably the failure path -- confirm.
 */
107 static struct xml_node *
108 xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
111 struct xml_node *element;
114 element = xml_new_element(NULL, name);
118 tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
119 if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
122 tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
123 if (!xml_new_element_with_text(element, T("LOWPART"), buf))
127 xml_add_child(parent, element);
131 xml_free_node(element);
135 /* Create a new number element and optionally link it into a tree. */
/* Format 'value' as decimal text (PRIu64) into a local buffer and create the
 * element through xml_new_element_with_text(), forwarding 'parent' and 'name'
 * unchanged.  The result of that call is returned directly. */
136 static struct xml_node *
137 xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
141 tsprintf(buf, T("%"PRIu64), value);
142 return xml_new_element_with_text(parent, name, buf);
/*
 * Parse a bracketed decimal index inside a path component.  On entry the
 * cursor points at '[', which is overwritten with a NUL so that the element
 * name preceding it becomes a terminated string.  Decimal digits are then
 * accumulated as index * 10 + digit, and the character finally inspected must
 * be '/' or the end of the string.
 * NOTE(review): the lines that validate the accumulated value, consume the
 * closing bracket and store the results through 'pp' / 'index_ret' are not
 * visible in this view.
 */
146 parse_index(tchar **pp, u32 *index_ret)
151 *p++ = '\0'; /* overwrite '[' */
152 while (*p >= '0' && *p <= '9') {
153 u32 n = (index * 10) + (*p++ - '0');
163 if (*p != '/' && *p != '\0')
/*
 * Walk the tree below 'element' along 'path' and, when 'create' is true,
 * extend it as needed.
 *
 * The path is copied into a local buffer of tstrlen(path) + 1 characters so
 * that components can be terminated in place.  Each component is an element
 * name that ends at '/', '[' or the end of the string; an empty name is a
 * syntax error.  A '[' introduces an index handled by parse_index().  A child
 * matches when xml_node_is_element(child, name) holds and the index countdown
 * reaches zero.  When nothing matches and creation is requested, the child is
 * made with xml_new_element().
 *
 * Visible results: the final element is stored in *result_ret;
 * WIMLIB_ERR_NOMEM and WIMLIB_ERR_INVALID_PARAM are returned on failure, and a
 * malformed path is reported through ERROR() before returning
 * WIMLIB_ERR_INVALID_PARAM.
 * NOTE(review): the loop header, several conditions and the labels are not
 * visible in this view, so the exact control flow should be confirmed against
 * the complete file.
 */
172 do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
173 struct xml_node **result_ret)
175 size_t n = tstrlen(path) + 1;
185 /* Copy the path to a temporary buffer. */
186 tmemcpy(buf, path, n);
195 struct xml_node *child;
198 /* We have another path component. */
200 /* Parse the element name. */
202 while (*p != '/' && *p != '\0' && *p != '[')
204 if (p == name) /* empty name? */
207 /* Handle a bracketed index, if one was specified. */
208 if (*p == '[' && !parse_index(&p, &index))
214 /* Look for a matching child. */
215 xml_node_for_each_child(element, child)
216 if (xml_node_is_element(child, name) && !--index)
219 /* No child matched the path. If create=false, the lookup
220 * failed. If create=true, create the needed element. */
224 /* We can't create an element at index 'n' if indices 1...n-1
225 * didn't already exist. */
227 return WIMLIB_ERR_INVALID_PARAM;
229 child = xml_new_element(element, name);
231 return WIMLIB_ERR_NOMEM;
233 /* Continue to the next path component, if there is one. */
238 *result_ret = element;
242 ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
243 return WIMLIB_ERR_INVALID_PARAM;
246 /* Retrieve the XML element, if any, at the specified 'path'. This supports a
247 * simple filesystem-like syntax. If the element was found, returns a pointer
248 * to it; otherwise returns NULL. */
/* Lookup-only variant: runs do_xml_path_walk() with create=false.  The status
 * code of the walk is not examined here; only the element it stores is used.
 * NOTE(review): the return statement is not visible in this view. */
249 static struct xml_node *
250 xml_get_element_by_path(struct xml_node *root, const tchar *path)
252 struct xml_node *element;
254 do_xml_path_walk(root, path, false, &element);
259 * Similar to xml_get_element_by_path(), but creates the element and any
260 * requisite ancestor elements as needed. If successful, 0 is returned and
261 * *element_ret is set to a pointer to the resulting element. If unsuccessful,
262 * an error code is returned and *element_ret is set to NULL.
/* Delegates to do_xml_path_walk() with create=true and returns its status
 * code unchanged; the element is delivered through 'element_ret'. */
265 xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
266 struct xml_node **element_ret)
268 return do_xml_path_walk(root, path, true, element_ret);
/* Look up 'path' below 'root' and parse the text of the element found as a
 * base-10 number via xml_element_get_number(). */
272 xml_get_number_by_path(struct xml_node *root, const tchar *path)
274 return xml_element_get_number(xml_get_element_by_path(root, path), 10);
/* Look up 'path' below 'root' and decode the element found with
 * xml_element_get_timestamp(). */
278 xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
280 return xml_element_get_timestamp(xml_get_element_by_path(root, path));
/* Look up 'path' below 'root' and return the text of the element found, as
 * reported by xml_element_get_text(). */
284 xml_get_text_by_path(struct xml_node *root, const tchar *path)
286 return xml_element_get_text(xml_get_element_by_path(root, path));
290 * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
291 * or empty) an element containing text.
/*
 * Two code paths are visible.  Create/replace: the element is obtained (and
 * created if necessary) with xml_ensure_element_by_path(), then its text is
 * set with xml_element_set_text(), whose result is returned.  Remove: whatever
 * xml_get_element_by_path() finds is handed to xml_free_node().
 * NOTE(review): the condition that selects between the two paths and the
 * error check after the ensure call are not visible in this view.
 */
294 xml_set_text_by_path(struct xml_node *root, const tchar *path,
298 struct xml_node *element;
301 /* Create or replace */
302 ret = xml_ensure_element_by_path(root, path, &element);
305 return xml_element_set_text(element, text);
308 xml_free_node(xml_get_element_by_path(root, path));
313 /* Unlink and return the node which represents the INDEX attribute of the
314 * specified IMAGE element. */
/* Fetch the INDEX attribute node of 'image_node' with xml_get_attrib() and
 * detach it with xml_unlink_node().
 * NOTE(review): the return statement is not visible in this view. */
315 static struct xml_node *
316 unlink_index_attribute(struct xml_node *image_node)
318 struct xml_node *attr = xml_get_attrib(image_node, T("INDEX"));
320 xml_unlink_node(attr);
324 /* Compute the total uncompressed size of the streams of the specified inode. */
/*
 * Iterate over all i_num_streams streams of 'inode', resolve each one to a
 * blob descriptor through stream_blob() using 'blob_table', and add that
 * blob's size to a running total.
 * NOTE(review): the declaration of 'total_size', any guard around the
 * blob->size access and the return statement are not visible in this view.
 */
326 inode_sum_stream_sizes(const struct wim_inode *inode,
327 const struct blob_table *blob_table)
331 for (unsigned i = 0; i < inode->i_num_streams; i++) {
332 const struct blob_descriptor *blob;
334 blob = stream_blob(&inode->i_streams[i], blob_table);
336 total_size += blob->size;
/*
 * Register 'image_node' as the next image of 'info'.
 *
 * Steps visible below: refuse with WIMLIB_ERR_IMAGE_COUNT once image_count has
 * reached MAX_IMAGES; set the node's INDEX attribute to image_count + 1; grow
 * the 'images' array by one slot with REALLOC() (the result is kept in a
 * local first, so info->images stays valid when WIMLIB_ERR_NOMEM is
 * returned); store the node and increment image_count; finally link the node
 * under info->root.
 * NOTE(review): the check of 'ret' after xml_set_attrib() and the final return
 * are not visible in this view.
 */
342 append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
345 struct xml_node **images;
348 /* Limit exceeded? */
349 if (unlikely(info->image_count >= MAX_IMAGES))
350 return WIMLIB_ERR_IMAGE_COUNT;
352 /* Set the INDEX attribute. */
353 tsprintf(buf, T("%d"), info->image_count + 1);
354 ret = xml_set_attrib(image_node, T("INDEX"), buf);
358 /* Append the IMAGE element to the 'images' array. */
359 images = REALLOC(info->images,
360 (info->image_count + 1) * sizeof(info->images[0]));
361 if (unlikely(!images))
362 return WIMLIB_ERR_NOMEM;
363 info->images = images;
364 images[info->image_count++] = image_node;
366 /* Add the IMAGE element to the document. */
367 xml_add_child(info->root, image_node);
371 /*----------------------------------------------------------------------------*
372 * Functions for internal library use *
373 *----------------------------------------------------------------------------*/
375 /* Allocate an empty 'struct wim_xml_info', containing no images. */
/* Zero-allocates the structure with CALLOC() and gives it a detached root
 * element named WIM.
 * NOTE(review): allocation-failure handling and the return statement are not
 * visible in this view. */
376 struct wim_xml_info *
377 xml_new_info_struct(void)
379 struct wim_xml_info *info = CALLOC(1, sizeof(*info));
384 info->root = xml_new_element(NULL, T("WIM"));
392 /* Free a 'struct wim_xml_info'. */
/* Releases the document tree rooted at info->root with xml_free_node().
 * NOTE(review): the remaining statements of this function are not visible in
 * this view. */
394 xml_free_info_struct(struct wim_xml_info *info)
397 xml_free_node(info->root);
403 /* Retrieve the number of images for which there exist IMAGE elements in the XML
/* Accessor: returns the image_count field cached in 'info'. */
406 xml_get_image_count(const struct wim_xml_info *info)
408 return info->image_count;
411 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
/* Reads the TOTALBYTES element located directly under the document root,
 * using xml_get_number_by_path(). */
414 xml_get_total_bytes(const struct wim_xml_info *info)
416 return xml_get_number_by_path(info->root, T("TOTALBYTES"));
419 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
/* Reads TOTALBYTES below the IMAGE element of the 1-based 'image'.  The index
 * is used directly as images[image - 1]; no range check is visible here, so
 * the caller is presumably expected to pass a valid image number. */
422 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
424 return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
427 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
/* Reads a number below the IMAGE element of the 1-based 'image' (accessed as
 * images[image - 1] without a visible range check).
 * NOTE(review): the path argument is on a line not visible in this view; the
 * existing comment for this function names HARDLINKBYTES. */
430 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
432 return xml_get_number_by_path(info->images[image - 1],
436 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
/* Returns the number parsed from the WIMBOOT element below the IMAGE element
 * of the 1-based 'image' (accessed as images[image - 1]). */
439 xml_get_wimboot(const struct wim_xml_info *info, int image)
441 return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
444 /* Retrieve the Windows build number for the specified image, or 0 if this
445 * information is not available. */
/* Reads the number stored at WINDOWS/VERSION/BUILD below the IMAGE element of
 * the 1-based 'image' (accessed as images[image - 1]). */
447 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
449 return xml_get_number_by_path(info->images[image - 1],
450 T("WINDOWS/VERSION/BUILD"));
453 /* Set the WIMBOOT value for the specified image. */
/* Sets the text of the WIMBOOT element of the 1-based 'image' to "1" through
 * xml_set_text_by_path() and returns that call's status. */
455 xml_set_wimboot(struct wim_xml_info *info, int image)
457 return xml_set_text_by_path(info->images[image - 1],
458 T("WIMBOOT"), T("1"));
462 * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
463 * LASTMODIFICATIONTIME elements for the specified WIM image.
465 * Note: since these stats are likely to be used for display purposes only, we
466 * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
/*
 * Recompute the statistics elements of the 1-based 'image'.
 *
 * Every inode of the image contributes its link count (i_nlink) to either the
 * directory or the file counter.  With 'size' being the sum of the inode's
 * stream sizes, total bytes grow by size * i_nlink and hard-link bytes by
 * size * (i_nlink - 1).
 *
 * All five replacement elements are created detached (parent NULL) first.  If
 * any of them could not be created, all are freed and WIMLIB_ERR_NOMEM is
 * returned before the IMAGE element is touched; otherwise each one is
 * installed with xml_replace_child().
 * NOTE(review): some declarations, the value arguments of the element
 * constructors and the final return are not visible in this view.
 */
469 xml_update_image_info(WIMStruct *wim, int image)
471 const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
472 struct xml_node *image_node = wim->xml_info->images[image - 1];
473 const struct wim_inode *inode;
477 u64 hard_link_bytes = 0;
479 struct xml_node *dircount_node;
480 struct xml_node *filecount_node;
481 struct xml_node *totalbytes_node;
482 struct xml_node *hardlinkbytes_node;
483 struct xml_node *lastmodificationtime_node;
485 image_for_each_inode(inode, imd) {
486 if (inode_is_directory(inode))
487 dir_count += inode->i_nlink;
489 file_count += inode->i_nlink;
490 size = inode_sum_stream_sizes(inode, wim->blob_table);
491 total_bytes += size * inode->i_nlink;
492 hard_link_bytes += size * (inode->i_nlink - 1);
495 dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
497 filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
499 totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
501 hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
503 lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
504 T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
506 if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
507 !hardlinkbytes_node || !lastmodificationtime_node)) {
508 xml_free_node(dircount_node);
509 xml_free_node(filecount_node);
510 xml_free_node(totalbytes_node);
511 xml_free_node(hardlinkbytes_node);
512 xml_free_node(lastmodificationtime_node);
513 return WIMLIB_ERR_NOMEM;
516 xml_replace_child(image_node, dircount_node);
517 xml_replace_child(image_node, filecount_node);
518 xml_replace_child(image_node, totalbytes_node);
519 xml_replace_child(image_node, hardlinkbytes_node);
520 xml_replace_child(image_node, lastmodificationtime_node);
524 /* Add an image to the XML information. */
/*
 * Create a fresh IMAGE element and append it to 'info'.
 *
 * A non-NULL 'name' must pass xml_legal_value(), otherwise
 * WIMLIB_ERR_INVALID_PARAM is returned.  The element is built detached and
 * populated with NAME, the four counters DIRCOUNT / FILECOUNT / TOTALBYTES /
 * HARDLINKBYTES (all 0) and CREATIONTIME / LASTMODIFICATIONTIME (both set to
 * the same "now" timestamp), with 'ret' preset to WIMLIB_ERR_NOMEM for the
 * construction phase.  The finished node is passed to append_image_node().
 * NOTE(review): the first half of the NAME condition, the gotos and the
 * success return are not visible in this view; xml_free_node(image_node) is
 * presumably the error path.
 */
526 xml_add_image(struct wim_xml_info *info, const tchar *name)
528 const u64 now = now_as_wim_timestamp();
529 struct xml_node *image_node;
532 if (name && !xml_legal_value(name)) {
533 ERROR("Name of new image contains illegal characters");
534 return WIMLIB_ERR_INVALID_PARAM;
537 ret = WIMLIB_ERR_NOMEM;
538 image_node = xml_new_element(NULL, T("IMAGE"));
542 !xml_new_element_with_text(image_node, T("NAME"), name))
544 if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
546 if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
548 if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
550 if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
552 if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
554 if (!xml_new_element_with_timestamp(image_node,
555 T("LASTMODIFICATIONTIME"), now))
557 ret = append_image_node(info, image_node);
563 xml_free_node(image_node);
568 * Make a copy of the XML information for the image with index @src_image in the
569 * @src_info XML document and append it to the @dest_info XML document.
571 * In the process, change the image's name and description to the values
572 * specified by @dest_image_name and @dest_image_description. Either or both
573 * may be NULL, which indicates that the corresponding element will not be
574 * included in the destination image.
/*
 * Both the destination name and description, when non-NULL, must pass
 * xml_legal_value(); otherwise WIMLIB_ERR_INVALID_PARAM is returned after an
 * ERROR() message.  The source IMAGE subtree is duplicated with
 * xml_clone_tree() ('ret' is preset to WIMLIB_ERR_NOMEM for that step), then
 * NAME and DESCRIPTION are rewritten with xml_set_text_by_path(), a WIMBOOT
 * element with text "1" is written, and the clone is appended to 'dest_info'
 * with append_image_node().
 * NOTE(review): the error checks between the steps and the condition guarding
 * the WIMBOOT update (presumably the 'wimboot' parameter) are not visible in
 * this view; xml_free_node(dest_node) is presumably the error path.
 */
577 xml_export_image(const struct wim_xml_info *src_info, int src_image,
578 struct wim_xml_info *dest_info, const tchar *dest_image_name,
579 const tchar *dest_image_description, bool wimboot)
581 struct xml_node *dest_node;
584 if (dest_image_name && !xml_legal_value(dest_image_name)) {
585 ERROR("Destination image name contains illegal characters");
586 return WIMLIB_ERR_INVALID_PARAM;
588 if (dest_image_description &&
589 !xml_legal_value(dest_image_description)) {
590 ERROR("Destination image description contains illegal characters");
591 return WIMLIB_ERR_INVALID_PARAM;
594 ret = WIMLIB_ERR_NOMEM;
595 dest_node = xml_clone_tree(src_info->images[src_image - 1]);
599 ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
603 ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
604 dest_image_description);
609 ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
614 ret = append_image_node(dest_info, dest_node);
620 xml_free_node(dest_node);
624 /* Remove the specified image from the XML document. */
/*
 * The INDEX attribute nodes are recycled rather than rewritten: the attribute
 * unlinked from the deleted image is attached to the following image, whose
 * own attribute is in turn passed on to the image after it, and so on.  The
 * attribute left over after the last image is freed at the end.
 * NOTE(review): the loop increment and the image_count update are not visible
 * in this view.
 */
626 xml_delete_image(struct wim_xml_info *info, int image)
628 struct xml_node *next_image;
629 struct xml_node *index_attr, *next_index_attr;
631 /* Free the IMAGE element for the deleted image. Then, shift all
632 * higher-indexed IMAGE elements down by 1, in the process re-assigning
633 * their INDEX attributes. */
635 next_image = info->images[image - 1];
636 next_index_attr = unlink_index_attribute(next_image);
637 xml_free_node(next_image);
639 while (image < info->image_count) {
640 index_attr = next_index_attr;
641 next_image = info->images[image];
642 next_index_attr = unlink_index_attribute(next_image);
643 xml_add_child(next_image, index_attr);
644 info->images[image - 1] = next_image;
648 xml_free_node(next_index_attr);
/* Several of these values are used as array indices by the designated
 * initializers of the descriptions[] table in describe_arch().  Note that the
 * numbering skips 11. */
652 /* Architecture constants are from w64 mingw winnt.h */
653 #define PROCESSOR_ARCHITECTURE_INTEL 0
654 #define PROCESSOR_ARCHITECTURE_MIPS 1
655 #define PROCESSOR_ARCHITECTURE_ALPHA 2
656 #define PROCESSOR_ARCHITECTURE_PPC 3
657 #define PROCESSOR_ARCHITECTURE_SHX 4
658 #define PROCESSOR_ARCHITECTURE_ARM 5
659 #define PROCESSOR_ARCHITECTURE_IA64 6
660 #define PROCESSOR_ARCHITECTURE_ALPHA64 7
661 #define PROCESSOR_ARCHITECTURE_MSIL 8
662 #define PROCESSOR_ARCHITECTURE_AMD64 9
663 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10
664 #define PROCESSOR_ARCHITECTURE_ARM64 12
/*
 * Translate a numeric architecture code into a short name using a sparse
 * lookup table.  Only six codes have entries; slots without an initializer
 * are NULL, which is why the lookup checks both the array bound and for a
 * non-NULL entry before returning it.
 * NOTE(review): the value returned for unknown codes is not visible in this
 * view.
 */
667 describe_arch(u64 arch)
669 static const tchar * const descriptions[] = {
670 [PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
671 [PROCESSOR_ARCHITECTURE_MIPS] = T("MIPS"),
672 [PROCESSOR_ARCHITECTURE_ARM] = T("ARM"),
673 [PROCESSOR_ARCHITECTURE_IA64] = T("ia64"),
674 [PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
675 [PROCESSOR_ARCHITECTURE_ARM64] = T("ARM64"),
678 if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
679 return descriptions[arch];
684 /* Print information from the WINDOWS element, if present. */
/*
 * Print the contents of the WINDOWS child of 'image_node' with tprintf().
 * Printed, in order: the architecture (ARCH, translated by describe_arch()),
 * PRODUCTNAME, EDITIONID, INSTALLATIONTYPE, HAL, PRODUCTTYPE, PRODUCTSUITE,
 * the LANGUAGE children of LANGUAGES followed by LANGUAGES/DEFAULT,
 * SYSTEMROOT, and the MAJOR / MINOR / BUILD / SPBUILD / SPLEVEL numbers found
 * under VERSION.
 * NOTE(review): the guards around the individual prints (presumably NULL
 * checks on 'text' and on the looked-up nodes) are not visible in this view.
 */
686 print_windows_info(struct xml_node *image_node)
688 struct xml_node *windows_node;
689 struct xml_node *langs_node;
690 struct xml_node *version_node;
693 windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
697 tprintf(T("Architecture: %"TS"\n"),
698 describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
700 text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
702 tprintf(T("Product Name: %"TS"\n"), text);
704 text = xml_get_text_by_path(windows_node, T("EDITIONID"));
706 tprintf(T("Edition ID: %"TS"\n"), text);
708 text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
710 tprintf(T("Installation Type: %"TS"\n"), text);
712 text = xml_get_text_by_path(windows_node, T("HAL"));
714 tprintf(T("HAL: %"TS"\n"), text);
716 text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
718 tprintf(T("Product Type: %"TS"\n"), text);
720 text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
722 tprintf(T("Product Suite: %"TS"\n"), text);
724 langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
726 struct xml_node *lang_node;
728 tprintf(T("Languages: "));
729 xml_node_for_each_child(langs_node, lang_node) {
730 if (!xml_node_is_element(lang_node, T("LANGUAGE")))
732 text = xml_element_get_text(lang_node);
735 tprintf(T("%"TS" "), text);
739 text = xml_get_text_by_path(langs_node, T("DEFAULT"));
741 tprintf(T("Default Language: %"TS"\n"), text);
744 text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
746 tprintf(T("System Root: %"TS"\n"), text);
748 version_node = xml_get_element_by_path(windows_node, T("VERSION"));
750 tprintf(T("Major Version: %"PRIu64"\n"),
751 xml_get_number_by_path(version_node, T("MAJOR")));
752 tprintf(T("Minor Version: %"PRIu64"\n"),
753 xml_get_number_by_path(version_node, T("MINOR")));
754 tprintf(T("Build: %"PRIu64"\n"),
755 xml_get_number_by_path(version_node, T("BUILD")));
756 tprintf(T("Service Pack Build: %"PRIu64"\n"),
757 xml_get_number_by_path(version_node, T("SPBUILD")));
758 tprintf(T("Service Pack Level: %"PRIu64"\n"),
759 xml_get_number_by_path(version_node, T("SPLEVEL")));
763 /* Prints information about the specified image. */
/*
 * Print a human-readable summary of the 1-based 'image' with tprintf().
 * Name and Description fall back to an empty string when their elements have
 * no text.  The counters are read with xml_get_number_by_path(), the two
 * timestamps are converted with wim_timestamp_to_str() into 'timebuf', the
 * WINDOWS section is delegated to print_windows_info(), and the WIMBOOT
 * number selects which of two strings is shown for "WIMBoot compatible".
 * NOTE(review): the guards around the optional prints (Display Name, Display
 * Description, Flags), the declaration of 'timebuf' and the two WIMBoot
 * strings are not visible in this view.
 */
765 xml_print_image_info(struct wim_xml_info *info, int image)
767 struct xml_node * const image_node = info->images[image - 1];
771 tprintf(T("Index: %d\n"), image);
773 /* Always print the Name and Description, even if the corresponding XML
774 * elements are not present. */
775 text = xml_get_text_by_path(image_node, T("NAME"));
776 tprintf(T("Name: %"TS"\n"), text ? text : T(""));
777 text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
778 tprintf(T("Description: %"TS"\n"), text ? text : T(""));
780 text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
782 tprintf(T("Display Name: %"TS"\n"), text);
784 text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
786 tprintf(T("Display Description: %"TS"\n"), text);
788 tprintf(T("Directory Count: %"PRIu64"\n"),
789 xml_get_number_by_path(image_node, T("DIRCOUNT")));
791 tprintf(T("File Count: %"PRIu64"\n"),
792 xml_get_number_by_path(image_node, T("FILECOUNT")));
794 tprintf(T("Total Bytes: %"PRIu64"\n"),
795 xml_get_number_by_path(image_node, T("TOTALBYTES")));
797 tprintf(T("Hard Link Bytes: %"PRIu64"\n"),
798 xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
800 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
802 timebuf, ARRAY_LEN(timebuf));
803 tprintf(T("Creation Time: %"TS"\n"), timebuf);
805 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
806 T("LASTMODIFICATIONTIME")),
807 timebuf, ARRAY_LEN(timebuf));
808 tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
810 print_windows_info(image_node);
812 text = xml_get_text_by_path(image_node, T("FLAGS"));
814 tprintf(T("Flags: %"TS"\n"), text);
816 tprintf(T("WIMBoot compatible: %"TS"\n"),
817 xml_get_number_by_path(image_node, T("WIMBOOT")) ?
823 /*----------------------------------------------------------------------------*
824 * Reading and writing the XML data *
825 *----------------------------------------------------------------------------*/
/* Read the INDEX attribute of an IMAGE element: its value is parsed as a
 * base-10 number and clamped to INT_MAX with min().
 * NOTE(review): handling of a missing attribute is on lines not visible in
 * this view. */
828 image_element_get_index(struct xml_node *element)
830 struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
834 return min(INT_MAX, parse_number(attrib->value, 10));
837 /* Prepare the 'images' array from the XML document tree. */
/*
 * Two passes over the children of 'root', both skipping non-IMAGE nodes.
 * Pass 1 validates each IMAGE index (must be >= 1, and the running image
 * count must stay below MAX_IMAGES) and records the largest index seen; the
 * largest index is then required to equal the image count.  After allocating
 * a zeroed images[] array (WIMLIB_ERR_NOMEM on failure), pass 2 stores each
 * IMAGE element at images[index - 1], testing first whether the slot is
 * already occupied.  Failed validation is reported with ERROR() and
 * WIMLIB_ERR_XML.
 * NOTE(review): the counter increment, the gotos and the return statements
 * are not visible in this view.
 */
839 setup_images(struct wim_xml_info *info, struct xml_node *root)
841 struct xml_node *child;
846 xml_node_for_each_child(root, child) {
847 if (!xml_node_is_element(child, T("IMAGE")))
849 index = image_element_get_index(child);
850 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
852 max_index = max(max_index, index);
855 if (unlikely(max_index != info->image_count))
857 ret = WIMLIB_ERR_NOMEM;
858 info->images = CALLOC(info->image_count, sizeof(info->images[0]));
859 if (unlikely(!info->images))
861 xml_node_for_each_child(root, child) {
862 if (!xml_node_is_element(child, T("IMAGE")))
864 index = image_element_get_index(child);
865 if (unlikely(info->images[index - 1]))
867 info->images[index - 1] = child;
872 ERROR("The WIM file's XML document does not contain exactly one IMAGE "
873 "element per image!");
874 ret = WIMLIB_ERR_XML;
/* Convert the raw UTF-16LE document to a tchar string with utf16le_to_tstr()
 * and parse that string into a tree with xml_parse_document(); the tree root
 * is delivered through 'root_ret'.
 * NOTE(review): the error check between the two steps, the release of 'doc'
 * and the return are not visible in this view. */
881 parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
882 struct xml_node **root_ret)
887 ret = utf16le_to_tstr(raw_doc, raw_doc_size, &doc, NULL);
890 ret = xml_parse_document(doc, root_ret);
895 /* Reads the XML data from a WIM file. */
/*
 * Load and validate the XML document of 'wim'.
 *
 * Sequence visible below: allocate a zeroed wim_xml_info; fetch the raw
 * document with wimlib_get_xml_data(); parse it into info->root (any parse
 * failure other than WIMLIB_ERR_NOMEM is mapped to WIMLIB_ERR_XML); require
 * the root element to be named WIM; reject documents containing an
 * ESD/ENCRYPTED element with WIMLIB_ERR_WIM_IS_ENCRYPTED; build the images[]
 * array with setup_images(); and finally publish the result in
 * wim->xml_info.  xml_free_info_struct(info) releases the partially built
 * structure.
 * NOTE(review): the error checks, gotos, the assignment of the local 'root'
 * and the freeing of the raw buffer are not visible in this view.
 */
897 read_wim_xml_data(WIMStruct *wim)
899 struct wim_xml_info *info;
902 struct xml_node *root;
905 /* Allocate the 'struct wim_xml_info'. */
906 ret = WIMLIB_ERR_NOMEM;
907 info = CALLOC(1, sizeof(*info));
911 /* Read the raw UTF-16LE XML document. */
912 ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
916 /* Parse the document, creating the document tree. */
917 ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
921 if (ret != WIMLIB_ERR_NOMEM)
922 ret = WIMLIB_ERR_XML;
923 ERROR("Unable to parse the WIM file's XML document!");
928 /* Verify the root element. */
929 if (!xml_node_is_element(root, T("WIM"))) {
930 ERROR("The WIM file's XML document has an unexpected format!");
931 ret = WIMLIB_ERR_XML;
935 /* Verify the WIM file is not encrypted. */
936 if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
937 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
941 /* Validate the image elements and set up the images[] array. */
942 ret = setup_images(info, root);
947 wim->xml_info = info;
951 xml_free_info_struct(info);
955 /* Swap the INDEX attributes of two IMAGE elements. */
/* Exchanges the INDEX attribute nodes of the two elements by unlinking both
 * and re-attaching each to the other element.  Nothing is done when both
 * arguments refer to the same element. */
957 swap_index_attributes(struct xml_node *image_element_1,
958 struct xml_node *image_element_2)
960 struct xml_node *attr_1, *attr_2;
962 if (image_element_1 != image_element_2) {
963 attr_1 = unlink_index_attribute(image_element_1);
964 attr_2 = unlink_index_attribute(image_element_2);
965 xml_add_child(image_element_1, attr_2);
966 xml_add_child(image_element_2, attr_1);
/*
 * Apply the temporary document changes needed before serialising.
 *
 * The replacement TOTALBYTES element is allocated up front, so the only
 * visible failure (WIMLIB_ERR_NOMEM) happens before the tree is modified.
 * When a single image is being written, IMAGE elements are unlinked from the
 * document and the INDEX attributes of images[0] and the selected image are
 * swapped.  Unless WIM_TOTALBYTES_USE_EXISTING was requested, the current
 * TOTALBYTES element is unlinked and handed back through
 * 'orig_totalbytes_element_ret' (NULL if there was none) so that it can be
 * restored later, and the new element, if any, is linked under the root.
 * NOTE(review): the condition inside the unlink loop that spares the selected
 * image and the final return are not visible in this view.
 */
971 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
972 struct xml_node **orig_totalbytes_element_ret)
974 struct xml_node *totalbytes_element = NULL;
976 /* Allocate the new TOTALBYTES element if needed. */
977 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
978 total_bytes != WIM_TOTALBYTES_OMIT) {
979 totalbytes_element = xml_new_element_with_u64(
980 NULL, T("TOTALBYTES"), total_bytes);
981 if (!totalbytes_element)
982 return WIMLIB_ERR_NOMEM;
985 /* Adjust the IMAGE elements if needed. */
986 if (image != WIMLIB_ALL_IMAGES) {
987 /* We're writing a single image only. Temporarily unlink all
988 * other IMAGE elements from the document. */
989 for (int i = 0; i < info->image_count; i++)
991 xml_unlink_node(info->images[i]);
993 /* Temporarily set the INDEX attribute of the needed IMAGE
995 swap_index_attributes(info->images[0], info->images[image - 1]);
998 /* Adjust (add, change, or remove) the TOTALBYTES element if needed. */
999 *orig_totalbytes_element_ret = NULL;
1000 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1001 /* Unlink the previous TOTALBYTES element, if any. */
1002 *orig_totalbytes_element_ret = xml_get_element_by_path(
1003 info->root, T("TOTALBYTES"));
1004 if (*orig_totalbytes_element_ret)
1005 xml_unlink_node(*orig_totalbytes_element_ret);
1007 /* Link in the new TOTALBYTES element, if any. */
1008 if (totalbytes_element)
1009 xml_add_child(info->root, totalbytes_element);
/*
 * Undo the temporary changes made for writing: for a single-image write,
 * IMAGE elements are linked back under the root and the INDEX attributes of
 * images[0] and the selected image are swapped back; a saved original
 * TOTALBYTES element is put back with xml_replace_child().
 * NOTE(review): the condition inside the re-link loop is not visible in this
 * view.
 */
1015 restore_document_after_write(struct wim_xml_info *info, int image,
1016 struct xml_node *orig_totalbytes_element)
1018 /* Restore the IMAGE elements if needed. */
1019 if (image != WIMLIB_ALL_IMAGES) {
1020 /* We wrote a single image only. Re-link all other IMAGE
1021 * elements to the document. */
1022 for (int i = 0; i < info->image_count; i++)
1024 xml_add_child(info->root, info->images[i]);
1026 /* Restore the original INDEX attributes. */
1027 swap_index_attributes(info->images[0], info->images[image - 1]);
1030 /* Restore the original TOTALBYTES element if needed. */
1031 if (orig_totalbytes_element)
1032 xml_replace_child(info->root, orig_totalbytes_element);
1036 * Writes the XML data to a WIM file.
1038 * 'image' specifies the image(s) to include in the XML data. Normally it is
1039 * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1041 * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1042 * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1043 * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
/*
 * Pipeline visible below: prepare_document_for_write() applies the temporary
 * tree changes; xml_write_document() serialises the tree into 'buf';
 * tstr_get_utf16le_and_len() yields the UTF-16LE form; the data is written
 * with write_wim_resource_from_buffer() using WIMLIB_COMPRESSION_TYPE_NONE;
 * the UTF-16LE buffer is released with tstr_put_utf16le().  Failures of the
 * serialise/convert steps jump to out_restore_document, so
 * restore_document_after_write() runs on those paths as well as after the
 * write.
 * NOTE(review): several call arguments, the error checks and the cleanup of
 * 'buf' are not visible in this view.
 */
1046 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1047 struct wim_reshdr *out_reshdr, int write_resource_flags)
1049 struct wim_xml_info *info = wim->xml_info;
1051 struct xml_node *orig_totalbytes_element;
1052 struct xml_out_buf buf = {};
1053 const utf16lechar *raw_doc;
1054 size_t raw_doc_size;
1056 /* Make any needed temporary changes to the document. */
1057 ret = prepare_document_for_write(info, image, total_bytes,
1058 &orig_totalbytes_element);
1062 ret = xml_write_document(info->root, &buf);
1064 goto out_restore_document;
1066 ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
1068 goto out_restore_document;
1070 /* Write the XML data uncompressed. Although wimlib can handle
1071 * compressed XML data, some other WIM software cannot. */
1072 ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
1075 WIMLIB_COMPRESSION_TYPE_NONE,
1079 write_resource_flags);
1080 tstr_put_utf16le(raw_doc);
1081 out_restore_document:
1082 /* Revert any temporary changes we made to the document. */
1083 restore_document_after_write(info, image, orig_totalbytes_element);
1089 /*----------------------------------------------------------------------------*
1090 * Library API functions *
1091 *----------------------------------------------------------------------------*/
/*
 * Return the raw XML data of 'wim'.
 *
 * Fails with WIMLIB_ERR_NO_FILENAME when the WIMStruct has no filename and
 * its input descriptor is seekable, and with WIMLIB_ERR_INVALID_PARAM when
 * either output pointer is NULL.  Otherwise *bufsize_ret receives the
 * uncompressed size recorded in the header's XML resource entry, and the data
 * is read into *buf_ret by wim_reshdr_to_data(), whose status is returned.
 * Note that *bufsize_ret is written before the read is attempted.
 */
1094 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1096 const struct wim_reshdr *xml_reshdr;
1098 if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1099 return WIMLIB_ERR_NO_FILENAME;
1101 if (buf_ret == NULL || bufsize_ret == NULL)
1102 return WIMLIB_ERR_INVALID_PARAM;
1104 xml_reshdr = &wim->hdr.xml_data_reshdr;
1106 *bufsize_ret = xml_reshdr->uncompressed_size;
1107 return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
/* Fetch the raw XML data with wimlib_get_xml_data() and write it to 'fp' in
 * a single fwrite() call; a short write is reported with ERROR_WITH_ERRNO()
 * and WIMLIB_ERR_WRITE.
 * NOTE(review): the check of the fetch result, the release of the buffer and
 * the return are not visible in this view. */
1111 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1117 ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1121 if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1122 ERROR_WITH_ERRNO("Failed to extract XML data");
1123 ret = WIMLIB_ERR_WRITE;
/*
 * Check whether some image already carries 'name'.  A NULL or empty name is
 * special-cased up front, since any number of images may be unnamed.  Each
 * image's name text is compared with tstrcmp(), i.e. exactly and
 * case-sensitively.  The loop singles out the image whose 1-based number
 * equals 'excluded_image'.
 * NOTE(review): the path argument of the lookup, the statement executed for
 * the excluded image (presumably a skip) and the return statements are not
 * visible in this view.
 */
1130 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1132 const struct wim_xml_info *info = wim->xml_info;
1133 const tchar *existing_name;
1135 /* Any number of images can have "no name". */
1136 if (!name || !*name)
1139 /* Check for images that have the specified name. */
1140 for (int i = 0; i < info->image_count; i++) {
1141 if (i + 1 == excluded_image)
1143 existing_name = xml_get_text_by_path(info->images[i],
1145 if (existing_name && !tstrcmp(existing_name, name))
/* Public wrapper around image_name_in_use() that passes WIMLIB_NO_IMAGE as
 * the excluded image. */
1152 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1154 return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
/* Returns the NAME property of the 1-based 'image', substituting an empty
 * string when the property lookup yields NULL.  An out-of-range image number
 * is detected first.
 * NOTE(review): the value returned for an out-of-range image is on a line not
 * visible in this view. */
1157 WIMLIBAPI const tchar *
1158 wimlib_get_image_name(const WIMStruct *wim, int image)
1160 const struct wim_xml_info *info = wim->xml_info;
1163 if (image < 1 || image > info->image_count)
1165 name = wimlib_get_image_property(wim, image, T("NAME"));
1166 return name ? name : T("");
/* Convenience wrapper: returns the DESCRIPTION property of 'image' exactly as
 * wimlib_get_image_property() reports it (no empty-string substitution). */
1169 WIMLIBAPI const tchar *
1170 wimlib_get_image_description(const WIMStruct *wim, int image)
1172 return wimlib_get_image_property(wim, image, T("DESCRIPTION"));
/* Looks up 'property_name' as a path below the IMAGE element of the 1-based
 * 'image' and returns its text.  A NULL or empty property name and an
 * out-of-range image number are detected before the lookup.
 * NOTE(review): the values returned in those two cases are on lines not
 * visible in this view. */
1175 WIMLIBAPI const tchar *
1176 wimlib_get_image_property(const WIMStruct *wim, int image,
1177 const tchar *property_name)
1179 const struct wim_xml_info *info = wim->xml_info;
1181 if (!property_name || !*property_name)
1183 if (image < 1 || image > info->image_count)
1185 return xml_get_text_by_path(info->images[image - 1], property_name);
/* Convenience wrapper: sets the NAME property of 'image' through
 * wimlib_set_image_property() and returns its status. */
1189 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1191 return wimlib_set_image_property(wim, image, T("NAME"), name);
/* Convenience wrapper: sets the DESCRIPTION property of 'image' through
 * wimlib_set_image_property().  The spelling "descripton" is part of the
 * function's name as defined here and must not be changed without also
 * updating its callers.
 * NOTE(review): the last argument of the call is on a line not visible in
 * this view. */
1195 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1197 return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
/* Convenience wrapper: sets the FLAGS property of 'image' through
 * wimlib_set_image_property() and returns its status. */
1202 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1204 return wimlib_set_image_property(wim, image, T("FLAGS"), flags);
1208 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1209 const tchar *property_value)
1211 struct wim_xml_info *info = wim->xml_info;
1213 if (!property_name || !*property_name)
1214 return WIMLIB_ERR_INVALID_PARAM;
1216 if (!xml_legal_path(property_name)) {
1217 ERROR("Property name '%"TS"' is illegal in XML", property_name);
1218 return WIMLIB_ERR_INVALID_PARAM;
1221 if (property_value && !xml_legal_value(property_value)) {
1222 WARNING("Value of property '%"TS"' contains illegal characters",
1224 return WIMLIB_ERR_INVALID_PARAM;
1227 if (image < 1 || image > info->image_count)
1228 return WIMLIB_ERR_INVALID_IMAGE;
1230 if (!tstrcmp(property_name, T("NAME")) &&
1231 image_name_in_use(wim, property_value, image))
1232 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1234 return xml_set_text_by_path(info->images[image - 1], property_name,