4 * Deals with the XML information in WIM files. Uses the C library libxml2.
8 * Copyright (C) 2012, 2013, 2015 Eric Biggers
10 * This file is free software; you can redistribute it and/or modify it under
11 * the terms of the GNU Lesser General Public License as published by the Free
12 * Software Foundation; either version 3 of the License, or (at your option) any
15 * This file is distributed in the hope that it will be useful, but WITHOUT
16 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this file; if not, see http://www.gnu.org/licenses/.
28 #include <libxml/parser.h>
29 #include <libxml/tree.h>
30 #include <libxml/xmlsave.h>
33 #include "wimlib/blob_table.h"
34 #include "wimlib/dentry.h"
35 #include "wimlib/encoding.h"
36 #include "wimlib/error.h"
37 #include "wimlib/file_io.h"
38 #include "wimlib/metadata.h"
39 #include "wimlib/resource.h"
40 #include "wimlib/timestamp.h"
41 #include "wimlib/xml.h"
42 #include "wimlib/write.h"
45 * A wrapper around a WIM file's XML document. The XML document contains
46 * metadata about each image in the WIM file as well as metadata about the WIM
51 /* The parsed XML document as a libxml2 document tree */
54 /* The root element of the document. This is a cached value, equal to
55 * xmlDocGetRootElement(doc). */
58 /* A malloc()ed array containing a pointer to the IMAGE element for each
59 * WIM image. The image with 1-based index 'i' is at index 'i - 1' in
60 * this array. Note: these pointers are cached values, since they could
61 * also be found by searching the document. */
64 /* The number of WIM images (the length of 'images') */
67 /* Temporary memory for UTF-8 => 'tchar' string translations. When an
68 * API function needs to return a 'tchar' string, it uses one of these
69 * array slots to hold the string and returns a pointer to it. */
71 size_t next_string_idx;
75 /*----------------------------------------------------------------------------*
76 * Internal functions *
77 *----------------------------------------------------------------------------*/
79 /* Iterate through the children of an xmlNode. */
/* 'child' must be an assignable node pointer; it is NULL after the loop runs
 * to completion.  'parent' is evaluated once and dereferenced, so it must not
 * be NULL.  The step expression reads child->next, so the loop body must not
 * free the current child. */
80 #define node_for_each_child(parent, child) \
81 for (child = (parent)->children; child != NULL; child = child->next)
83 /* Is the specified node an element of the specified name? */
/* True only when 'node' has type XML_ELEMENT_NODE and its name compares equal
 * to 'name' under xmlStrEqual().  'node' is dereferenced unconditionally, so
 * it must not be NULL. */
85 node_is_element(const xmlNode *node, const xmlChar *name)
87 return node->type == XML_ELEMENT_NODE && xmlStrEqual(node->name, name);
90 /* Retrieve a pointer to the UTF-8 text contents of the specified node, or NULL
91 * if the node has no text contents. This assumes the simple case where the
92 * node has a single TEXT child node. */
/* Returns the 'content' pointer of the first direct child of 'node' that is an
 * XML_TEXT_NODE with non-NULL content.  No copy is made: the pointer refers to
 * the child node's own field.
 * NOTE(review): callers in this file pass the result of
 * xml_get_node_by_path(), which may be NULL; the NULL guard is not visible in
 * this listing -- confirm it exists. */
93 static const xmlChar *
94 node_get_text(const xmlNode *node)
100 node_for_each_child(node, child)
101 if (child->type == XML_TEXT_NODE && child->content)
102 return child->content;
106 /* Retrieve an unsigned integer from the contents of the specified node,
107 * decoding it using the specified base. If the node has no contents or does
108 * not contain a valid number, returns 0. */
/* 'base' is handed unchanged to strtoull().  The text comes from
 * node_get_text(), so only the node's first text child is considered. */
110 node_get_number(const xmlNode *node, int base)
112 const xmlChar *str = node_get_text(node);
114 unsigned long long v;
118 v = strtoull(str, &end, base);
/* Rejected inputs: no digits consumed, trailing characters after the number,
 * or a value of UINT64_MAX or more.
 * NOTE(review): strtoull() reports overflow by returning ULLONG_MAX, so the
 * last test presumably doubles as the overflow check -- confirm. */
119 if ((xmlChar *)end == str || *end || v >= UINT64_MAX)
124 /* Retrieve the timestamp from a time node. This node should have child
125 * elements HIGHPART and LOWPART; these elements will be used to construct a
126 * Windows-style timestamp. */
/* Both parts are decoded as base-16 numbers.  HIGHPART is shifted left by 32
 * bits and LOWPART is used as-is; the two are OR-ed into 'timestamp'.  Neither
 * value is masked to 32 bits here, and repeated HIGHPART/LOWPART children are
 * all OR-ed in.  Children with any other name are ignored. */
128 node_get_timestamp(const xmlNode *node)
135 node_for_each_child(node, child) {
136 if (node_is_element(child, "HIGHPART"))
137 timestamp |= node_get_number(child, 16) << 32;
138 else if (node_is_element(child, "LOWPART"))
139 timestamp |= node_get_number(child, 16);
/* Produce a UTF-8 view of the 'tchar' string 'tstr' in *utf8_ret.  When
 * wimlib_mbs_is_utf8 is set, *utf8_ret simply aliases 'tstr' (no copy).
 * Otherwise the conversion is delegated to tstr_to_utf8_simple() and its
 * status is returned.  Callers in this file pair this with tstr_put_utf8(). */
145 tstr_get_utf8(const tchar *tstr, const xmlChar **utf8_ret)
147 if (wimlib_mbs_is_utf8) {
148 *utf8_ret = (xmlChar *)tstr;
151 return tstr_to_utf8_simple(tstr, (char **)utf8_ret);
/* Counterpart of tstr_get_utf8().  Acts only when wimlib_mbs_is_utf8 is false,
 * i.e. when tstr_get_utf8() went through a conversion instead of aliasing the
 * input.
 * NOTE(review): the guarded statement is not visible in this listing;
 * presumably it releases 'utf8' -- confirm. */
155 tstr_put_utf8(const xmlChar *utf8)
157 if (!wimlib_mbs_is_utf8)
161 /* Retrieve the text contents of an XML element as a 'tchar' string. If not
162 * found or if the text could not be translated, returns NULL. */
/* Fast path: with no text, or when 'tchar' strings are already UTF-8, the
 * document's own text pointer (or NULL) is returned directly.  Otherwise the
 * text is translated into the slot info->strings[info->next_string_idx]; the
 * index then advances modulo the array length, so slots are reused in
 * round-robin order and num_strings never exceeds the array length.
 * NOTE(review): the body of the "all slots already used" branch is not
 * visible; presumably it releases the slot's previous string, which would
 * make earlier returned pointers short-lived -- confirm. */
164 node_get_ttext(struct wim_xml_info *info, xmlNode *node)
169 text = node_get_text(node);
171 if (!text || wimlib_mbs_is_utf8)
172 return (const tchar *)text;
174 ttext_p = &info->strings[info->next_string_idx];
175 if (info->num_strings >= ARRAY_LEN(info->strings)) {
179 if (utf8_to_tstr_simple(text, ttext_p))
181 if (info->num_strings < ARRAY_LEN(info->strings))
183 info->next_string_idx++;
184 info->next_string_idx %= ARRAY_LEN(info->strings);
188 /* Unlink the specified node from its parent, then free it (recursively). */
190 unlink_and_free_tree(xmlNode *node)
196 /* Unlink and free (recursively) all children of the specified node. */
/* Children are removed from the last one backwards.  The loop re-reads
 * node->last on every iteration, so it relies on unlink_and_free_tree()
 * actually detaching the child it is given. */
198 unlink_and_free_children(xmlNode *node)
202 while ((child = node->last) != NULL)
203 unlink_and_free_tree(child);
206 /* Add the new child element 'replacement' to 'parent', replacing any same-named
207 * element that may already exist. */
/* Only direct children of 'parent' are examined, and the match is by element
 * name (replacement->name).  A match is swapped out with xmlReplaceNode();
 * with no match the replacement is appended via xmlAddChild().
 * NOTE(review): what happens to the replaced child and the early exit after
 * replacing are not visible in this listing -- confirm the old node is freed
 * and that xmlAddChild() is not reached after a replacement. */
209 node_replace_child_element(xmlNode *parent, xmlNode *replacement)
213 node_for_each_child(parent, child) {
214 if (node_is_element(child, replacement->name)) {
215 xmlReplaceNode(child, replacement);
221 xmlAddChild(parent, replacement);
224 /* Set the text contents of the specified element to the specified string,
225 * replacing the existing contents (if any). The string is "raw" and is
226 * permitted to contain characters that have special meaning in XML. */
/* The new text node is allocated before the existing children are removed, so
 * the WIMLIB_ERR_NOMEM return happens while 'node' is still unmodified.
 * NOTE(review): the condition guarding that return is not visible; presumably
 * it tests for xmlNewText() returning NULL -- confirm. */
228 node_set_text(xmlNode *node, const xmlChar *text)
230 xmlNode *text_node = xmlNewText(text);
232 return WIMLIB_ERR_NOMEM;
233 unlink_and_free_children(node);
234 xmlAddChild(node, text_node);
238 /* Like 'node_set_text()', but takes in a 'tchar' string. */
/* Two steps: obtain a UTF-8 form of 'ttext' with tstr_get_utf8(), then store
 * it with node_set_text().  Both calls report their status through 'ret'.
 * NOTE(review): the error check between the calls and the matching
 * tstr_put_utf8() are not visible in this listing -- confirm. */
240 node_set_ttext(xmlNode *node, const tchar *ttext)
245 ret = tstr_get_utf8(ttext, &text);
248 ret = node_set_text(node, text);
253 /* Create a new element containing text and optionally link it into a tree. */
/* The element is created without a namespace (first xmlNewNode() argument is
 * NULL) and filled with 'text' via node_set_text().  Callers in this file test
 * the result with '!' and treat a null result as failure.  Some callers pass
 * parent == NULL to get an unlinked element.
 * NOTE(review): the guard around xmlAddChild() and the cleanup after a failed
 * node_set_text() are not visible in this listing -- confirm. */
255 new_element_with_text(xmlNode *parent, const xmlChar *name, const xmlChar *text)
259 node = xmlNewNode(NULL, name);
263 if (node_set_text(node, text)) {
269 xmlAddChild(parent, node);
273 /* Create a new element containing text and optionally link it into a tree. */
/* 'tchar' variant of new_element_with_text(): the string is converted with
 * tstr_get_utf8() first.  Unlike the UTF-8 variant this one reports a status
 * code, and WIMLIB_ERR_NOMEM is among its returns.
 * NOTE(review): the parameter list continues on a line that is not visible
 * here (xml_add_image() passes NULL as a fourth argument) -- confirm its
 * meaning before relying on it. */
275 new_element_with_ttext(xmlNode *parent, const xmlChar *name, const tchar *ttext,
282 ret = tstr_get_utf8(ttext, &text);
285 node = new_element_with_text(parent, name, text);
288 return WIMLIB_ERR_NOMEM;
294 /* Create a new timestamp element and optionally link it into a tree. */
/* Builds <name><HIGHPART>..</HIGHPART><LOWPART>..</LOWPART></name>.  Each half
 * of 'timestamp' is written as "0x" followed by at least 8 uppercase
 * hexadecimal digits (upper 32 bits first, then lower 32 bits).  The children
 * are attached to the new element before that element is linked under
 * 'parent'.
 * NOTE(review): 'buf' is declared on a line not visible here; each sprintf()
 * produces 10 characters plus the terminator -- confirm the buffer holds at
 * least 11 bytes. */
296 new_element_with_timestamp(xmlNode *parent, const xmlChar *name, u64 timestamp)
301 node = xmlNewNode(NULL, name);
305 sprintf(buf, "0x%08"PRIX32, (u32)(timestamp >> 32));
306 if (!new_element_with_text(node, "HIGHPART", buf))
309 sprintf(buf, "0x%08"PRIX32, (u32)timestamp);
310 if (!new_element_with_text(node, "LOWPART", buf))
314 xmlAddChild(parent, node);
322 /* Create a new number element and optionally link it into a tree. */
/* The value is rendered in decimal and handed to new_element_with_text(),
 * whose result is returned unchanged.
 * NOTE(review): 'buf' is declared on a line not visible here; a 64-bit value
 * needs up to 20 digits plus the terminator -- confirm the buffer size. */
324 new_element_with_u64(xmlNode *parent, const xmlChar *name, u64 value)
328 sprintf(buf, "%"PRIu64, value);
329 return new_element_with_text(parent, name, buf);
332 /* Allocate a 'struct wim_xml_info'. The caller is responsible for initializing
333 * the document and the images array. */
/* Only the translation-cache bookkeeping (next_string_idx, num_strings) is
 * initialized here; no other field is assigned in the visible code.
 * NOTE(review): the check of the MALLOC() result is not visible in this
 * listing -- confirm the two assignments are guarded. */
334 static struct wim_xml_info *
335 alloc_wim_xml_info(void)
337 struct wim_xml_info *info = MALLOC(sizeof(*info));
339 info->next_string_idx = 0;
340 info->num_strings = 0;
/* Parse a bracketed index that follows an element name in an XML path.  The
 * character at *pp (the '[') is overwritten with a NUL so that the element
 * name in front of it becomes a terminated string.  Decimal digits are then
 * accumulated into a 32-bit value, and the character after the index must be
 * '/' or the end of the string.  do_xml_path_walk() treats a false result as
 * a syntax error.
 * NOTE(review): the overflow, zero-index and closing-bracket checks and the
 * stores to *pp and *index_ret are not visible in this listing -- confirm. */
346 parse_index(xmlChar **pp, uint32_t *index_ret)
351 *p++ = '\0'; /* overwrite '[' */
352 while (*p >= '0' && *p <= '9') {
353 uint32_t n = (index * 10) + (*p++ - '0');
363 if (*p != '/' && *p != '\0')
/* Walk 'path' starting at 'node'.  Path components are element names
 * separated by '/', each optionally followed by a bracketed index.  With
 * create=false this is a pure lookup; with create=true missing elements are
 * created with xmlNewChild().  The path is copied (including its terminator)
 * into a scratch buffer first because parsing writes NULs into it.
 * Returns WIMLIB_ERR_INVALID_PARAM for invalid syntax or an uncreatable index,
 * and WIMLIB_ERR_NOMEM when element creation fails.
 * NOTE(review): the declaration of 'buf' is not visible.  If it is a stack
 * buffer sized by 'n', its size follows the caller-supplied path length
 * (wimlib_set_image_property() forwards its property name here) -- confirm
 * that this is bounded. */
372 do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
373 xmlNode **result_ret)
375 size_t n = strlen(path) + 1;
385 /* Copy the path to a temporary buffer. */
386 memcpy(buf, path, n);
398 /* We have another path component. */
400 /* Parse the element name. */
402 while (*p != '/' && *p != '\0' && *p != '[')
404 if (p == name) /* empty name? */
407 /* Handle a bracketed index, if one was specified. */
408 if (*p == '[' && !parse_index(&p, &index))
414 /* Look for a matching child. */
/* 'index' is decremented once per same-named child; the child that brings it
 * to zero is the match. */
415 node_for_each_child(node, child)
416 if (node_is_element(child, name) && !--index)
419 /* No child matched the path. If create=false, the lookup
420 * failed. If create=true, create the needed element. */
424 /* We can't create an element at index 'n' if indices 1...n-1
425 * didn't already exist. */
427 return WIMLIB_ERR_INVALID_PARAM;
429 child = xmlNewChild(node, NULL, name, NULL);
431 return WIMLIB_ERR_NOMEM;
433 /* Continue to the next path component, if there is one. */
442 ERROR("The XML path \"%s\" has invalid syntax.", path);
443 return WIMLIB_ERR_INVALID_PARAM;
446 /* Retrieve the XML element, if any, at the specified 'path'. This supports a
447 * simple filesystem-like syntax. If the element was found, returns a pointer
448 * to it; otherwise returns NULL. */
/* Lookup-only walk (create=false).  The status returned by do_xml_path_walk()
 * is discarded, so a syntactically invalid path is indistinguishable from
 * "not found" for the caller.
 * NOTE(review): this relies on the walk leaving 'node' NULL on failure, which
 * is not visible in this listing -- confirm. */
450 xml_get_node_by_path(xmlNode *root, const xmlChar *path)
453 do_xml_path_walk(root, path, false, &node);
457 /* Similar to xml_get_node_by_path(), but creates the element and any requisite
458 * ancestor elements as needed. If successful, 0 is returned and *node_ret is
459 * set to a pointer to the resulting element. If unsuccessful, an error code is
460 * returned and *node_ret is set to NULL. */
/* Thin wrapper: runs do_xml_path_walk() with create=true and returns its
 * status unchanged; 'node_ret' is forwarded to the walk as its result slot. */
462 xml_ensure_node_by_path(xmlNode *root, const xmlChar *path, xmlNode **node_ret)
464 return do_xml_path_walk(root, path, true, node_ret);
/* Look up the element at 'path' under 'root' and decode its text as a base-10
 * unsigned integer.  The lookup result is passed straight to
 * node_get_number(), so whatever that helper yields for a missing or
 * non-numeric value is what the caller gets. */
468 xml_get_number_by_path(xmlNode *root, const xmlChar *path)
470 return node_get_number(xml_get_node_by_path(root, path), 10);
/* Look up the element at 'path' under 'root' and decode it with
 * node_get_timestamp().
 * NOTE(review): the lookup result may be NULL; node_get_timestamp()'s handling
 * of a NULL node is not visible in this listing -- confirm. */
474 xml_get_timestamp_by_path(xmlNode *root, const xmlChar *path)
476 return node_get_timestamp(xml_get_node_by_path(root, path));
/* Look up the element at 'path' under 'root' and return its UTF-8 text as
 * produced by node_get_text(); no copy is made by this wrapper. */
479 static const xmlChar *
480 xml_get_text_by_path(xmlNode *root, const xmlChar *path)
482 return node_get_text(xml_get_node_by_path(root, path));
/* 'tchar' counterpart of xml_get_text_by_path(): the looked-up element is
 * passed to node_get_ttext(), which may use info's translation cache.  The
 * parameter list continues on a line not visible in this listing; the body
 * uses a 'path' argument. */
486 xml_get_ttext_by_path(struct wim_xml_info *info, xmlNode *root,
489 return node_get_ttext(info, xml_get_node_by_path(root, path));
492 /* Creates/replaces (if ttext is not NULL and not empty) or removes (if ttext is
493 * NULL or empty) an element containing text. */
/* Set path: the element (and any missing ancestors) is ensured with
 * xml_ensure_node_by_path(), then its text is replaced via node_set_ttext().
 * Remove path: the element is looked up without creating anything and, when
 * found, unlinked and freed together with its subtree.
 * NOTE(review): the error check after the ensure call, the guard around the
 * removal and the removal path's return value are not visible -- confirm. */
495 xml_set_ttext_by_path(xmlNode *root, const xmlChar *path, const tchar *ttext)
500 if (ttext && *ttext) {
501 /* Create or replace */
502 ret = xml_ensure_node_by_path(root, path, &node);
505 return node_set_ttext(node, ttext);
508 node = xml_get_node_by_path(root, path);
510 unlink_and_free_tree(node);
515 /* Sets a string property for the specified WIM image. */
/* 'image' is a 1-based index and must lie in 1..image_count, otherwise
 * WIMLIB_ERR_INVALID_IMAGE is returned.  'name' is used as an XML path
 * relative to the image's IMAGE element; 'value' is forwarded to
 * xml_set_ttext_by_path(), which treats NULL and "" as a request to remove
 * the element. */
517 set_image_property(WIMStruct *wim, int image, const xmlChar *name,
520 struct wim_xml_info *info = wim->xml_info;
522 if (image < 1 || image > info->image_count)
523 return WIMLIB_ERR_INVALID_IMAGE;
525 return xml_set_ttext_by_path(info->images[image - 1], name, value);
528 /* Gets a string property for the specified WIM image as a 'tchar' string.
529 * Returns a pointer to the property value if found; NULL if the image doesn't
530 * exist; or 'default_value' if the property doesn't exist in the image or if
531 * the property value could not be translated to a 'tchar' string. */
/* 'image' is 1-based and range-checked against image_count.  'name' is used
 * as an XML path relative to the IMAGE element.  The value comes from
 * xml_get_ttext_by_path(), so in builds where 'tchar' strings are not UTF-8
 * it may point into info's translation cache rather than into the document. */
533 get_image_property(const WIMStruct *wim, int image, const xmlChar *name,
534 const tchar *default_value)
536 struct wim_xml_info *info = wim->xml_info;
539 if (image < 1 || image > info->image_count)
542 value = xml_get_ttext_by_path(info, info->images[image - 1], name);
543 return value ? value : default_value;
546 /* Unlink and return the node which represents the INDEX attribute of the
547 * specified IMAGE element. */
/* The attribute is only detached, not freed.  Callers in this file either
 * free it with xmlFreeProp() or re-attach it to another IMAGE element with
 * xmlAddChild().
 * NOTE(review): xmlHasProp() yields NULL when the attribute is absent, and
 * that NULL is passed to xmlUnlinkNode() -- confirm this is harmless. */
549 unlink_index_attribute(xmlNode *image_node)
551 xmlAttr *attr = xmlHasProp(image_node, "INDEX");
552 xmlUnlinkNode((xmlNode *)attr);
556 /* Compute the total uncompressed size of the streams of the specified inode. */
/* Each of the inode's i_num_streams streams is resolved to a blob descriptor
 * through stream_blob(), and the blob sizes are accumulated in total_size.
 * NOTE(review): the guard for a stream that resolves to no blob is not
 * visible in this listing -- confirm blob is checked before blob->size. */
558 inode_sum_stream_sizes(const struct wim_inode *inode,
559 const struct blob_table *blob_table)
563 for (unsigned i = 0; i < inode->i_num_streams; i++) {
564 const struct blob_descriptor *blob;
566 blob = stream_blob(&inode->i_streams[i], blob_table);
568 total_size += blob->size;
/* Register 'image_node' as the next image: give it an INDEX attribute equal
 * to the new 1-based image number, grow info->images by one slot, store the
 * node there, bump image_count, and link the node under the document root.
 * On every error return the node has not been linked and info->images and
 * image_count are unchanged; callers in this file then free the node
 * themselves.  If the REALLOC() fails, the INDEX attribute added just before
 * stays on the node.
 * Returns WIMLIB_ERR_IMAGE_COUNT once MAX_IMAGES is reached and
 * WIMLIB_ERR_NOMEM on allocation failure. */
574 append_image_node(struct wim_xml_info *info, xmlNode *image_node)
579 /* Limit exceeded? */
580 if (unlikely(info->image_count >= MAX_IMAGES))
581 return WIMLIB_ERR_IMAGE_COUNT;
583 /* Add the INDEX attribute. */
584 sprintf(buf, "%d", info->image_count + 1);
585 if (!xmlNewProp(image_node, "INDEX", buf))
586 return WIMLIB_ERR_NOMEM;
588 /* Append the IMAGE element to the 'images' array. */
589 images = REALLOC(info->images,
590 (info->image_count + 1) * sizeof(info->images[0]));
591 if (unlikely(!images))
592 return WIMLIB_ERR_NOMEM;
593 info->images = images;
594 images[info->image_count++] = image_node;
596 /* Add the IMAGE element to the document. */
597 xmlAddChild(info->root, image_node);
601 /*----------------------------------------------------------------------------*
602 * Functions for internal library use *
603 *----------------------------------------------------------------------------*/
605 /* Allocate an empty 'struct wim_xml_info', containing no images. */
/* Builds a new XML "1.0" document whose root element is WIM and records both
 * in 'info'; image_count starts at 0.
 * NOTE(review): the failure checks, the initialization of info->images and
 * the labels are not visible; the xmlFreeDoc() call near the end is presumably
 * error-path cleanup -- confirm. */
606 struct wim_xml_info *
607 xml_new_info_struct(void)
609 struct wim_xml_info *info;
611 info = alloc_wim_xml_info();
615 info->doc = xmlNewDoc("1.0");
619 info->root = xmlNewNode(NULL, "WIM");
622 xmlDocSetRootElement(info->doc, info->root);
625 info->image_count = 0;
629 xmlFreeDoc(info->doc);
636 /* Free a 'struct wim_xml_info'. */
/* Releases the libxml2 document and the first num_strings entries of the
 * translation cache.
 * NOTE(review): the NULL guard on 'info' and the freeing of the images array
 * and of 'info' itself are not visible in this listing -- confirm. */
638 xml_free_info_struct(struct wim_xml_info *info)
641 xmlFreeDoc(info->doc);
643 for (size_t i = 0; i < info->num_strings; i++)
644 FREE(info->strings[i]);
649 /* Retrieve the number of images for which there exist IMAGE elements in the XML
650 * document. */
/* Plain accessor: returns the cached info->image_count without touching the
 * document tree. */
652 xml_get_image_count(const struct wim_xml_info *info)
654 return info->image_count;
657 /* Retrieve the TOTALBYTES value for the WIM file, or 0 if this value is
658 * unavailable. */
/* Reads the TOTALBYTES element that is a direct child of the document root
 * (not of an IMAGE element) as a base-10 number. */
660 xml_get_total_bytes(const struct wim_xml_info *info)
662 return xml_get_number_by_path(info->root, "TOTALBYTES");
665 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
666 * unavailable. */
/* 'image' is a 1-based index into info->images and is not range-checked
 * here; the caller must pass a valid image number. */
668 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
670 return xml_get_number_by_path(info->images[image - 1], "TOTALBYTES");
673 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
674 * is unavailable. */
/* 'image' is a 1-based index into info->images and is not range-checked
 * here; the caller must pass a valid image number. */
676 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
678 return xml_get_number_by_path(info->images[image - 1], "HARDLINKBYTES");
681 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
682 * unavailable. */
/* The WIMBOOT element is read as a base-10 number and that number is the
 * return value.  'image' is a 1-based index and is not range-checked here.
 * NOTE(review): the return type is on a line not visible in this listing;
 * presumably a boolean, so any nonzero number counts as true -- confirm. */
684 xml_get_wimboot(const struct wim_xml_info *info, int image)
686 return xml_get_number_by_path(info->images[image - 1], "WIMBOOT");
689 /* Retrieve the Windows build number for the specified image, or 0 if this
690 * information is not available. */
/* Reads the nested element WINDOWS/VERSION/BUILD below the image's IMAGE
 * element as a base-10 number.  'image' is a 1-based index and is not
 * range-checked here. */
692 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
694 return xml_get_number_by_path(info->images[image - 1],
695 "WINDOWS/VERSION/BUILD");
698 /* Set the WIMBOOT value for the specified image. */
/* Creates or overwrites the image's WIMBOOT element with the text "1" and
 * returns the status of xml_set_ttext_by_path().  'image' is a 1-based index
 * and is not range-checked here. */
700 xml_set_wimboot(struct wim_xml_info *info, int image)
702 return xml_set_ttext_by_path(info->images[image - 1], "WIMBOOT", T("1"));
706 * Update the DIRCOUNT, FILECOUNT, TOTALBYTES, HARDLINKBYTES, and
707 * LASTMODIFICATIONTIME elements for the specified WIM image.
709 * Note: since these stats are likely to be used for display purposes only, we
710 * no longer attempt to duplicate WIMGAPI's weird bugs when calculating them.
/* Recompute the statistics of image 'image' (1-based, not range-checked here)
 * from its inodes and store them in the IMAGE element.
 *
 * Every inode is weighted by its link count: i_nlink is added to the directory
 * or file counter, total_bytes grows by size * i_nlink, and hard_link_bytes
 * grows by size * (i_nlink - 1), i.e. the bytes attributable to the extra
 * links.
 *
 * All five replacement elements are first built unlinked (parent NULL).  If
 * any of them could not be built, all are passed to xmlFreeNode() and
 * WIMLIB_ERR_NOMEM is returned before the document has been modified; only
 * afterwards are the existing elements replaced one by one.
 * NOTE(review): the failure path hands possibly-NULL pointers to
 * xmlFreeNode() -- confirm that is accepted. */
713 xml_update_image_info(WIMStruct *wim, int image)
715 const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
716 xmlNode *image_node = wim->xml_info->images[image - 1];
717 const struct wim_inode *inode;
721 u64 hard_link_bytes = 0;
723 xmlNode *dircount_node;
724 xmlNode *filecount_node;
725 xmlNode *totalbytes_node;
726 xmlNode *hardlinkbytes_node;
727 xmlNode *lastmodificationtime_node;
729 image_for_each_inode(inode, imd) {
730 if (inode_is_directory(inode))
731 dir_count += inode->i_nlink;
733 file_count += inode->i_nlink;
734 size = inode_sum_stream_sizes(inode, wim->blob_table);
735 total_bytes += size * inode->i_nlink;
736 hard_link_bytes += size * (inode->i_nlink - 1);
739 dircount_node = new_element_with_u64(NULL, "DIRCOUNT", dir_count);
740 filecount_node = new_element_with_u64(NULL, "FILECOUNT", file_count);
741 totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES", total_bytes);
742 hardlinkbytes_node = new_element_with_u64(NULL, "HARDLINKBYTES",
744 lastmodificationtime_node =
745 new_element_with_timestamp(NULL, "LASTMODIFICATIONTIME",
746 now_as_wim_timestamp());
748 if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
749 !hardlinkbytes_node || !lastmodificationtime_node)) {
750 xmlFreeNode(dircount_node);
751 xmlFreeNode(filecount_node);
752 xmlFreeNode(totalbytes_node);
753 xmlFreeNode(hardlinkbytes_node);
754 xmlFreeNode(lastmodificationtime_node);
755 return WIMLIB_ERR_NOMEM;
758 node_replace_child_element(image_node, dircount_node);
759 node_replace_child_element(image_node, filecount_node);
760 node_replace_child_element(image_node, totalbytes_node);
761 node_replace_child_element(image_node, hardlinkbytes_node);
762 node_replace_child_element(image_node, lastmodificationtime_node);
766 /* Add an image to the XML information. */
/* Build a fresh IMAGE element named 'name' with zeroed DIRCOUNT, FILECOUNT,
 * TOTALBYTES and HARDLINKBYTES, and with CREATIONTIME and
 * LASTMODIFICATIONTIME both set to the same "now" value, then register it
 * through append_image_node().  The element is assembled unlinked and only
 * joins the document in that final step.  'ret' is preset to
 * WIMLIB_ERR_NOMEM ahead of the allocation steps whose helpers signal failure
 * only by a null result.
 * NOTE(review): the goto/label lines are not visible; the trailing
 * xmlFreeNode() is presumably the error-path cleanup -- confirm. */
768 xml_add_image(struct wim_xml_info *info, const tchar *name)
770 const u64 now = now_as_wim_timestamp();
774 ret = WIMLIB_ERR_NOMEM;
775 image_node = xmlNewNode(NULL, "IMAGE");
780 ret = new_element_with_ttext(image_node, "NAME", name, NULL);
784 ret = WIMLIB_ERR_NOMEM;
785 if (!new_element_with_u64(image_node, "DIRCOUNT", 0))
787 if (!new_element_with_u64(image_node, "FILECOUNT", 0))
789 if (!new_element_with_u64(image_node, "TOTALBYTES", 0))
791 if (!new_element_with_u64(image_node, "HARDLINKBYTES", 0))
793 if (!new_element_with_timestamp(image_node, "CREATIONTIME", now))
795 if (!new_element_with_timestamp(image_node, "LASTMODIFICATIONTIME", now))
797 ret = append_image_node(info, image_node);
803 xmlFreeNode(image_node);
808 * Make a copy of the XML information for the image with index @src_image in the
809 * @src_info XML document and append it to the @dest_info XML document.
811 * In the process, change the image's name and description to the values
812 * specified by @dest_image_name and @dest_image_description. Either or both
813 * may be NULL, which indicates that the corresponding element will not be
814 * included in the destination image.
/* The source IMAGE element is duplicated with xmlDocCopyNode(), its NAME and
 * DESCRIPTION are overridden (NULL or "" removes the element, per
 * xml_set_ttext_by_path()), and WIMBOOT may be set to "1".  The INDEX
 * attribute carried over from the source is dropped because
 * append_image_node() assigns a new one for the destination.  'src_image' is
 * a 1-based index and is not range-checked here.
 * NOTE(review): the remaining xmlDocCopyNode() arguments, the 'wimboot'
 * guard, the error checks and the labels are not visible; the trailing
 * xmlFreeNode() is presumably the error-path cleanup -- confirm. */
817 xml_export_image(const struct wim_xml_info *src_info, int src_image,
818 struct wim_xml_info *dest_info, const tchar *dest_image_name,
819 const tchar *dest_image_description, bool wimboot)
824 ret = WIMLIB_ERR_NOMEM;
825 dest_node = xmlDocCopyNode(src_info->images[src_image - 1],
830 ret = xml_set_ttext_by_path(dest_node, "NAME", dest_image_name);
834 ret = xml_set_ttext_by_path(dest_node, "DESCRIPTION",
835 dest_image_description);
840 ret = xml_set_ttext_by_path(dest_node, "WIMBOOT", T("1"));
845 xmlFreeProp(unlink_index_attribute(dest_node));
847 ret = append_image_node(dest_info, dest_node);
853 xmlFreeNode(dest_node);
857 /* Remove the specified image from the XML document. */
/* Renumbering works by moving attribute objects rather than rewriting values:
 * the deleted image's INDEX attribute is handed to the next image, whose own
 * attribute is handed to the one after it, and so on.  The attribute left
 * over at the end is freed.  'image' is a 1-based index and is not
 * range-checked here.
 * NOTE(review): the loop's increment of 'image' and the decrement of
 * image_count are not visible in this listing -- confirm. */
859 xml_delete_image(struct wim_xml_info *info, int image)
862 xmlAttr *index_attr, *next_index_attr;
864 /* Free the IMAGE element for the deleted image. Then, shift all
865 * higher-indexed IMAGE elements down by 1, in the process re-assigning
866 * their INDEX attributes. */
868 next_image = info->images[image - 1];
869 next_index_attr = unlink_index_attribute(next_image);
870 unlink_and_free_tree(next_image);
872 while (image < info->image_count) {
873 index_attr = next_index_attr;
874 next_image = info->images[image];
875 next_index_attr = unlink_index_attribute(next_image);
876 xmlAddChild(next_image, (xmlNode *)index_attr);
877 info->images[image - 1] = next_image;
881 xmlFreeProp(next_index_attr);
885 /* Architecture constants are from w64 mingw winnt.h */
/* These values index the description table in describe_arch(); only INTEL,
 * MIPS, ARM, IA64 and AMD64 have an entry there. */
886 #define PROCESSOR_ARCHITECTURE_INTEL 0
887 #define PROCESSOR_ARCHITECTURE_MIPS 1
888 #define PROCESSOR_ARCHITECTURE_ALPHA 2
889 #define PROCESSOR_ARCHITECTURE_PPC 3
890 #define PROCESSOR_ARCHITECTURE_SHX 4
891 #define PROCESSOR_ARCHITECTURE_ARM 5
892 #define PROCESSOR_ARCHITECTURE_IA64 6
893 #define PROCESSOR_ARCHITECTURE_ALPHA64 7
894 #define PROCESSOR_ARCHITECTURE_MSIL 8
895 #define PROCESSOR_ARCHITECTURE_AMD64 9
896 #define PROCESSOR_ARCHITECTURE_IA32_ON_WIN64 10
/* Map a numeric architecture code to a short display name.  The table uses
 * designated initializers, so codes without an entry are NULL gaps; this is
 * why the lookup checks both the array bound and for a non-NULL entry.
 * NOTE(review): the fallback for unknown codes is not visible in this
 * listing -- confirm what is returned. */
899 describe_arch(u64 arch)
901 static const tchar * const descriptions[] = {
902 [PROCESSOR_ARCHITECTURE_INTEL] = T("x86"),
903 [PROCESSOR_ARCHITECTURE_MIPS] = T("MIPS"),
904 [PROCESSOR_ARCHITECTURE_ARM] = T("ARM"),
905 [PROCESSOR_ARCHITECTURE_IA64] = T("ia64"),
906 [PROCESSOR_ARCHITECTURE_AMD64] = T("x86_64"),
909 if (arch < ARRAY_LEN(descriptions) && descriptions[arch] != NULL)
910 return descriptions[arch];
915 /* Print information from the WINDOWS element, if present. */
/* Fields are read from the WINDOWS child of 'image_node': ARCH (decoded via
 * describe_arch()), a series of text properties, the LANGUAGE children of
 * LANGUAGES plus its DEFAULT child, SYSTEMROOT, and the numeric children of
 * VERSION.  Text values go through info's translation cache.
 * NOTE(review): the guards that skip absent elements or NULL text are not
 * visible in this listing -- confirm each print is conditional. */
917 print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
919 xmlNode *windows_node;
921 xmlNode *version_node;
924 windows_node = xml_get_node_by_path(image_node, "WINDOWS");
928 tprintf(T("Architecture: %"TS"\n"),
929 describe_arch(xml_get_number_by_path(windows_node, "ARCH")));
931 text = xml_get_ttext_by_path(info, windows_node, "PRODUCTNAME");
933 tprintf(T("Product Name: %"TS"\n"), text);
935 text = xml_get_ttext_by_path(info, windows_node, "EDITIONID");
937 tprintf(T("Edition ID: %"TS"\n"), text);
939 text = xml_get_ttext_by_path(info, windows_node, "INSTALLATIONTYPE");
941 tprintf(T("Installation Type: %"TS"\n"), text);
943 text = xml_get_ttext_by_path(info, windows_node, "HAL");
945 tprintf(T("HAL: %"TS"\n"), text);
947 text = xml_get_ttext_by_path(info, windows_node, "PRODUCTTYPE");
949 tprintf(T("Product Type: %"TS"\n"), text);
951 text = xml_get_ttext_by_path(info, windows_node, "PRODUCTSUITE");
953 tprintf(T("Product Suite: %"TS"\n"), text);
955 langs_node = xml_get_node_by_path(windows_node, "LANGUAGES");
959 tprintf(T("Languages: "));
960 node_for_each_child(langs_node, lang_node) {
961 if (!node_is_element(lang_node, "LANGUAGE"))
963 text = node_get_ttext(info, lang_node);
966 tprintf(T("%"TS" "), text);
970 text = xml_get_ttext_by_path(info, langs_node, "DEFAULT");
972 tprintf(T("Default Language: %"TS"\n"), text);
975 text = xml_get_ttext_by_path(info, windows_node, "SYSTEMROOT");
977 tprintf(T("System Root: %"TS"\n"), text);
979 version_node = xml_get_node_by_path(windows_node, "VERSION");
981 tprintf(T("Major Version: %"PRIu64"\n"),
982 xml_get_number_by_path(version_node, "MAJOR"));
983 tprintf(T("Minor Version: %"PRIu64"\n"),
984 xml_get_number_by_path(version_node, "MINOR"));
985 tprintf(T("Build: %"PRIu64"\n"),
986 xml_get_number_by_path(version_node, "BUILD"));
987 tprintf(T("Service Pack Build: %"PRIu64"\n"),
988 xml_get_number_by_path(version_node, "SPBUILD"));
989 tprintf(T("Service Pack Level: %"PRIu64"\n"),
990 xml_get_number_by_path(version_node, "SPLEVEL"));
994 /* Prints information about the specified image. */
/* 'image' is a 1-based index into info->images and is not range-checked
 * here.  The four counters and WIMBOOT are read as numbers; the two
 * timestamps are formatted into 'timebuf' with wim_timestamp_to_str().  The
 * Windows-specific block is delegated to print_windows_info().
 * NOTE(review): the guards for the optional DISPLAYNAME, DISPLAYDESCRIPTION
 * and FLAGS lines are not visible in this listing -- confirm they are
 * printed only when present. */
996 xml_print_image_info(struct wim_xml_info *info, int image)
998 xmlNode * const image_node = info->images[image - 1];
1002 tprintf(T("Index: %d\n"), image);
1004 /* Always print the Name and Description, even if the corresponding XML
1005 * elements are not present. */
1006 text = xml_get_ttext_by_path(info, image_node, "NAME");
1007 tprintf(T("Name: %"TS"\n"), text ? text : T(""));
1008 text = xml_get_ttext_by_path(info, image_node, "DESCRIPTION");
1009 tprintf(T("Description: %"TS"\n"), text ? text : T(""));
1011 text = xml_get_ttext_by_path(info, image_node, "DISPLAYNAME");
1013 tprintf(T("Display Name: %"TS"\n"), text);
1015 text = xml_get_ttext_by_path(info, image_node, "DISPLAYDESCRIPTION");
1017 tprintf(T("Display Description: %"TS"\n"), text);
1019 tprintf(T("Directory Count: %"PRIu64"\n"),
1020 xml_get_number_by_path(image_node, "DIRCOUNT"));
1022 tprintf(T("File Count: %"PRIu64"\n"),
1023 xml_get_number_by_path(image_node, "FILECOUNT"));
1025 tprintf(T("Total Bytes: %"PRIu64"\n"),
1026 xml_get_number_by_path(image_node, "TOTALBYTES"));
1028 tprintf(T("Hard Link Bytes: %"PRIu64"\n"),
1029 xml_get_number_by_path(image_node, "HARDLINKBYTES"));
1031 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
1033 timebuf, ARRAY_LEN(timebuf));
1034 tprintf(T("Creation Time: %"TS"\n"), timebuf);
1036 wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
1037 "LASTMODIFICATIONTIME"),
1038 timebuf, ARRAY_LEN(timebuf));
1039 tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
1041 print_windows_info(info, image_node);
1043 text = xml_get_ttext_by_path(info, image_node, "FLAGS");
1045 tprintf(T("Flags: %"TS"\n"), text);
1047 tprintf(T("WIMBoot compatible: %"TS"\n"),
1048 xml_get_number_by_path(image_node, "WIMBOOT") ?
1049 T("yes") : T("no"));
1054 /*----------------------------------------------------------------------------*
1055 * Reading and writing the XML data *
1056 *----------------------------------------------------------------------------*/
/* Read the INDEX attribute of an IMAGE element as a base-10 number.  The
 * attribute is passed to node_get_number() as if it were a node, so the value
 * is taken from the attribute's text child.  The result is clamped to
 * INT_MAX; setup_images() rejects any index below 1. */
1059 image_node_get_index(const xmlNode *node)
1061 u64 v = node_get_number((const xmlNode *)xmlHasProp(node, "INDEX"), 10);
1062 return min(v, INT_MAX);
1065 /* Prepare the 'images' array from the XML document tree. */
/* Validation is done in two passes over the root's IMAGE children.  Pass 1
 * counts them, checks every index is at least 1 and the count stays below
 * MAX_IMAGES, and tracks the largest index, which must equal the count.
 * Pass 2 stores each element at images[index - 1] and treats an already
 * occupied slot as an error.  Together these checks accept only documents
 * whose IMAGE indices are exactly 1..count with no duplicates.  Failures are
 * reported as WIMLIB_ERR_XML, or WIMLIB_ERR_NOMEM if the array cannot be
 * allocated.
 * NOTE(review): the goto/label lines and the cleanup of info->images on
 * failure are not visible in this listing -- confirm. */
1067 setup_images(struct wim_xml_info *info, xmlNode *root)
1074 info->images = NULL;
1075 info->image_count = 0;
1077 node_for_each_child(root, child) {
1078 if (!node_is_element(child, "IMAGE"))
1080 index = image_node_get_index(child);
1081 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
1083 max_index = max(max_index, index);
1084 info->image_count++;
1086 if (unlikely(max_index != info->image_count))
1088 ret = WIMLIB_ERR_NOMEM;
1089 info->images = CALLOC(info->image_count, sizeof(info->images[0]));
1090 if (unlikely(!info->images))
1092 node_for_each_child(root, child) {
1093 if (!node_is_element(child, "IMAGE"))
1095 index = image_node_get_index(child);
1096 if (unlikely(info->images[index - 1]))
1098 info->images[index - 1] = child;
1103 ERROR("The WIM file's XML document does not contain exactly one IMAGE "
1104 "element per image!");
1105 ret = WIMLIB_ERR_XML;
1111 /* Reads the XML data from a WIM file. */
/* Steps: allocate the info struct, fetch the raw XML bytes with
 * wimlib_get_xml_data(), parse them as UTF-16LE with network access disabled
 * (XML_PARSE_NONET), require a WIM root element, refuse documents containing
 * ESD/ENCRYPTED (WIMLIB_ERR_WIM_IS_ENCRYPTED), build the images array, and
 * finally publish the result in wim->xml_info.  Parse or format problems are
 * reported as WIMLIB_ERR_XML.
 * NOTE(review): xmlDocGetRootElement() can return NULL, and
 * node_is_element() dereferences its argument -- confirm a root-less document
 * cannot reach that call.  The cleanup labels and the freeing of 'buf' are
 * not visible in this listing. */
1113 read_wim_xml_data(WIMStruct *wim)
1115 struct wim_xml_info *info;
1122 /* Allocate the 'struct wim_xml_info'. */
1123 ret = WIMLIB_ERR_NOMEM;
1124 info = alloc_wim_xml_info();
1128 /* Read the raw UTF-16LE bytes. */
1129 ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1133 /* Parse the document with libxml2, creating the document tree. */
1134 doc = xmlReadMemory(buf, bufsize, NULL, "UTF-16LE", XML_PARSE_NONET);
1138 ERROR("Unable to parse the WIM file's XML document!");
1139 ret = WIMLIB_ERR_XML;
1143 /* Verify the root element. */
1144 root = xmlDocGetRootElement(doc);
1145 if (!node_is_element(root, "WIM")) {
1146 ERROR("The WIM file's XML document has an unexpected format!");
1147 ret = WIMLIB_ERR_XML;
1151 /* Verify the WIM file is not encrypted. */
1152 if (xml_get_node_by_path(root, "ESD/ENCRYPTED")) {
1153 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
1157 /* Validate the image elements and set up the images[] array. */
1158 ret = setup_images(info, root);
1162 /* Save the document and return. */
1165 wim->xml_info = info;
1176 /* Swap the INDEX attributes of two IMAGE elements. */
/* The attribute objects themselves are exchanged: both are unlinked first and
 * then each is attached to the other element.  Passing the same element twice
 * is a no-op, so calling this a second time with the same arguments undoes
 * the first call. */
1178 swap_index_attributes(xmlNode *image_node_1, xmlNode *image_node_2)
1180 xmlAttr *attr_1, *attr_2;
1182 if (image_node_1 != image_node_2) {
1183 attr_1 = unlink_index_attribute(image_node_1);
1184 attr_2 = unlink_index_attribute(image_node_2);
1185 xmlAddChild(image_node_1, (xmlNode *)attr_2);
1186 xmlAddChild(image_node_2, (xmlNode *)attr_1);
/* Apply the temporary document changes needed before serializing.
 *
 * The replacement TOTALBYTES element is allocated first, so the only failure
 * (WIMLIB_ERR_NOMEM) happens before the document has been touched.  For a
 * single-image write the IMAGE elements are unlinked from the document and
 * the INDEX attributes of images[0] and the requested image are exchanged.
 * Unless total_bytes is WIM_TOTALBYTES_USE_EXISTING, the current top-level
 * TOTALBYTES element is unlinked and handed back through
 * *orig_totalbytes_node_ret (NULL if there was none), and the new element, if
 * any, is linked under the root.  With WIM_TOTALBYTES_OMIT no new element is
 * created.
 * NOTE(review): the filter that keeps the requested image linked is on a line
 * not visible in this listing -- confirm. */
1191 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
1192 xmlNode **orig_totalbytes_node_ret)
1194 xmlNode *totalbytes_node = NULL;
1196 /* Allocate the new TOTALBYTES element if needed. */
1197 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
1198 total_bytes != WIM_TOTALBYTES_OMIT) {
1199 totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES",
1201 if (!totalbytes_node)
1202 return WIMLIB_ERR_NOMEM;
1205 /* Adjust the IMAGE elements if needed. */
1206 if (image != WIMLIB_ALL_IMAGES) {
1207 /* We're writing a single image only. Temporarily unlink all
1208 * other IMAGE elements from the document. */
1209 for (int i = 0; i < info->image_count; i++)
1211 xmlUnlinkNode(info->images[i]);
1213 /* Temporarily set the INDEX attribute of the needed IMAGE
1215 swap_index_attributes(info->images[0], info->images[image - 1]);
1218 /* Adjust (add, change, or remove) the TOTALBYTES element if needed. */
1219 *orig_totalbytes_node_ret = NULL;
1220 if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
1221 /* Unlink the previous TOTALBYTES element, if any. */
1222 *orig_totalbytes_node_ret = xml_get_node_by_path(info->root,
1224 if (*orig_totalbytes_node_ret)
1225 xmlUnlinkNode(*orig_totalbytes_node_ret);
1227 /* Link in the new TOTALBYTES element, if any. */
1228 if (totalbytes_node)
1229 xmlAddChild(info->root, totalbytes_node);
/* Undo the changes made by prepare_document_for_write().
 *
 * IMAGE elements are re-attached with xmlAddChild(), so their position among
 * the root's children may differ from before; the images[] array is not
 * affected by this.  The second swap_index_attributes() call with the same
 * pair reverses the first one.  A saved TOTALBYTES element is put back via
 * node_replace_child_element(), which replaces a same-named child or appends.
 * NOTE(review): when no original TOTALBYTES existed, nothing in the visible
 * code removes an element added for the write -- confirm this is intended.
 * The filter that skips the image that stayed linked is on a line not visible
 * in this listing. */
1235 restore_document_after_write(struct wim_xml_info *info, int image,
1236 xmlNode *orig_totalbytes_node)
1238 /* Restore the IMAGE elements if needed. */
1239 if (image != WIMLIB_ALL_IMAGES) {
1240 /* We wrote a single image only. Re-link all other IMAGE
1241 * elements to the document. */
1242 for (int i = 0; i < info->image_count; i++)
1244 xmlAddChild(info->root, info->images[i]);
1246 /* Restore the original INDEX attributes. */
1247 swap_index_attributes(info->images[0], info->images[image - 1]);
1250 /* Restore the original TOTALBYTES element if needed. */
1251 if (orig_totalbytes_node)
1252 node_replace_child_element(info->root, orig_totalbytes_node);
1256 * Writes the XML data to a WIM file.
1258 * 'image' specifies the image(s) to include in the XML data. Normally it is
1259 * WIMLIB_ALL_IMAGES, but it can also be a 1-based image index.
1261 * 'total_bytes' is the number to use in the top-level TOTALBYTES element, or
1262 * WIM_TOTALBYTES_USE_EXISTING to use the existing value from the XML document
1263 * (if any), or WIM_TOTALBYTES_OMIT to omit the TOTALBYTES element entirely.
/* Flow: apply the temporary document changes, serialize into an in-memory
 * xmlBuffer that starts with the two bytes FF FE (the UTF-16LE byte order
 * mark), write that buffer as an uncompressed resource, free the buffer, and
 * revert the temporary changes.  The revert runs on the success path and on
 * every failure after the prepare step.  A serialization failure (negative
 * result from xmlSaveDoc() or xmlSaveClose()) is reported as
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): several arguments of the resource write call are on lines not
 * visible in this listing. */
1266 write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
1267 struct wim_reshdr *out_reshdr, int write_resource_flags)
1269 struct wim_xml_info *info = wim->xml_info;
1273 xmlNode *orig_totalbytes_node;
1274 xmlSaveCtxt *save_ctx;
1276 /* Make any needed temporary changes to the document. */
1277 ret = prepare_document_for_write(info, image, total_bytes,
1278 &orig_totalbytes_node);
1282 /* Create an in-memory buffer to hold the encoded document. */
1283 ret = WIMLIB_ERR_NOMEM;
1284 buffer = xmlBufferCreate();
1286 goto out_restore_document;
1288 /* Encode the document in UTF-16LE, with a byte order mark, and with no
1289 * XML declaration. Some other WIM software requires all of these
1290 * characteristics. */
1291 ret = WIMLIB_ERR_NOMEM;
1292 if (xmlBufferCat(buffer, "\xff\xfe"))
1293 goto out_free_buffer;
1294 save_ctx = xmlSaveToBuffer(buffer, "UTF-16LE", XML_SAVE_NO_DECL);
1296 goto out_free_buffer;
1297 ret = xmlSaveDoc(save_ctx, info->doc);
1298 ret2 = xmlSaveClose(save_ctx);
1299 if (ret < 0 || ret2 < 0) {
1300 ERROR("Unable to serialize the WIM file's XML document!");
1301 ret = WIMLIB_ERR_NOMEM;
1302 goto out_free_buffer;
1305 /* Write the XML data uncompressed. Although wimlib can handle
1306 * compressed XML data, some other WIM software cannot. */
1307 ret = write_wim_resource_from_buffer(xmlBufferContent(buffer),
1308 xmlBufferLength(buffer),
1311 WIMLIB_COMPRESSION_TYPE_NONE,
1315 write_resource_flags);
1317 xmlBufferFree(buffer);
1318 out_restore_document:
1319 /* Revert any temporary changes we made to the document. */
1320 restore_document_after_write(info, image, orig_totalbytes_node);
1325 /*----------------------------------------------------------------------------*
1326 * Global setup functions *
1327 *----------------------------------------------------------------------------*/
1330 xml_global_init(void)
1336 xml_global_cleanup(void)
/* Route libxml2's allocations through the given functions.  Note the argument
 * order: this function takes (malloc, free, realloc) but passes them to
 * xmlMemSetup() as (free, malloc, realloc), followed by wimlib_strdup as the
 * string duplicator. */
1342 xml_set_memory_allocator(void *(*malloc_func)(size_t),
1343 void (*free_func)(void *),
1344 void *(*realloc_func)(void *, size_t))
1346 xmlMemSetup(free_func, malloc_func, realloc_func, wimlib_strdup);
1349 /*----------------------------------------------------------------------------*
1350 * Library API functions *
1351 *----------------------------------------------------------------------------*/
/* Read the WIM's XML resource into a buffer.  On success *buf_ret receives
 * the data produced by wim_reshdr_to_data() and *bufsize_ret the resource's
 * uncompressed size.  *bufsize_ret is stored before the read is attempted, so
 * it is written even when the read then fails.
 * Returns WIMLIB_ERR_NO_FILENAME or WIMLIB_ERR_INVALID_PARAM for the argument
 * checks below, otherwise the status of wim_reshdr_to_data().
 * NOTE(review): the first check fails only when there is no filename AND the
 * input descriptor is seekable -- confirm that polarity is intended.
 * Ownership of the returned buffer is not visible here; presumably the caller
 * frees it -- confirm. */
1354 wimlib_get_xml_data(WIMStruct *wim, void **buf_ret, size_t *bufsize_ret)
1356 const struct wim_reshdr *xml_reshdr;
1358 if (wim->filename == NULL && filedes_is_seekable(&wim->in_fd))
1359 return WIMLIB_ERR_NO_FILENAME;
1361 if (buf_ret == NULL || bufsize_ret == NULL)
1362 return WIMLIB_ERR_INVALID_PARAM;
1364 xml_reshdr = &wim->hdr.xml_data_reshdr;
1366 *bufsize_ret = xml_reshdr->uncompressed_size;
1367 return wim_reshdr_to_data(xml_reshdr, wim, buf_ret);
/* Fetch the raw XML bytes with wimlib_get_xml_data() and write them unchanged
 * to 'fp'.  A short write is reported as WIMLIB_ERR_WRITE.
 * NOTE(review): the error check after the fetch and the release of 'buf' are
 * not visible in this listing -- confirm. */
1371 wimlib_extract_xml_data(WIMStruct *wim, FILE *fp)
1377 ret = wimlib_get_xml_data(wim, &buf, &bufsize);
1381 if (fwrite(buf, 1, bufsize, fp) != bufsize) {
1382 ERROR_WITH_ERRNO("Failed to extract XML data");
1383 ret = WIMLIB_ERR_WRITE;
/* Report whether any image other than 'excluded_image' (1-based) already has
 * the NAME 'name'.  Names are compared as UTF-8 with xmlStrEqual(), i.e. an
 * exact match.  The scan stops at the first hit.  wimlib_image_name_in_use()
 * passes WIMLIB_NO_IMAGE for 'excluded_image'.
 * NOTE(review): the results returned for an empty name and for a failed
 * UTF-8 conversion are on lines not visible in this listing; presumably
 * "not in use" -- confirm. */
1390 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
1392 const struct wim_xml_info *info = wim->xml_info;
1393 const xmlChar *name_utf8;
1396 /* Any number of images can have "no name". */
1397 if (!name || !*name)
1400 /* Check for images that have the specified name. */
1401 if (tstr_get_utf8(name, &name_utf8))
1403 for (int i = 0; i < info->image_count && !found; i++) {
1404 if (i + 1 == excluded_image)
1406 found = xmlStrEqual(name_utf8, xml_get_text_by_path(
1407 info->images[i], "NAME"));
1409 tstr_put_utf8(name_utf8);
/* Wrapper around image_name_in_use() that passes WIMLIB_NO_IMAGE as the
 * image to exclude from the check. */
1414 wimlib_image_name_in_use(const WIMStruct *wim, const tchar *name)
1416 return image_name_in_use(wim, name, WIMLIB_NO_IMAGE);
/* Returns the image's NAME property.  The default passed to
 * get_image_property() is the empty string, so an image without a NAME
 * element yields "" rather than NULL. */
1419 WIMLIBAPI const tchar *
1420 wimlib_get_image_name(const WIMStruct *wim, int image)
1422 return get_image_property(wim, image, "NAME", T(""));
/* Returns the image's DESCRIPTION property.  The default passed to
 * get_image_property() is NULL, so a missing DESCRIPTION element yields
 * NULL. */
1425 WIMLIBAPI const tchar *
1426 wimlib_get_image_description(const WIMStruct *wim, int image)
1428 return get_image_property(wim, image, "DESCRIPTION", NULL);
/* Generic property getter: 'property_name' is converted to UTF-8 and used as
 * the XML path for get_image_property(), with NULL as the default value.
 * NOTE(review): the returns taken for an empty name and for a failed
 * conversion are on lines not visible in this listing; presumably NULL --
 * confirm. */
1431 WIMLIBAPI const tchar *
1432 wimlib_get_image_property(const WIMStruct *wim, int image,
1433 const tchar *property_name)
1435 const xmlChar *name;
1438 if (!property_name || !*property_name)
1440 if (tstr_get_utf8(property_name, &name))
1442 value = get_image_property(wim, image, name, NULL);
1443 tstr_put_utf8(name);
/* The collision check excludes 'image' itself, so re-setting an image's
 * current name is not a collision.  The check runs before the image number is
 * validated; that validation happens inside set_image_property(). */
1448 wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
1450 if (image_name_in_use(wim, name, image))
1451 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
1453 return set_image_property(wim, image, "NAME", name);
/* Sets the image's DESCRIPTION property via set_image_property().
 * NOTE(review): the function name is spelled "descripton"; it is presumably
 * part of the exported interface, so renaming it would break callers --
 * confirm before changing. */
1457 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
1459 return set_image_property(wim, image, "DESCRIPTION", description);
/* Sets the image's FLAGS property via set_image_property() and returns that
 * call's status. */
1463 wimlib_set_image_flags(WIMStruct *wim, int image, const tchar *flags)
1465 return set_image_property(wim, image, "FLAGS", flags);
/* Generic property setter.  A NULL or empty 'property_name' is rejected with
 * WIMLIB_ERR_INVALID_PARAM.  Otherwise the name is converted to UTF-8, used
 * as the XML path for set_image_property(), and then handed to
 * tstr_put_utf8().
 * NOTE(review): the error check after the conversion and the final return
 * are on lines not visible in this listing -- confirm. */
1469 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
1470 const tchar *property_value)
1472 const xmlChar *name;
1475 if (!property_name || !*property_name)
1476 return WIMLIB_ERR_INVALID_PARAM;
1478 ret = tstr_get_utf8(property_name, &name);
1481 ret = set_image_property(wim, image, name, property_value);
1482 tstr_put_utf8(name);