wimlib.net Git - wimlib/commitdiff
Eliminate the dependency on libxml2
author Eric Biggers <ebiggers3@gmail.com>
Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
libxml2 is the only remaining third-party library that Windows builds of
wimlib need.  It's a bit of a pain to have to download it, build it, and
trick libtool into linking it into the resulting DLL.  It then
constitutes a significant part of the size of the resulting DLL, even
with the minimal libxml2 configuration options being used.

In reality, WIM files only use a small subset of XML containing the most
commonly used XML features.  Using a full-featured XML library (that
supports "features" like External Entities that we have to remember to
disable) is a bit dangerous and not actually necessary.  7-Zip's WIM
support, for example, just uses a very minimal home-brew XML processor.

Another issue is that the libxml2 API always uses UTF-8, which causes
the conversion UTF-16LE => UTF-8 => UTF-16LE to be needed on Windows.
This isn't really an "issue", per se, but it shouldn't be necessary.

Finally, wimlib was integrating with libxml2 at a low level via the tree
API, and it overlooked some things.  For example, libxml2 trees have
separate CDATA and TEXT nodes, but wimlib was only looking at TEXT, so
CDATA was ignored.  It was also possible for wimlib to create a document
containing control characters, which is not valid XML so it could not be
read.  These weren't very important issues, but the point is, just using
an XML library doesn't solve quite as many problems as one would hope...

Therefore, just add a simple XML 1.0 processor directly in the source
code.  It handles all XML features that are used in WIM files, plus a
bit more for futureproofing.  It's also faster than libxml2.

19 files changed:
.github/workflows/ci.yml
.gitignore
Makefile.am
NEWS
README
README.WINDOWS
configure.ac
include/wimlib/test_support.h
include/wimlib/xml.h
include/wimlib/xmlproc.h [new file with mode: 0644]
include/wimlib_tchar.h
src/util.c
src/wim.c
src/xml.c
src/xmlproc.c [new file with mode: 0644]
tools/make-windows-release
tools/run-sparse
tools/windeps/Makefile [deleted file]
tools/windeps/sha256sums [deleted file]

index 02fa6ea5b3d254ac00274fb9711c7530ada9b0b5..95d88aaccad6efa0f8dacc9b0d95bf7865516a2e 100644 (file)
@@ -2,7 +2,7 @@ name: CI
 on: [pull_request, push]
 env:
   DEF_CFLAGS: -O2 -g -Wall -Werror
-  DEPENDENCIES: autoconf automake libtool pkg-config libxml2-dev libfuse-dev ntfs-3g-dev
+  DEPENDENCIES: autoconf automake libtool pkg-config libfuse-dev ntfs-3g-dev
 
 jobs:
   gcc-build-and-test:
@@ -54,7 +54,7 @@ jobs:
       run: |
         sudo dpkg --add-architecture i386
         sudo apt-get update
-        sudo apt-get install -y gcc-multilib $DEPENDENCIES libxml2-dev:i386
+        sudo apt-get install -y gcc-multilib $DEPENDENCIES
     - run: ./bootstrap
     - run: ./configure CC=gcc CFLAGS="-m32 $DEF_CFLAGS" --without-fuse --without-ntfs-3g
     - run: make -j8 check V=1
@@ -109,7 +109,7 @@ jobs:
     - uses: actions/checkout@v3
     - name: Install dependencies
       run: |
-        brew install autoconf automake libtool pkg-config libxml2
+        brew install autoconf automake libtool pkg-config
     - run: ./bootstrap
     - run: ./configure CFLAGS="$DEF_CFLAGS" --without-fuse --without-ntfs-3g
     - run: make -j8 check V=1
@@ -145,7 +145,6 @@ jobs:
           pkg-config
           make
           mingw-w64-${{matrix.env}}-cc
-          mingw-w64-${{matrix.env}}-libxml2
     - run: ./bootstrap
     - run: ./configure CFLAGS="$DEF_CFLAGS" --without-fuse --without-ntfs-3g
     - run: make -j8
index ef29a1d11853db45cd1e870ccf14b527ba8cd9e4..feb1eaba93b7c19acbd965f201bbcb1ca216b82b 100644 (file)
 /tests/tree-cmp
 /tests/wlfuzz
 /tests/wlfuzz.exe
-/tools/windeps/*.tar.*
-/tools/windeps/COPYING.*
-/tools/windeps/build_*
-/tools/windeps/libxml2*
-/tools/windeps/mingw*
-/tools/windeps/sysroot_*
 /wimlib-*-bin/
 /wimlib-*.tar
 /wimlib-*.tar.*
index bb801a523856e7604ce3b89df751a356fce801f5..d784d46a9949e3102976956457c58e0b4142becc 100644 (file)
@@ -92,6 +92,7 @@ libwim_la_SOURCES =           \
        src/write.c             \
        src/xml.c               \
        src/xml_windows.c       \
+       src/xmlproc.c           \
        src/xpress_compress.c   \
        src/xpress_decompress.c \
        include/wimlib/alloca.h         \
@@ -156,6 +157,7 @@ libwim_la_SOURCES =         \
        include/wimlib/xattr.h          \
        include/wimlib/xml.h            \
        include/wimlib/xml_windows.h    \
+       include/wimlib/xmlproc.h        \
        include/wimlib/xpress_constants.h
 
 if WITH_NTFS_3G
@@ -191,7 +193,6 @@ endif
 libwim_la_CFLAGS =             \
        $(AM_CFLAGS)            \
        $(PTHREAD_CFLAGS)       \
-       $(LIBXML2_CFLAGS)       \
        $(LIBNTFS_3G_CFLAGS)    \
        $(LIBFUSE_CFLAGS)
 
@@ -199,7 +200,6 @@ libwim_la_LDFLAGS = $(AM_LDFLAGS) -version-info 36:0:21
 
 libwim_la_LIBADD =             \
        $(PTHREAD_LIBS)         \
-       $(LIBXML2_LIBS)         \
        $(LIBNTFS_3G_LIBS)      \
        $(LIBFUSE_LIBS)         \
        $(LIBRT_LIBS)           \
diff --git a/NEWS b/NEWS
index 7447e5e52896cff6f7bba550a1eee0b7046d83d0..df8603359f02eb0c1705908ba952903022e7af3b 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,13 +1,12 @@
 Version 1.14.0-BETA1:
-       Removed OpenSSL (libcrypto) as a dependency of wimlib.
+       Removed libxml2 and libcrypto (OpenSSL) as dependencies of wimlib.
+       Also removed winpthreads as a dependency of wimlib on Windows.
 
        Improved the performance of the Windows binaries on CPUs that have SHA-1
        instructions.
 
        Removed support for Windows XP.
 
-       On Windows, wimlib no longer depends on winpthreads.
-
        Fixed a bug in 'wimsplit' where it didn't accept part sizes of 4 GiB or
        larger on Windows and on 32-bit platforms.
 
diff --git a/README b/README
index 6ea31854848e6b0a288f98e7341053e940fd763f..7f382cfe422b39cb76b573d74d4c8cd2ca8e13db 100644 (file)
--- a/README
+++ b/README
@@ -122,12 +122,6 @@ with it, when building for a UNIX-like system from source.  If you have
 downloaded the Windows binary distribution of wimlib and wimlib-imagex then all
 dependencies were already included and this section is irrelevant.
 
-* libxml2 (required)
-       This is a commonly used free library to read and write XML documents.
-       Almost all Linux distributions should include this; however, you may
-       need to install the header files, which might be in a package named
-       "libxml2-dev" or similar.  For more information see http://xmlsoft.org/.
-
 * libfuse (optional but recommended)
        Unless configured --without-fuse, wimlib requires a non-ancient version
        of libfuse.  Most Linux distributions already include this, but make
index 0308a2edc9b54f8aa9e2a1751c547ca7383a79c0..28d8d7b4e9fffe2a6b82a43314e50752701712a0 100644 (file)
@@ -103,7 +103,6 @@ packages from category "Devel":
     - make
     - mingw64-x86_64-binutils
     - mingw64-x86_64-gcc-g++
-    - mingw64-x86_64-libxml2
     - pkg-config
 
 Download wimlib's source code from https://wimlib.net/downloads/wimlib-1.13.6.tar.gz.
@@ -122,17 +121,6 @@ have been produced in the .libs directory.
 By default the binaries are built with debug symbols.  If desired, you can use
 x86_64-w64-mingw32-strip to strip them.
 
-libwim-15.dll will be linked to several other DLLs which you will need as well:
-
-    - libxml2-2.dll, which also requires:
-        - iconv.dll
-        - liblzma-5.dll
-        - zlib1.dll
-
-These DLLs can be found in "C:\cygwin\usr\x86_64-w64-mingw32\sys-root\mingw\bin"
-and must be placed alongside libwim-15.dll for it to run portably.  But see
-below for an alternative.
-
 Building 32-bit binaries is very similar, but you'll need to replace "x86_64"
 with "i686" everywhere in the above instructions, and libwim-15.dll will also
 depend on libgcc_s_sjlj-1.dll.  Note that you can build both 32-bit and 64-bit
@@ -163,8 +151,7 @@ bootstrap the repository, and run the Windows release script:
     ./bootstrap
     ./tools/make-windows-release x86_64
 
-The release script will download and build libxml2 as a static library, then
-build wimlib, then do some final tasks and bundle the resulting files up into a
-ZIP archive.  If successful you'll end up with a file like
-"wimlib-1.13.6-windows-x86_64-bin.zip", just like the official releases.  For
-32-bit binaries just use "i686" instead of "x86_64".
+The release script will build wimlib, then do some final tasks and bundle the
+resulting files up into a ZIP archive.  If successful you'll end up with a file
+like "wimlib-1.13.6-windows-x86_64-bin.zip", just like the official releases.
+For 32-bit binaries just use "i686" instead of "x86_64".
index 1d53735331136c9c612ea36b3cd7145e53839a1f..34b2e184d383ccfef848d1c8f96eecd44bb9570a 100644 (file)
@@ -103,10 +103,6 @@ if test "$WINDOWS_NATIVE_BUILD" != "yes"; then
        AX_PTHREAD([], [AC_MSG_ERROR(["cannot find pthreads library"])])
 fi
 
-# ------------------------------ libxml2 --------------------------------------
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0])
-PKGCONFIG_PRIVATE_REQUIRES="$PKGCONFIG_PRIVATE_REQUIRES libxml-2.0"
-
 ###############################################################################
 #                        Configuration options                               #
 ###############################################################################
index 8c19a01a7c8a926e26b146c82930442470dea45a..4d6df5578673f448244913d29eb249ec75ff4cda 100644 (file)
@@ -17,6 +17,9 @@ extern int
 wimlib_compare_images(WIMStruct *wim1, int image1,
                      WIMStruct *wim2, int image2, int cmp_flags);
 
+extern int
+wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret);
+
 #endif /* ENABLE_TEST_SUPPORT */
 
 #endif /* _WIMLIB_TEST_SUPPORT_H */
index 9ce52104093ea646686775448733672f43794613..a8d8f91f5ca40e61a8da38757c5ee7ea7b5db196 100644 (file)
@@ -71,17 +71,4 @@ write_wim_xml_data(WIMStruct *wim, int image,
                   u64 total_bytes, struct wim_reshdr *out_reshdr,
                   int write_resource_flags);
 
-/*****************************************************************************/
-
-extern void
-xml_global_init(void);
-
-extern void
-xml_global_cleanup(void);
-
-extern void
-xml_set_memory_allocator(void *(*malloc_func)(size_t),
-                        void (*free_func)(void *),
-                        void *(*realloc_func)(void *, size_t));
-
 #endif /* _WIMLIB_XML_H */
diff --git a/include/wimlib/xmlproc.h b/include/wimlib/xmlproc.h
new file mode 100644 (file)
index 0000000..374d27e
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef _WIMLIB_XMLPROC_H
+#define _WIMLIB_XMLPROC_H
+
+#include "wimlib/list.h"
+#include "wimlib/types.h"
+
+/*****************************************************************************/
+
+enum xml_node_type {
+       XML_ELEMENT_NODE,
+       XML_TEXT_NODE,
+       XML_ATTRIBUTE_NODE,
+};
+
+struct xml_node {
+       enum xml_node_type type;        /* type of node */
+       tchar *name;                    /* name of ELEMENT or ATTRIBUTE */
+       tchar *value;                   /* value of TEXT or ATTRIBUTE */
+       struct xml_node *parent;        /* parent, or NULL if none */
+       struct list_head children;      /* children; only used for ELEMENT */
+       struct list_head sibling_link;
+};
+
+/* Iterate through the children of an xml_node.  Does nothing if passed NULL. */
+#define xml_node_for_each_child(parent, child) \
+       if (parent) list_for_each_entry(child, &(parent)->children, sibling_link)
+
+static inline bool
+xml_node_is_element(const struct xml_node *node, const tchar *name)
+{
+       return node->type == XML_ELEMENT_NODE && !tstrcmp(node->name, name);
+}
+
+struct xml_node *
+xml_new_element(struct xml_node *parent, const tchar *name);
+
+struct xml_node *
+xml_new_element_with_text(struct xml_node *parent, const tchar *name,
+                         const tchar *text);
+
+struct xml_node *
+xml_new_attrib(struct xml_node *parent, const tchar *name, const tchar *value);
+
+void
+xml_add_child(struct xml_node *parent, struct xml_node *child);
+
+void
+xml_unlink_node(struct xml_node *node);
+
+void
+xml_free_node(struct xml_node *node);
+
+const tchar *
+xml_element_get_text(const struct xml_node *element);
+
+int
+xml_element_set_text(struct xml_node *element, const tchar *text);
+
+struct xml_node *
+xml_get_attrib(const struct xml_node *element, const tchar *name);
+
+int
+xml_set_attrib(struct xml_node *element, const tchar *name, const tchar *value);
+
+void
+xml_replace_child(struct xml_node *parent, struct xml_node *replacement);
+
+struct xml_node *
+xml_clone_tree(struct xml_node *orig);
+
+bool
+xml_legal_name(const tchar *name);
+
+bool
+xml_legal_value(const tchar *value);
+
+/*****************************************************************************/
+
+int
+xml_parse_document(const tchar *p, struct xml_node **doc_ret);
+
+/*****************************************************************************/
+
+struct xml_out_buf {
+       tchar *buf;
+       size_t count;
+       size_t capacity;
+       bool oom;
+};
+
+int
+xml_write_document(struct xml_node *doc, struct xml_out_buf *buf);
+
+#endif /* _WIMLIB_XMLPROC_H */
index 339b46d6bdbdfe40db4d731f2112de7f8f0a3fc4..781f370c7d42760ebb3391ee6adc7b6daea2e702 100644 (file)
@@ -39,6 +39,7 @@ typedef wchar_t tchar;
 #  define tstrchr      wcschr
 #  define tstrpbrk     wcspbrk
 #  define tstrrchr     wcsrchr
+#  define tstrstr      wcsstr
 #  define tstrlen      wcslen
 #  define tmemcmp      wmemcmp
 #  define tstrcasecmp   _wcsicmp
@@ -102,6 +103,7 @@ typedef char tchar;
 #  define tstrchr      strchr
 #  define tstrpbrk     strpbrk
 #  define tstrrchr     strrchr
+#  define tstrstr      strstr
 #  define tstrlen      strlen
 #  define tmemcmp      memcmp
 #  define tstrcasecmp   strcasecmp
index ef4554efc7c19e189cebfe7528585370c5f6869d..1f9f779282c9834a017e47abefb1ca958520b7c4 100644 (file)
@@ -3,7 +3,7 @@
  */
 
 /*
- * Copyright (C) 2012-2016 Eric Biggers
+ * Copyright 2012-2023 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
@@ -43,7 +43,6 @@
 #include "wimlib/error.h"
 #include "wimlib/timestamp.h"
 #include "wimlib/util.h"
-#include "wimlib/xml.h"
 
 /*******************
  * Memory allocation
@@ -153,9 +152,6 @@ wimlib_set_memory_allocator(void *(*malloc_func)(size_t),
        wimlib_malloc_func  = malloc_func  ? malloc_func  : malloc;
        wimlib_free_func    = free_func    ? free_func    : free;
        wimlib_realloc_func = realloc_func ? realloc_func : realloc;
-
-       xml_set_memory_allocator(wimlib_malloc_func, wimlib_free_func,
-                                wimlib_realloc_func);
        return 0;
 }
 
index e9a6c8eba1ca665d619272e82c9eb92c1ed2faf8..97125c4b10549769d65c2792040f4b6fb4f4e6ee 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -979,7 +979,6 @@ wimlib_global_init(int init_flags)
                goto out_unlock;
 
        init_cpu_features();
-       xml_global_init();
 #ifdef _WIN32
        ret = win32_global_init(init_flags);
        if (ret)
@@ -1010,7 +1009,6 @@ wimlib_global_cleanup(void)
        if (!lib_initialized)
                goto out_unlock;
 
-       xml_global_cleanup();
 #ifdef _WIN32
        win32_global_cleanup();
 #endif
index 3812ce1e19d7926e499a9c230e0bddb9664d02ea..3194978b015289df871daf5af8b828ae0dbe7d41 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1,11 +1,9 @@
 /*
- * xml.c
- *
- * Deals with the XML information in WIM files.  Uses the C library libxml2.
+ * xml.c - deals with the XML information in WIM files
  */
 
 /*
- * Copyright (C) 2012-2016 Eric Biggers
+ * Copyright 2012-2023 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
@@ -25,9 +23,7 @@
 #  include "config.h"
 #endif
 
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <libxml/xmlsave.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include "wimlib/blob_table.h"
@@ -39,6 +35,7 @@
 #include "wimlib/resource.h"
 #include "wimlib/timestamp.h"
 #include "wimlib/xml.h"
+#include "wimlib/xmlproc.h"
 #include "wimlib/write.h"
 
 /*
  */
 struct wim_xml_info {
 
-       /* The parsed XML document as a libxml2 document tree  */
-       xmlDocPtr doc;
-
-       /* The root element of the document.  This is a cached value, equal to
-        * xmlDocGetRootElement(doc).  */
-       xmlNode *root;
+       /* The XML document in tree form */
+       struct xml_node *root;
 
        /* A malloc()ed array containing a pointer to the IMAGE element for each
         * WIM image.  The image with 1-based index 'i' is at index 'i - 1' in
         * this array.  Note: these pointers are cached values, since they could
         * also be found by searching the document.  */
-       xmlNode **images;
+       struct xml_node **images;
 
        /* The number of WIM images (the length of 'images')  */
        int image_count;
-
-#if TCHAR_IS_UTF16LE
-       /* Temporary memory for UTF-8 => 'tchar' string translations.  When an
-        * API function needs to return a 'tchar' string, it uses one of these
-        * array slots to hold the string and returns a pointer to it.  */
-       tchar *strings[128];
-       size_t next_string_idx;
-       size_t num_strings;
-#endif
 };
 
-/*----------------------------------------------------------------------------*
- *                            Internal functions                              *
- *----------------------------------------------------------------------------*/
-
-/* Iterate through the children of an xmlNode.  */
-#define node_for_each_child(parent, child)     \
-       for (child = (parent)->children; child != NULL; child = child->next)
-
-/* Is the specified node an element of the specified name?  */
-static bool
-node_is_element(const xmlNode *node, const xmlChar *name)
-{
-       return node->type == XML_ELEMENT_NODE && xmlStrEqual(node->name, name);
-}
-
-/* Retrieve a pointer to the UTF-8 text contents of the specified node, or NULL
- * if the node has no text contents.  This assumes the simple case where the
- * node has a single TEXT child node.  */
-static const xmlChar *
-node_get_text(const xmlNode *node)
-{
-       const xmlNode *child;
-
-       if (!node)
-               return NULL;
-       node_for_each_child(node, child)
-               if (child->type == XML_TEXT_NODE && child->content)
-                       return child->content;
-       return NULL;
-}
-
-/* Retrieve an unsigned integer from the contents of the specified node,
- * decoding it using the specified base.  If the node has no contents or does
- * not contain a valid number, returns 0.  */
 static u64
-node_get_number(const xmlNode *node, int base)
+parse_number(const tchar *str, int base)
 {
-       const xmlChar *str = node_get_text(node);
-       char *end;
+       tchar *end;
        unsigned long long v;
 
        if (!str)
                return 0;
-       v = strtoull(str, &end, base);
-       if ((xmlChar *)end == str || *end || v >= UINT64_MAX)
+       v = tstrtoull(str, &end, base);
+       if (end == str || *end || v >= UINT64_MAX)
                return 0;
        return v;
 }
 
-/* Retrieve the timestamp from a time node.  This node should have child
- * elements HIGHPART and LOWPART; these elements will be used to construct a
- * Windows-style timestamp.  */
+/*
+ * Retrieve an unsigned integer from the contents of the specified element,
+ * decoding it using the specified base.  If the element has no contents or does
+ * not contain a valid number, returns 0.
+ */
 static u64
-node_get_timestamp(const xmlNode *node)
-{
-       u64 timestamp = 0;
-       xmlNode *child;
-
-       if (!node)
-               return 0;
-       node_for_each_child(node, child) {
-               if (node_is_element(child, "HIGHPART"))
-                       timestamp |= node_get_number(child, 16) << 32;
-               else if (node_is_element(child, "LOWPART"))
-                       timestamp |= node_get_number(child, 16);
-       }
-       return timestamp;
-}
-
-static int
-tstr_get_utf8(const tchar *tstr, const xmlChar **utf8_ret)
-{
-#if TCHAR_IS_UTF16LE
-       return utf16le_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar),
-                              (char **)utf8_ret, NULL);
-#else
-       *utf8_ret = (const xmlChar *)tstr;
-       return 0;
-#endif
-}
-
-static void
-tstr_put_utf8(const xmlChar *utf8)
-{
-#if TCHAR_IS_UTF16LE
-       FREE((char *)utf8);
-#endif
-}
-
-/* Retrieve the text contents of an XML element as a 'tchar' string.  If not
- * found or if the text could not be translated, returns NULL.  */
-static const tchar *
-node_get_ttext(struct wim_xml_info *info, xmlNode *node)
-{
-       const xmlChar *text = node_get_text(node);
-
-#if TCHAR_IS_UTF16LE
-       tchar **ttext_p;
-
-       if (!text)
-               return NULL;
-
-       ttext_p = &info->strings[info->next_string_idx];
-       if (info->num_strings >= ARRAY_LEN(info->strings)) {
-               FREE(*ttext_p);
-               *ttext_p = NULL;
-       }
-       if (utf8_to_tstr(text, strlen(text), ttext_p, NULL))
-               return NULL;
-       if (info->num_strings < ARRAY_LEN(info->strings))
-               info->num_strings++;
-       info->next_string_idx++;
-       info->next_string_idx %= ARRAY_LEN(info->strings);
-       return *ttext_p;
-#else
-       return text;
-#endif
-}
-
-/* Unlink the specified node from its parent, then free it (recursively).  */
-static void
-unlink_and_free_tree(xmlNode *node)
+xml_element_get_number(const struct xml_node *element, int base)
 {
-       xmlUnlinkNode(node);
-       xmlFreeNode(node);
+       return parse_number(xml_element_get_text(element), base);
 }
 
-/* Unlink and free (recursively) all children of the specified node.  */
-static void
-unlink_and_free_children(xmlNode *node)
-{
-       xmlNode *child;
-
-       while ((child = node->last) != NULL)
-               unlink_and_free_tree(child);
-}
-
-/* Add the new child element 'replacement' to 'parent', replacing any same-named
- * element that may already exist.  */
-static void
-node_replace_child_element(xmlNode *parent, xmlNode *replacement)
-{
-       xmlNode *child;
-
-       node_for_each_child(parent, child) {
-               if (node_is_element(child, replacement->name)) {
-                       xmlReplaceNode(child, replacement);
-                       xmlFreeNode(child);
-                       return;
-               }
-       }
-
-       xmlAddChild(parent, replacement);
-}
-
-/* Set the text contents of the specified element to the specified string,
- * replacing the existing contents (if any).  The string is "raw" and is
- * permitted to contain characters that have special meaning in XML.  */
-static int
-node_set_text(xmlNode *node, const xmlChar *text)
-{
-       xmlNode *text_node = xmlNewText(text);
-       if (!text_node)
-               return WIMLIB_ERR_NOMEM;
-       unlink_and_free_children(node);
-       xmlAddChild(node, text_node);
-       return 0;
-}
-
-/* Like 'node_set_text()', but takes in a 'tchar' string.  */
-static int
-node_set_ttext(xmlNode *node, const tchar *ttext)
-{
-       const xmlChar *text;
-       int ret;
-
-       ret = tstr_get_utf8(ttext, &text);
-       if (ret)
-               return ret;
-       ret = node_set_text(node, text);
-       tstr_put_utf8(text);
-       return ret;
-}
-
-/* Create a new element containing text and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_text(xmlNode *parent, const xmlChar *name, const xmlChar *text)
+/*
+ * Retrieve the timestamp from a time element.  This element should have child
+ * elements HIGHPART and LOWPART; these elements will be used to construct a
+ * Windows-style timestamp.
+ */
+static u64
+xml_element_get_timestamp(const struct xml_node *element)
 {
-       xmlNode *node;
-
-       node = xmlNewNode(NULL, name);
-       if (!node)
-               return NULL;
+       u64 timestamp = 0;
+       const struct xml_node *child;
 
-       if (node_set_text(node, text)) {
-               xmlFreeNode(node);
-               return NULL;
+       xml_node_for_each_child(element, child) {
+               if (xml_node_is_element(child, T("HIGHPART")))
+                       timestamp |= xml_element_get_number(child, 16) << 32;
+               else if (xml_node_is_element(child, T("LOWPART")))
+                       timestamp |= xml_element_get_number(child, 16);
        }
-
-       if (parent)
-               xmlAddChild(parent, node);
-       return node;
-}
-
-/* Create a new element containing text and optionally link it into a tree.  */
-static int
-new_element_with_ttext(xmlNode *parent, const xmlChar *name, const tchar *ttext,
-                      xmlNode **node_ret)
-{
-       const xmlChar *text;
-       int ret;
-       xmlNode *node;
-
-       ret = tstr_get_utf8(ttext, &text);
-       if (ret)
-               return ret;
-       node = new_element_with_text(parent, name, text);
-       tstr_put_utf8(text);
-       if (!node)
-               return WIMLIB_ERR_NOMEM;
-       if (node_ret)
-               *node_ret = node;
-       return 0;
+       return timestamp;
 }
 
 /* Create a new timestamp element and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_timestamp(xmlNode *parent, const xmlChar *name, u64 timestamp)
+static struct xml_node *
+xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
+                              u64 timestamp)
 {
-       xmlNode *node;
-       char buf[32];
+       struct xml_node *element;
+       tchar buf[32];
 
-       node = xmlNewNode(NULL, name);
-       if (!node)
+       element = xml_new_element(NULL, name);
+       if (!element)
                goto err;
 
-       sprintf(buf, "0x%08"PRIX32, (u32)(timestamp >> 32));
-       if (!new_element_with_text(node, "HIGHPART", buf))
+       tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
+       if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
                goto err;
 
-       sprintf(buf, "0x%08"PRIX32, (u32)timestamp);
-       if (!new_element_with_text(node, "LOWPART", buf))
+       tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
+       if (!xml_new_element_with_text(element, T("LOWPART"), buf))
                goto err;
 
        if (parent)
-               xmlAddChild(parent, node);
-       return node;
+               xml_add_child(parent, element);
+       return element;
 
 err:
-       xmlFreeNode(node);
+       xml_free_node(element);
        return NULL;
 }
 
 /* Create a new number element and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_u64(xmlNode *parent, const xmlChar *name, u64 value)
+static struct xml_node *
+xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
 {
-       char buf[32];
+       tchar buf[32];
 
-       sprintf(buf, "%"PRIu64, value);
-       return new_element_with_text(parent, name, buf);
-}
-
-/* Allocate a 'struct wim_xml_info'.  The caller is responsible for initializing
- * the document and the images array.  */
-static struct wim_xml_info *
-alloc_wim_xml_info(void)
-{
-       struct wim_xml_info *info = MALLOC(sizeof(*info));
-#if TCHAR_IS_UTF16LE
-       if (info) {
-               info->next_string_idx = 0;
-               info->num_strings = 0;
-       }
-#endif
-       return info;
+       tsprintf(buf, T("%"PRIu64), value);
+       return xml_new_element_with_text(parent, name, buf);
 }
 
 static bool
-parse_index(xmlChar **pp, u32 *index_ret)
+parse_index(tchar **pp, u32 *index_ret)
 {
-       xmlChar *p = *pp;
+       tchar *p = *pp;
        u32 index = 0;
 
        *p++ = '\0'; /* overwrite '[' */
@@ -379,21 +169,21 @@ parse_index(xmlChar **pp, u32 *index_ret)
 }
 
 static int
-do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
-                xmlNode **result_ret)
+do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
+                struct xml_node **result_ret)
 {
-       size_t n = strlen(path) + 1;
-       xmlChar buf[n];
-       xmlChar *p;
-       xmlChar c;
+       size_t n = tstrlen(path) + 1;
+       tchar buf[n];
+       tchar *p;
+       tchar c;
 
        *result_ret = NULL;
 
-       if (!node)
+       if (!element)
                return 0;
 
        /* Copy the path to a temporary buffer.  */
-       memcpy(buf, path, n);
+       tmemcpy(buf, path, n);
        p = buf;
 
        if (*p == '/')
@@ -401,8 +191,8 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
        c = *p;
 
        while (c != '\0') {
-               const xmlChar *name;
-               xmlNode *child;
+               const tchar *name;
+               struct xml_node *child;
                u32 index = 1;
 
                /* We have another path component.  */
@@ -422,8 +212,8 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
                *p = '\0';
 
                /* Look for a matching child.  */
-               node_for_each_child(node, child)
-                       if (node_is_element(child, name) && !--index)
+               xml_node_for_each_child(element, child)
+                       if (xml_node_is_element(child, name) && !--index)
                                goto next_step;
 
                /* No child matched the path.  If create=false, the lookup
@@ -436,99 +226,99 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
                if (index != 1)
                        return WIMLIB_ERR_INVALID_PARAM;
 
-               child = xmlNewChild(node, NULL, name, NULL);
+               child = xml_new_element(element, name);
                if (!child)
                        return WIMLIB_ERR_NOMEM;
        next_step:
                /* Continue to the next path component, if there is one.  */
-               node = child;
+               element = child;
                p++;
        }
 
-       *result_ret = node;
+       *result_ret = element;
        return 0;
 
 bad_syntax:
-       ERROR("The XML path \"%s\" has invalid syntax.", path);
+       ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
        return WIMLIB_ERR_INVALID_PARAM;
 }
 
 /* Retrieve the XML element, if any, at the specified 'path'.  This supports a
  * simple filesystem-like syntax.  If the element was found, returns a pointer
  * to it; otherwise returns NULL.  */
-static xmlNode *
-xml_get_node_by_path(xmlNode *root, const xmlChar *path)
+static struct xml_node *
+xml_get_element_by_path(struct xml_node *root, const tchar *path)
 {
-       xmlNode *node;
-       do_xml_path_walk(root, path, false, &node);
-       return node;
+       struct xml_node *element;
+
+       do_xml_path_walk(root, path, false, &element);
+       return element;
 }
 
-/* Similar to xml_get_node_by_path(), but creates the element and any requisite
- * ancestor elements as needed.   If successful, 0 is returned and *node_ret is
- * set to a pointer to the resulting element.  If unsuccessful, an error code is
- * returned and *node_ret is set to NULL.  */
+/*
+ * Similar to xml_get_element_by_path(), but creates the element and any
+ * requisite ancestor elements as needed.   If successful, 0 is returned and
+ * *element_ret is set to a pointer to the resulting element.  If unsuccessful,
+ * an error code is returned and *element_ret is set to NULL.
+ */
 static int
-xml_ensure_node_by_path(xmlNode *root, const xmlChar *path, xmlNode **node_ret)
+xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
+                          struct xml_node **element_ret)
 {
-       return do_xml_path_walk(root, path, true, node_ret);
+       return do_xml_path_walk(root, path, true, element_ret);
 }
 
 static u64
-xml_get_number_by_path(xmlNode *root, const xmlChar *path)
+xml_get_number_by_path(struct xml_node *root, const tchar *path)
 {
-       return node_get_number(xml_get_node_by_path(root, path), 10);
+       return xml_element_get_number(xml_get_element_by_path(root, path), 10);
 }
 
 static u64
-xml_get_timestamp_by_path(xmlNode *root, const xmlChar *path)
+xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
 {
-       return node_get_timestamp(xml_get_node_by_path(root, path));
-}
-
-static const xmlChar *
-xml_get_text_by_path(xmlNode *root, const xmlChar *path)
-{
-       return node_get_text(xml_get_node_by_path(root, path));
+       return xml_element_get_timestamp(xml_get_element_by_path(root, path));
 }
 
 static const tchar *
-xml_get_ttext_by_path(struct wim_xml_info *info, xmlNode *root,
-                     const xmlChar *path)
+xml_get_text_by_path(struct xml_node *root, const tchar *path)
 {
-       return node_get_ttext(info, xml_get_node_by_path(root, path));
+       return xml_element_get_text(xml_get_element_by_path(root, path));
 }
 
-/* Creates/replaces (if ttext is not NULL and not empty) or removes (if ttext is
- * NULL or empty) an element containing text.  */
+/*
+ * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
+ * or empty) an element containing text.
+ */
 static int
-xml_set_ttext_by_path(xmlNode *root, const xmlChar *path, const tchar *ttext)
+xml_set_text_by_path(struct xml_node *root, const tchar *path, const tchar *text)
 {
        int ret;
-       xmlNode *node;
+       struct xml_node *element;
 
-       if (ttext && *ttext) {
+       if (text && *text) {
                /* Create or replace  */
-               ret = xml_ensure_node_by_path(root, path, &node);
+               ret = xml_ensure_element_by_path(root, path, &element);
                if (ret)
                        return ret;
-               return node_set_ttext(node, ttext);
+               return xml_element_set_text(element, text);
        } else {
                /* Remove  */
-               node = xml_get_node_by_path(root, path);
-               if (node)
-                       unlink_and_free_tree(node);
+               element = xml_get_element_by_path(root, path);
+               if (element)
+                       xml_free_node(element);
                return 0;
        }
 }
 
 /* Unlink and return the node which represents the INDEX attribute of the
  * specified IMAGE element.  */
-static xmlAttr *
-unlink_index_attribute(xmlNode *image_node)
+static struct xml_node *
+unlink_index_attribute(struct xml_node *image_node)
 {
-       xmlAttr *attr = xmlHasProp(image_node, "INDEX");
-       xmlUnlinkNode((xmlNode *)attr);
+       struct xml_node *attr = xml_get_attrib(image_node, T("INDEX"));
+
+       xml_unlink_node(attr);
        return attr;
 }
 
@@ -550,19 +340,21 @@ inode_sum_stream_sizes(const struct wim_inode *inode,
 }
 
 static int
-append_image_node(struct wim_xml_info *info, xmlNode *image_node)
+append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
 {
-       char buf[32];
-       xmlNode **images;
+       tchar buf[32];
+       struct xml_node **images;
+       int ret;
 
        /* Limit exceeded?  */
        if (unlikely(info->image_count >= MAX_IMAGES))
                return WIMLIB_ERR_IMAGE_COUNT;
 
-       /* Add the INDEX attribute.  */
-       sprintf(buf, "%d", info->image_count + 1);
-       if (!xmlNewProp(image_node, "INDEX", buf))
-               return WIMLIB_ERR_NOMEM;
+       /* Set the INDEX attribute. */
+       tsprintf(buf, T("%d"), info->image_count + 1);
+       ret = xml_set_attrib(image_node, T("INDEX"), buf);
+       if (ret)
+               return ret;
 
        /* Append the IMAGE element to the 'images' array.  */
        images = REALLOC(info->images,
@@ -573,7 +365,7 @@ append_image_node(struct wim_xml_info *info, xmlNode *image_node)
        images[info->image_count++] = image_node;
 
        /* Add the IMAGE element to the document.  */
-       xmlAddChild(info->root, image_node);
+       xml_add_child(info->root, image_node);
        return 0;
 }
 
@@ -585,31 +377,17 @@ append_image_node(struct wim_xml_info *info, xmlNode *image_node)
 struct wim_xml_info *
 xml_new_info_struct(void)
 {
-       struct wim_xml_info *info;
+       struct wim_xml_info *info = CALLOC(1, sizeof(*info));
 
-       info = alloc_wim_xml_info();
        if (!info)
-               goto err;
-
-       info->doc = xmlNewDoc("1.0");
-       if (!info->doc)
-               goto err_free_info;
-
-       info->root = xmlNewNode(NULL, "WIM");
-       if (!info->root)
-               goto err_free_doc;
-       xmlDocSetRootElement(info->doc, info->root);
+               return NULL;
 
-       info->images = NULL;
-       info->image_count = 0;
+       info->root = xml_new_element(NULL, T("WIM"));
+       if (!info->root) {
+               FREE(info);
+               return NULL;
+       }
        return info;
-
-err_free_doc:
-       xmlFreeDoc(info->doc);
-err_free_info:
-       FREE(info);
-err:
-       return NULL;
 }
 
 /* Free a 'struct wim_xml_info'.  */
@@ -617,12 +395,8 @@ void
 xml_free_info_struct(struct wim_xml_info *info)
 {
        if (info) {
-               xmlFreeDoc(info->doc);
+               xml_free_node(info->root);
                FREE(info->images);
-       #if TCHAR_IS_UTF16LE
-               for (size_t i = 0; i < info->num_strings; i++)
-                       FREE(info->strings[i]);
-       #endif
                FREE(info);
        }
 }
@@ -640,7 +414,7 @@ xml_get_image_count(const struct wim_xml_info *info)
 u64
 xml_get_total_bytes(const struct wim_xml_info *info)
 {
-       return xml_get_number_by_path(info->root, "TOTALBYTES");
+       return xml_get_number_by_path(info->root, T("TOTALBYTES"));
 }
 
 /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
@@ -648,7 +422,7 @@ xml_get_total_bytes(const struct wim_xml_info *info)
 u64
 xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
 {
-       return xml_get_number_by_path(info->images[image - 1], "TOTALBYTES");
+       return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
 }
 
 /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
@@ -656,7 +430,8 @@ xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
 u64
 xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
 {
-       return xml_get_number_by_path(info->images[image - 1], "HARDLINKBYTES");
+       return xml_get_number_by_path(info->images[image - 1],
+                                     T("HARDLINKBYTES"));
 }
 
 /* Retrieve the WIMBOOT value for the specified image, or false if this value is
@@ -664,7 +439,7 @@ xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
 bool
 xml_get_wimboot(const struct wim_xml_info *info, int image)
 {
-       return xml_get_number_by_path(info->images[image - 1], "WIMBOOT");
+       return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
 }
 
 /* Retrieve the Windows build number for the specified image, or 0 if this
@@ -673,14 +448,15 @@ u64
 xml_get_windows_build_number(const struct wim_xml_info *info, int image)
 {
        return xml_get_number_by_path(info->images[image - 1],
-                                     "WINDOWS/VERSION/BUILD");
+                                     T("WINDOWS/VERSION/BUILD"));
 }
 
 /* Set the WIMBOOT value for the specified image.  */
 int
 xml_set_wimboot(struct wim_xml_info *info, int image)
 {
-       return xml_set_ttext_by_path(info->images[image - 1], "WIMBOOT", T("1"));
+       return xml_set_text_by_path(info->images[image - 1],
+                                   T("WIMBOOT"), T("1"));
 }
 
 /*
@@ -694,18 +470,18 @@ int
 xml_update_image_info(WIMStruct *wim, int image)
 {
        const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
-       xmlNode *image_node = wim->xml_info->images[image - 1];
+       struct xml_node *image_node = wim->xml_info->images[image - 1];
        const struct wim_inode *inode;
        u64 dir_count = 0;
        u64 file_count = 0;
        u64 total_bytes = 0;
        u64 hard_link_bytes = 0;
        u64 size;
-       xmlNode *dircount_node;
-       xmlNode *filecount_node;
-       xmlNode *totalbytes_node;
-       xmlNode *hardlinkbytes_node;
-       xmlNode *lastmodificationtime_node;
+       struct xml_node *dircount_node;
+       struct xml_node *filecount_node;
+       struct xml_node *totalbytes_node;
+       struct xml_node *hardlinkbytes_node;
+       struct xml_node *lastmodificationtime_node;
 
        image_for_each_inode(inode, imd) {
                if (inode_is_directory(inode))
@@ -717,30 +493,32 @@ xml_update_image_info(WIMStruct *wim, int image)
                hard_link_bytes += size * (inode->i_nlink - 1);
        }
 
-       dircount_node = new_element_with_u64(NULL, "DIRCOUNT", dir_count);
-       filecount_node = new_element_with_u64(NULL, "FILECOUNT", file_count);
-       totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES", total_bytes);
-       hardlinkbytes_node = new_element_with_u64(NULL, "HARDLINKBYTES",
-                                                 hard_link_bytes);
-       lastmodificationtime_node =
-               new_element_with_timestamp(NULL, "LASTMODIFICATIONTIME",
-                                          now_as_wim_timestamp());
+       dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
+                                                dir_count);
+       filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
+                                                 file_count);
+       totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
+                                                  total_bytes);
+       hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
+                                                     hard_link_bytes);
+       lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
+                       T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
 
        if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
                     !hardlinkbytes_node || !lastmodificationtime_node)) {
-               xmlFreeNode(dircount_node);
-               xmlFreeNode(filecount_node);
-               xmlFreeNode(totalbytes_node);
-               xmlFreeNode(hardlinkbytes_node);
-               xmlFreeNode(lastmodificationtime_node);
+               xml_free_node(dircount_node);
+               xml_free_node(filecount_node);
+               xml_free_node(totalbytes_node);
+               xml_free_node(hardlinkbytes_node);
+               xml_free_node(lastmodificationtime_node);
                return WIMLIB_ERR_NOMEM;
        }
 
-       node_replace_child_element(image_node, dircount_node);
-       node_replace_child_element(image_node, filecount_node);
-       node_replace_child_element(image_node, totalbytes_node);
-       node_replace_child_element(image_node, hardlinkbytes_node);
-       node_replace_child_element(image_node, lastmodificationtime_node);
+       xml_replace_child(image_node, dircount_node);
+       xml_replace_child(image_node, filecount_node);
+       xml_replace_child(image_node, totalbytes_node);
+       xml_replace_child(image_node, hardlinkbytes_node);
+       xml_replace_child(image_node, lastmodificationtime_node);
        return 0;
 }
 
@@ -749,31 +527,33 @@ int
 xml_add_image(struct wim_xml_info *info, const tchar *name)
 {
        const u64 now = now_as_wim_timestamp();
-       xmlNode *image_node;
+       struct xml_node *image_node;
        int ret;
 
+       if (name && !xml_legal_value(name)) {
+               ERROR("Name of new image contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
        ret = WIMLIB_ERR_NOMEM;
-       image_node = xmlNewNode(NULL, "IMAGE");
+       image_node = xml_new_element(NULL, T("IMAGE"));
        if (!image_node)
                goto err;
-
-       if (name && *name) {
-               ret = new_element_with_ttext(image_node, "NAME", name, NULL);
-               if (ret)
-                       goto err;
-       }
-       ret = WIMLIB_ERR_NOMEM;
-       if (!new_element_with_u64(image_node, "DIRCOUNT", 0))
+       if (name && *name &&
+           !xml_new_element_with_text(image_node, T("NAME"), name))
+               goto err;
+       if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
                goto err;
-       if (!new_element_with_u64(image_node, "FILECOUNT", 0))
+       if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
                goto err;
-       if (!new_element_with_u64(image_node, "TOTALBYTES", 0))
+       if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
                goto err;
-       if (!new_element_with_u64(image_node, "HARDLINKBYTES", 0))
+       if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
                goto err;
-       if (!new_element_with_timestamp(image_node, "CREATIONTIME", now))
+       if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
                goto err;
-       if (!new_element_with_timestamp(image_node, "LASTMODIFICATIONTIME", now))
+       if (!xml_new_element_with_timestamp(image_node,
+                                           T("LASTMODIFICATIONTIME"), now))
                goto err;
        ret = append_image_node(info, image_node);
        if (ret)
@@ -781,7 +561,7 @@ xml_add_image(struct wim_xml_info *info, const tchar *name)
        return 0;
 
 err:
-       xmlFreeNode(image_node);
+       xml_free_node(image_node);
        return ret;
 }
 
@@ -799,39 +579,46 @@ xml_export_image(const struct wim_xml_info *src_info, int src_image,
                 struct wim_xml_info *dest_info, const tchar *dest_image_name,
                 const tchar *dest_image_description, bool wimboot)
 {
-       xmlNode *dest_node;
+       struct xml_node *dest_node;
        int ret;
 
+       if (dest_image_name && !xml_legal_value(dest_image_name)) {
+               ERROR("Destination image name contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+       if (dest_image_description &&
+           !xml_legal_value(dest_image_description)) {
+               ERROR("Destination image description contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
        ret = WIMLIB_ERR_NOMEM;
-       dest_node = xmlDocCopyNode(src_info->images[src_image - 1],
-                                  dest_info->doc, 1);
+       dest_node = xml_clone_tree(src_info->images[src_image - 1]);
        if (!dest_node)
                goto err;
 
-       ret = xml_set_ttext_by_path(dest_node, "NAME", dest_image_name);
+       ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
        if (ret)
                goto err;
 
-       ret = xml_set_ttext_by_path(dest_node, "DESCRIPTION",
-                                   dest_image_description);
+       ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
+                                  dest_image_description);
        if (ret)
                goto err;
 
        if (wimboot) {
-               ret = xml_set_ttext_by_path(dest_node, "WIMBOOT", T("1"));
+               ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
                if (ret)
                        goto err;
        }
 
-       xmlFreeProp(unlink_index_attribute(dest_node));
-
        ret = append_image_node(dest_info, dest_node);
        if (ret)
                goto err;
        return 0;
 
 err:
-       xmlFreeNode(dest_node);
+       xml_free_node(dest_node);
        return ret;
 }
 
@@ -839,8 +626,8 @@ err:
 void
 xml_delete_image(struct wim_xml_info *info, int image)
 {
-       xmlNode *next_image;
-       xmlAttr *index_attr, *next_index_attr;
+       struct xml_node *next_image;
+       struct xml_node *index_attr, *next_index_attr;
 
        /* Free the IMAGE element for the deleted image.  Then, shift all
         * higher-indexed IMAGE elements down by 1, in the process re-assigning
@@ -848,18 +635,18 @@ xml_delete_image(struct wim_xml_info *info, int image)
 
        next_image = info->images[image - 1];
        next_index_attr = unlink_index_attribute(next_image);
-       unlink_and_free_tree(next_image);
+       xml_free_node(next_image);
 
        while (image < info->image_count) {
                index_attr = next_index_attr;
                next_image = info->images[image];
                next_index_attr = unlink_index_attribute(next_image);
-               xmlAddChild(next_image, (xmlNode *)index_attr);
+               xml_add_child(next_image, index_attr);
                info->images[image - 1] = next_image;
                image++;
        }
 
-       xmlFreeProp(next_index_attr);
+       xml_free_node(next_index_attr);
        info->image_count--;
 }
 
@@ -897,80 +684,80 @@ describe_arch(u64 arch)
 
 /* Print information from the WINDOWS element, if present.  */
 static void
-print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
+print_windows_info(struct xml_node *image_node)
 {
-       xmlNode *windows_node;
-       xmlNode *langs_node;
-       xmlNode *version_node;
+       struct xml_node *windows_node;
+       struct xml_node *langs_node;
+       struct xml_node *version_node;
        const tchar *text;
 
-       windows_node = xml_get_node_by_path(image_node, "WINDOWS");
+       windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
        if (!windows_node)
                return;
 
        tprintf(T("Architecture:           %"TS"\n"),
-               describe_arch(xml_get_number_by_path(windows_node, "ARCH")));
+               describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
 
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTNAME");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
        if (text)
                tprintf(T("Product Name:           %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, windows_node, "EDITIONID");
+       text = xml_get_text_by_path(windows_node, T("EDITIONID"));
        if (text)
                tprintf(T("Edition ID:             %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, windows_node, "INSTALLATIONTYPE");
+       text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
        if (text)
                tprintf(T("Installation Type:      %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, windows_node, "HAL");
+       text = xml_get_text_by_path(windows_node, T("HAL"));
        if (text)
                tprintf(T("HAL:                    %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTTYPE");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
        if (text)
                tprintf(T("Product Type:           %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTSUITE");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
        if (text)
                tprintf(T("Product Suite:          %"TS"\n"), text);
 
-       langs_node = xml_get_node_by_path(windows_node, "LANGUAGES");
+       langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
        if (langs_node) {
-               xmlNode *lang_node;
+               struct xml_node *lang_node;
 
                tprintf(T("Languages:              "));
-               node_for_each_child(langs_node, lang_node) {
-                       if (!node_is_element(lang_node, "LANGUAGE"))
+               xml_node_for_each_child(langs_node, lang_node) {
+                       if (!xml_node_is_element(lang_node, T("LANGUAGE")))
                                continue;
-                       text = node_get_ttext(info, lang_node);
+                       text = xml_element_get_text(lang_node);
                        if (!text)
                                continue;
                        tprintf(T("%"TS" "), text);
                }
                tputchar(T('\n'));
 
-               text = xml_get_ttext_by_path(info, langs_node, "DEFAULT");
+               text = xml_get_text_by_path(langs_node, T("DEFAULT"));
                if (text)
                        tprintf(T("Default Language:       %"TS"\n"), text);
        }
 
-       text = xml_get_ttext_by_path(info, windows_node, "SYSTEMROOT");
+       text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
        if (text)
                tprintf(T("System Root:            %"TS"\n"), text);
 
-       version_node = xml_get_node_by_path(windows_node, "VERSION");
+       version_node = xml_get_element_by_path(windows_node, T("VERSION"));
        if (version_node) {
                tprintf(T("Major Version:          %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "MAJOR"));
+                       xml_get_number_by_path(version_node, T("MAJOR")));
                tprintf(T("Minor Version:          %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "MINOR"));
+                       xml_get_number_by_path(version_node, T("MINOR")));
                tprintf(T("Build:                  %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "BUILD"));
+                       xml_get_number_by_path(version_node, T("BUILD")));
                tprintf(T("Service Pack Build:     %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "SPBUILD"));
+                       xml_get_number_by_path(version_node, T("SPBUILD")));
                tprintf(T("Service Pack Level:     %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "SPLEVEL"));
+                       xml_get_number_by_path(version_node, T("SPLEVEL")));
        }
 }
 
@@ -978,7 +765,7 @@ print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
 void
 xml_print_image_info(struct wim_xml_info *info, int image)
 {
-       xmlNode * const image_node = info->images[image - 1];
+       struct xml_node * const image_node = info->images[image - 1];
        const tchar *text;
        tchar timebuf[64];
 
@@ -986,49 +773,49 @@ xml_print_image_info(struct wim_xml_info *info, int image)
 
        /* Always print the Name and Description, even if the corresponding XML
         * elements are not present.  */
-       text = xml_get_ttext_by_path(info, image_node, "NAME");
+       text = xml_get_text_by_path(image_node, T("NAME"));
        tprintf(T("Name:                   %"TS"\n"), text ? text : T(""));
-       text = xml_get_ttext_by_path(info, image_node, "DESCRIPTION");
+       text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
        tprintf(T("Description:            %"TS"\n"), text ? text : T(""));
 
-       text = xml_get_ttext_by_path(info, image_node, "DISPLAYNAME");
+       text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
        if (text)
                tprintf(T("Display Name:           %"TS"\n"), text);
 
-       text = xml_get_ttext_by_path(info, image_node, "DISPLAYDESCRIPTION");
+       text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
        if (text)
                tprintf(T("Display Description:    %"TS"\n"), text);
 
        tprintf(T("Directory Count:        %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "DIRCOUNT"));
+               xml_get_number_by_path(image_node, T("DIRCOUNT")));
 
        tprintf(T("File Count:             %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "FILECOUNT"));
+               xml_get_number_by_path(image_node, T("FILECOUNT")));
 
        tprintf(T("Total Bytes:            %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "TOTALBYTES"));
+               xml_get_number_by_path(image_node, T("TOTALBYTES")));
 
        tprintf(T("Hard Link Bytes:        %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "HARDLINKBYTES"));
+               xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
 
        wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
-                                                      "CREATIONTIME"),
+                                                      T("CREATIONTIME")),
                             timebuf, ARRAY_LEN(timebuf));
        tprintf(T("Creation Time:          %"TS"\n"), timebuf);
 
        wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
-                                                      "LASTMODIFICATIONTIME"),
-                            timebuf, ARRAY_LEN(timebuf));
+                                       T("LASTMODIFICATIONTIME")),
+                                       timebuf, ARRAY_LEN(timebuf));
        tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
 
-       print_windows_info(info, image_node);
+       print_windows_info(image_node);
 
-       text = xml_get_ttext_by_path(info, image_node, "FLAGS");
+       text = xml_get_text_by_path(image_node, T("FLAGS"));
        if (text)
                tprintf(T("Flags:                  %"TS"\n"), text);
 
        tprintf(T("WIMBoot compatible:     %"TS"\n"),
-               xml_get_number_by_path(image_node, "WIMBOOT") ?
+               xml_get_number_by_path(image_node, T("WIMBOOT")) ?
                        T("yes") : T("no"));
 
        tputchar('\n');
@@ -1039,28 +826,28 @@ xml_print_image_info(struct wim_xml_info *info, int image)
  *----------------------------------------------------------------------------*/
 
 static int
-image_node_get_index(xmlNode *node)
+image_element_get_index(struct xml_node *element)
 {
-       u64 v = node_get_number((const xmlNode *)xmlHasProp(node, "INDEX"), 10);
-       return min(v, INT_MAX);
+       struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
+
+       if (!attrib)
+               return 0;
+       return min(INT_MAX, parse_number(attrib->value, 10));
 }
 
 /* Prepare the 'images' array from the XML document tree.  */
 static int
-setup_images(struct wim_xml_info *info, xmlNode *root)
+setup_images(struct wim_xml_info *info, struct xml_node *root)
 {
-       xmlNode *child;
+       struct xml_node *child;
        int index;
        int max_index = 0;
        int ret;
 
-       info->images = NULL;
-       info->image_count = 0;
-
-       node_for_each_child(root, child) {
-               if (!node_is_element(child, "IMAGE"))
+       xml_node_for_each_child(root, child) {
+               if (!xml_node_is_element(child, T("IMAGE")))
                        continue;
-               index = image_node_get_index(child);
+               index = image_element_get_index(child);
                if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
                        goto err_indices;
                max_index = max(max_index, index);
@@ -1072,10 +859,10 @@ setup_images(struct wim_xml_info *info, xmlNode *root)
        info->images = CALLOC(info->image_count, sizeof(info->images[0]));
        if (unlikely(!info->images))
                goto err;
-       node_for_each_child(root, child) {
-               if (!node_is_element(child, "IMAGE"))
+       xml_node_for_each_child(root, child) {
+               if (!xml_node_is_element(child, T("IMAGE")))
                        continue;
-               index = image_node_get_index(child);
+               index = image_element_get_index(child);
                if (unlikely(info->images[index - 1]))
                        goto err_indices;
                info->images[index - 1] = child;
@@ -1091,97 +878,108 @@ err:
        return ret;
 }
 
+/*
+ * Convert the raw UTF-16LE XML document to a 'tchar' string and parse it,
+ * storing the root of the resulting tree in @root_ret.  Returns 0 on success
+ * or the error code of whichever step failed.
+ */
+static int
+parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
+                      struct xml_node **root_ret)
+{
+       tchar *tdoc;
+       int err;
+
+       err = utf16le_to_tstr(raw_doc, raw_doc_size, &tdoc, NULL);
+       if (err == 0) {
+               /* The converted copy is only needed while parsing. */
+               err = xml_parse_document(tdoc, root_ret);
+               FREE(tdoc);
+       }
+       return err;
+}
+
 /* Reads the XML data from a WIM file.  */
 int
 read_wim_xml_data(WIMStruct *wim)
 {
        struct wim_xml_info *info;
-       void *buf;
-       size_t bufsize;
-       xmlDoc *doc;
-       xmlNode *root;
+       void *raw_doc;
+       size_t raw_doc_size;
+       struct xml_node *root;
        int ret;
 
        /* Allocate the 'struct wim_xml_info'.  */
        ret = WIMLIB_ERR_NOMEM;
-       info = alloc_wim_xml_info();
+       info = CALLOC(1, sizeof(*info));
        if (!info)
                goto err;
 
-       /* Read the raw UTF-16LE bytes.  */
-       ret = wimlib_get_xml_data(wim, &buf, &bufsize);
+       /* Read the raw UTF-16LE XML document.  */
+       ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
        if (ret)
-               goto err_free_info;
+               goto err;
 
-       /* Parse the document with libxml2, creating the document tree.  */
-       doc = xmlReadMemory(buf, bufsize, NULL, "UTF-16LE", XML_PARSE_NONET);
-       FREE(buf);
-       buf = NULL;
-       if (!doc) {
+       /*
+        * Parse the document, creating the document tree.  The raw buffer is
+        * freed right after the parse attempt, whether or not it succeeded.
+        * Any failure other than WIMLIB_ERR_NOMEM is reported to the caller as
+        * WIMLIB_ERR_XML.
+        */
+       ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
+       FREE(raw_doc);
+       raw_doc = NULL;
+       if (ret) {
+               if (ret != WIMLIB_ERR_NOMEM)
+                       ret = WIMLIB_ERR_XML;
                ERROR("Unable to parse the WIM file's XML document!");
-               ret = WIMLIB_ERR_XML;
-               goto err_free_info;
+               goto err;
        }
+       root = info->root;
 
        /* Verify the root element.  */
-       root = xmlDocGetRootElement(doc);
-       if (!node_is_element(root, "WIM")) {
+       if (!xml_node_is_element(root, T("WIM"))) {
                ERROR("The WIM file's XML document has an unexpected format!");
                ret = WIMLIB_ERR_XML;
-               goto err_free_doc;
+               goto err;
        }
 
        /* Verify the WIM file is not encrypted.  */
-       if (xml_get_node_by_path(root, "ESD/ENCRYPTED")) {
+       if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
                ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
-               goto err_free_doc;
+               goto err;
        }
 
        /* Validate the image elements and set up the images[] array.  */
        ret = setup_images(info, root);
        if (ret)
-               goto err_free_doc;
+               goto err;
 
-       /* Save the document and return.  */
-       info->doc = doc;
-       info->root = root;
+       /* Success!  */
        wim->xml_info = info;
        return 0;
 
-err_free_doc:
-       xmlFreeDoc(doc);
-err_free_info:
-       FREE(info);
 err:
+       /*
+        * Common error path.  'info' is NULL here if its allocation failed.
+        * NOTE(review): this relies on xml_free_info_struct() accepting NULL
+        * and on it releasing the parsed tree stored in info->root -- confirm.
+        */
+       xml_free_info_struct(info);
        return ret;
 }
 
 /* Swap the INDEX attributes of two IMAGE elements.  */
 static void
-swap_index_attributes(xmlNode *image_node_1, xmlNode *image_node_2)
+swap_index_attributes(struct xml_node *image_element_1,
+                     struct xml_node *image_element_2)
 {
-       xmlAttr *attr_1, *attr_2;
+       struct xml_node *attr_1, *attr_2;
 
-       if (image_node_1 != image_node_2) {
-               attr_1 = unlink_index_attribute(image_node_1);
-               attr_2 = unlink_index_attribute(image_node_2);
-               xmlAddChild(image_node_1, (xmlNode *)attr_2);
-               xmlAddChild(image_node_2, (xmlNode *)attr_1);
+       if (image_element_1 != image_element_2) {
+               attr_1 = unlink_index_attribute(image_element_1);
+               attr_2 = unlink_index_attribute(image_element_2);
+               xml_add_child(image_element_1, attr_2);
+               xml_add_child(image_element_2, attr_1);
        }
 }
 
 static int
 prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
-                          xmlNode **orig_totalbytes_node_ret)
+                          struct xml_node **orig_totalbytes_element_ret)
 {
-       xmlNode *totalbytes_node = NULL;
+       struct xml_node *totalbytes_element = NULL;
 
        /* Allocate the new TOTALBYTES element if needed.  */
        if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
            total_bytes != WIM_TOTALBYTES_OMIT) {
-               totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES",
-                                                      total_bytes);
-               if (!totalbytes_node)
+               totalbytes_element = xml_new_element_with_u64(
+                                       NULL, T("TOTALBYTES"), total_bytes);
+               if (!totalbytes_element)
                        return WIMLIB_ERR_NOMEM;
        }
 
@@ -1191,7 +989,7 @@ prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes
                 * other IMAGE elements from the document.  */
                for (int i = 0; i < info->image_count; i++)
                        if (i + 1 != image)
-                               xmlUnlinkNode(info->images[i]);
+                               xml_unlink_node(info->images[i]);
 
                /* Temporarily set the INDEX attribute of the needed IMAGE
                 * element to 1.  */
@@ -1199,24 +997,24 @@ prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes
        }
 
        /* Adjust (add, change, or remove) the TOTALBYTES element if needed.  */
-       *orig_totalbytes_node_ret = NULL;
+       *orig_totalbytes_element_ret = NULL;
        if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
                /* Unlink the previous TOTALBYTES element, if any.  */
-               *orig_totalbytes_node_ret = xml_get_node_by_path(info->root,
-                                                                "TOTALBYTES");
-               if (*orig_totalbytes_node_ret)
-                       xmlUnlinkNode(*orig_totalbytes_node_ret);
+               *orig_totalbytes_element_ret = xml_get_element_by_path(
+                                               info->root, T("TOTALBYTES"));
+               if (*orig_totalbytes_element_ret)
+                       xml_unlink_node(*orig_totalbytes_element_ret);
 
                /* Link in the new TOTALBYTES element, if any.  */
-               if (totalbytes_node)
-                       xmlAddChild(info->root, totalbytes_node);
+               if (totalbytes_element)
+                       xml_add_child(info->root, totalbytes_element);
        }
        return 0;
 }
 
 static void
 restore_document_after_write(struct wim_xml_info *info, int image,
-                            xmlNode *orig_totalbytes_node)
+                            struct xml_node *orig_totalbytes_element)
 {
        /* Restore the IMAGE elements if needed.  */
        if (image != WIMLIB_ALL_IMAGES) {
@@ -1224,15 +1022,15 @@ restore_document_after_write(struct wim_xml_info *info, int image,
                 * elements to the document.  */
                for (int i = 0; i < info->image_count; i++)
                        if (i + 1 != image)
-                               xmlAddChild(info->root, info->images[i]);
+                               xml_add_child(info->root, info->images[i]);
 
                /* Restore the original INDEX attributes.  */
                swap_index_attributes(info->images[0], info->images[image - 1]);
        }
 
        /* Restore the original TOTALBYTES element if needed.  */
-       if (orig_totalbytes_node)
-               node_replace_child_element(info->root, orig_totalbytes_node);
+       if (orig_totalbytes_element)
+               xml_replace_child(info->root, orig_totalbytes_element);
 }
 
 /*
@@ -1250,45 +1048,29 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
                   struct wim_reshdr *out_reshdr, int write_resource_flags)
 {
        struct wim_xml_info *info = wim->xml_info;
-       long ret;
-       long ret2;
-       xmlBuffer *buffer;
-       xmlNode *orig_totalbytes_node;
-       xmlSaveCtxt *save_ctx;
+       int ret;
+       struct xml_node *orig_totalbytes_element;
+       struct xml_out_buf buf = {};
+       const utf16lechar *raw_doc;
+       size_t raw_doc_size;
 
        /* Make any needed temporary changes to the document.  */
        ret = prepare_document_for_write(info, image, total_bytes,
-                                        &orig_totalbytes_node);
+                                        &orig_totalbytes_element);
        if (ret)
                goto out;
 
-       /* Create an in-memory buffer to hold the encoded document.  */
-       ret = WIMLIB_ERR_NOMEM;
-       buffer = xmlBufferCreate();
-       if (!buffer)
+       ret = xml_write_document(info->root, &buf);
+       if (ret)
                goto out_restore_document;
 
-       /* Encode the document in UTF-16LE, with a byte order mark, and with no
-        * XML declaration.  Some other WIM software requires all of these
-        * characteristics.  */
-       ret = WIMLIB_ERR_NOMEM;
-       if (xmlBufferCat(buffer, "\xff\xfe"))
-               goto out_free_buffer;
-       save_ctx = xmlSaveToBuffer(buffer, "UTF-16LE", XML_SAVE_NO_DECL);
-       if (!save_ctx)
-               goto out_free_buffer;
-       ret = xmlSaveDoc(save_ctx, info->doc);
-       ret2 = xmlSaveClose(save_ctx);
-       if (ret < 0 || ret2 < 0) {
-               ERROR("Unable to serialize the WIM file's XML document!");
-               ret = WIMLIB_ERR_NOMEM;
-               goto out_free_buffer;
-       }
+       ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
+       if (ret)
+               goto out_restore_document;
 
        /* Write the XML data uncompressed.  Although wimlib can handle
         * compressed XML data, some other WIM software cannot.  */
-       ret = write_wim_resource_from_buffer(xmlBufferContent(buffer),
-                                            xmlBufferLength(buffer),
+       ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
                                             true,
                                             &wim->out_fd,
                                             WIMLIB_COMPRESSION_TYPE_NONE,
@@ -1296,39 +1078,15 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
                                             out_reshdr,
                                             NULL,
                                             write_resource_flags);
-out_free_buffer:
-       xmlBufferFree(buffer);
+       tstr_put_utf16le(raw_doc);
 out_restore_document:
        /* Revert any temporary changes we made to the document.  */
-       restore_document_after_write(info, image, orig_totalbytes_node);
+       restore_document_after_write(info, image, orig_totalbytes_element);
+       FREE(buf.buf);
 out:
        return ret;
 }
 
-/*----------------------------------------------------------------------------*
- *                           Global setup functions                           *
- *----------------------------------------------------------------------------*/
-
-void
-xml_global_init(void)
-{
-       xmlInitParser();
-}
-
-void
-xml_global_cleanup(void)
-{
-       xmlCleanupParser();
-}
-
-void
-xml_set_memory_allocator(void *(*malloc_func)(size_t),
-                        void (*free_func)(void *),
-                        void *(*realloc_func)(void *, size_t))
-{
-       xmlMemSetup(free_func, malloc_func, realloc_func, wimlib_strdup);
-}
-
 /*----------------------------------------------------------------------------*
  *                           Library API functions                            *
  *----------------------------------------------------------------------------*/
@@ -1373,24 +1131,22 @@ static bool
 image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
 {
        const struct wim_xml_info *info = wim->xml_info;
-       const xmlChar *name_utf8;
-       bool found = false;
+       const tchar *existing_name;
 
        /* Any number of images can have "no name".  */
        if (!name || !*name)
                return false;
 
        /* Check for images that have the specified name.  */
-       if (tstr_get_utf8(name, &name_utf8))
-               return false;
-       for (int i = 0; i < info->image_count && !found; i++) {
+       for (int i = 0; i < info->image_count; i++) {
                if (i + 1 == excluded_image)
                        continue;
-               found = xmlStrEqual(name_utf8, xml_get_text_by_path(
-                                                   info->images[i], "NAME"));
+               existing_name = xml_get_text_by_path(info->images[i],
+                                                    T("NAME"));
+               if (existing_name && !tstrcmp(existing_name, name))
+                       return true;
        }
-       tstr_put_utf8(name_utf8);
-       return found;
+       return false;
 }
 
 WIMLIBAPI bool
@@ -1421,19 +1177,13 @@ WIMLIBAPI const tchar *
 wimlib_get_image_property(const WIMStruct *wim, int image,
                          const tchar *property_name)
 {
-       const xmlChar *name;
-       const tchar *value;
-       struct wim_xml_info *info = wim->xml_info;
+       const struct wim_xml_info *info = wim->xml_info;
 
        if (!property_name || !*property_name)
                return NULL;
        if (image < 1 || image > info->image_count)
                return NULL;
-       if (tstr_get_utf8(property_name, &name))
-               return NULL;
-       value = xml_get_ttext_by_path(info, info->images[image - 1], name);
-       tstr_put_utf8(name);
-       return value;
+       return xml_get_text_by_path(info->images[image - 1], property_name);
 }
 
 WIMLIBAPI int
@@ -1445,7 +1195,8 @@ wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
 WIMLIBAPI int
 wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
 {
-       return wimlib_set_image_property(wim, image, T("DESCRIPTION"), description);
+       return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
+                                        description);
 }
 
 WIMLIBAPI int
@@ -1458,13 +1209,22 @@ WIMLIBAPI int
 wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
                          const tchar *property_value)
 {
-       const xmlChar *name;
        struct wim_xml_info *info = wim->xml_info;
-       int ret;
 
        if (!property_name || !*property_name)
                return WIMLIB_ERR_INVALID_PARAM;
 
+       if (!xml_legal_name(property_name)) {
+               ERROR("Property name '%"TS"' is illegal in XML", property_name);
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
+       if (property_value && !xml_legal_value(property_value)) {
+               WARNING("Value of property '%"TS"' contains illegal characters",
+                       property_name);
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
        if (image < 1 || image > info->image_count)
                return WIMLIB_ERR_INVALID_IMAGE;
 
@@ -1472,10 +1232,6 @@ wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
            image_name_in_use(wim, property_value, image))
                return WIMLIB_ERR_IMAGE_NAME_COLLISION;
 
-       ret = tstr_get_utf8(property_name, &name);
-       if (ret)
-               return ret;
-       ret = xml_set_ttext_by_path(info->images[image - 1], name, property_value);
-       tstr_put_utf8(name);
-       return ret;
+       return xml_set_text_by_path(info->images[image - 1], property_name,
+                                   property_value);
 }
diff --git a/src/xmlproc.c b/src/xmlproc.c
new file mode 100644 (file)
index 0000000..529fd69
--- /dev/null
@@ -0,0 +1,766 @@
+/*
+ * xmlproc.c
+ *
+ * A simple XML 1.0 processor.  This handles all XML features that are used in
+ * WIM files, plus a bit more for futureproofing.  It omits problematic
+ * features, such as expansion of entities other than simple escape sequences.
+ */
+
+/*
+ * Copyright 2023 Eric Biggers
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <string.h>
+
+#include "wimlib/error.h"
+#include "wimlib/test_support.h"
+#include "wimlib/util.h"
+#include "wimlib/xmlproc.h"
+
+/*----------------------------------------------------------------------------*
+ *                         XML node utility functions                         *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Duplicate the first @len characters of @str into a newly allocated,
+ * NUL-terminated string.  Returns NULL if the allocation fails.
+ */
+static tchar *
+tstrdupz(const tchar *str, size_t len)
+{
+       tchar *copy;
+
+       /* One extra character is allocated for the terminator; this relies on
+        * CALLOC returning zeroed memory. */
+       copy = CALLOC(len + 1, sizeof(str[0]));
+       if (copy == NULL)
+               return NULL;
+       tmemcpy(copy, str, len);
+       return copy;
+}
+
+/*
+ * Allocate a new node of the given @type.  If @name and/or @value are non-NULL,
+ * the node gets its own copies of their first @name_len / @value_len
+ * characters.  If @parent is non-NULL, the new node is appended to its
+ * children.  Returns NULL if out of memory.
+ */
+static struct xml_node *
+xml_new_node(struct xml_node *parent, enum xml_node_type type,
+            const tchar *name, size_t name_len,
+            const tchar *value, size_t value_len)
+{
+       struct xml_node *new_node;
+
+       new_node = CALLOC(1, sizeof(*new_node));
+       if (new_node == NULL)
+               return NULL;
+       new_node->type = type;
+       INIT_LIST_HEAD(&new_node->children);
+
+       if (name != NULL) {
+               new_node->name = tstrdupz(name, name_len);
+               if (new_node->name == NULL) {
+                       xml_free_node(new_node);
+                       return NULL;
+               }
+       }
+       if (value != NULL) {
+               new_node->value = tstrdupz(value, value_len);
+               if (new_node->value == NULL) {
+                       xml_free_node(new_node);
+                       return NULL;
+               }
+       }
+
+       /* Link the node in only once it has been fully constructed. */
+       if (parent != NULL)
+               xml_add_child(parent, new_node);
+       return new_node;
+}
+
+/*
+ * Allocate an ELEMENT node called @name.  When @parent (which should itself be
+ * an ELEMENT) is non-NULL, the new element is appended to its children.
+ * Returns NULL if out of memory.
+ */
+struct xml_node *
+xml_new_element(struct xml_node *parent, const tchar *name)
+{
+       size_t name_len = tstrlen(name);
+
+       return xml_new_node(parent, XML_ELEMENT_NODE, name, name_len,
+                           NULL, 0);
+}
+
+/*
+ * Allocate an ELEMENT node called @name whose only child is a TEXT node
+ * holding @text.  When @parent (which should itself be an ELEMENT) is non-NULL,
+ * the new element is appended to its children.  Returns NULL if out of memory,
+ * in which case nothing is left linked under @parent.
+ */
+struct xml_node *
+xml_new_element_with_text(struct xml_node *parent, const tchar *name,
+                         const tchar *text)
+{
+       struct xml_node *new_element;
+
+       new_element = xml_new_element(parent, name);
+       if (new_element == NULL)
+               return NULL;
+       if (xml_element_set_text(new_element, text) != 0) {
+               /* xml_free_node() also unlinks the element from @parent. */
+               xml_free_node(new_element);
+               return NULL;
+       }
+       return new_element;
+}
+
+/*
+ * Make @child the last child of @parent, first detaching it from its current
+ * parent if it has one.
+ */
+void
+xml_add_child(struct xml_node *parent, struct xml_node *child)
+{
+       /* Callers shouldn't pass an already-linked node, but tolerate it. */
+       xml_unlink_node(child);
+       list_add_tail(&child->sibling_link, &parent->children);
+       child->parent = parent;
+}
+
+/* Detach @node from its parent.  This is a no-op if @node has no parent. */
+void
+xml_unlink_node(struct xml_node *node)
+{
+       if (node->parent == NULL)
+               return;
+       list_del(&node->sibling_link);
+       node->parent = NULL;
+}
+
+/* Recursively free every child of @parent, leaving @parent itself intact. */
+static void
+xml_free_children(struct xml_node *parent)
+{
+       struct xml_node *cur, *next;
+
+       /* The "safe" iterator is needed since each node is freed as we go. */
+       list_for_each_entry_safe(cur, next, &parent->children, sibling_link) {
+               xml_free_node(cur);
+       }
+}
+
+/*
+ * Free @node and all its descendants.  The node is unlinked from its parent
+ * first if it has one.  Passing NULL is allowed and does nothing.
+ */
+void
+xml_free_node(struct xml_node *node)
+{
+       if (node == NULL)
+               return;
+       xml_unlink_node(node);
+       xml_free_children(node);
+       FREE(node->value);
+       FREE(node->name);
+       FREE(node);
+}
+
+/*
+ * Look up the first TEXT child of @element and return its text.  Returns NULL
+ * if @element has no TEXT child.  @element may be NULL.
+ */
+const tchar *
+xml_element_get_text(const struct xml_node *element)
+{
+       const struct xml_node *node;
+
+       xml_node_for_each_child(element, node) {
+               if (node->type != XML_TEXT_NODE)
+                       continue;
+               return node->value;
+       }
+       return NULL;
+}
+
+/*
+ * Replace the entire contents of @element (all of its children) with a single
+ * TEXT node holding a copy of @text.  Returns 0 on success, or
+ * WIMLIB_ERR_NOMEM if out of memory, in which case @element is unchanged.
+ */
+int
+xml_element_set_text(struct xml_node *element, const tchar *text)
+{
+       struct xml_node *new_text;
+       size_t text_len = tstrlen(text);
+
+       /* Allocate the new node before discarding the old contents. */
+       new_text = xml_new_node(NULL, XML_TEXT_NODE, NULL, 0, text, text_len);
+       if (new_text == NULL)
+               return WIMLIB_ERR_NOMEM;
+       xml_free_children(element);
+       xml_add_child(element, new_text);
+       return 0;
+}
+
+/*
+ * Append the first @text_len characters of @text to the contents of @element.
+ * If the last child of @element is already a TEXT node, the new text is merged
+ * into it; otherwise a new TEXT node is added.  Returns 0 on success or
+ * WIMLIB_ERR_NOMEM if out of memory.
+ */
+static int
+xml_element_append_text(struct xml_node *element,
+                       const tchar *text, size_t text_len)
+{
+       struct xml_node *tail = NULL;
+       size_t prev_len;
+       tchar *merged;
+
+       if (!list_empty(&element->children))
+               tail = list_last_entry(&element->children, struct xml_node,
+                                      sibling_link);
+
+       if (tail == NULL || tail->type != XML_TEXT_NODE) {
+               /* There is no trailing TEXT node to extend; add a new one. */
+               if (xml_new_node(element, XML_TEXT_NODE, NULL, 0,
+                                text, text_len) == NULL)
+                       return WIMLIB_ERR_NOMEM;
+               return 0;
+       }
+
+       /*
+        * The new TEXT would directly follow another TEXT, so keep the tree
+        * simple by extending the existing TEXT instead.  (This case can
+        * theoretically be reached via the use of CDATA...)
+        */
+       prev_len = tstrlen(tail->value);
+       merged = CALLOC(prev_len + text_len + 1, sizeof(merged[0]));
+       if (merged == NULL)
+               return WIMLIB_ERR_NOMEM;
+       tmemcpy(merged, tail->value, prev_len);
+       tmemcpy(merged + prev_len, text, text_len);
+       FREE(tail->value);
+       tail->value = merged;
+       return 0;
+}
+
+/*
+ * Return the ATTRIBUTE node called @name on @element, or NULL if @element has
+ * no such attribute.
+ */
+struct xml_node *
+xml_get_attrib(const struct xml_node *element, const tchar *name)
+{
+       struct xml_node *node;
+
+       xml_node_for_each_child(element, node) {
+               if (node->type != XML_ATTRIBUTE_NODE)
+                       continue;
+               if (tstrcmp(node->name, name) == 0)
+                       return node;
+       }
+       return NULL;
+}
+
+/*
+ * Give @element the attribute @name=@value, replacing any attribute of the
+ * same name that it already has.  Returns 0 on success or WIMLIB_ERR_NOMEM if
+ * out of memory.
+ */
+int
+xml_set_attrib(struct xml_node *element, const tchar *name, const tchar *value)
+{
+       struct xml_node *new_attrib;
+
+       new_attrib = xml_new_node(NULL, XML_ATTRIBUTE_NODE,
+                                 name, tstrlen(name),
+                                 value, tstrlen(value));
+       if (new_attrib == NULL)
+               return WIMLIB_ERR_NOMEM;
+       xml_replace_child(element, new_attrib);
+       return 0;
+}
+
+/*
+ * Link the ELEMENT or ATTRIBUTE node @replacement under the ELEMENT @parent.
+ * If @parent already has a child of the same type and name, @replacement takes
+ * over that child's position in the list and the old child is freed;
+ * otherwise @replacement is appended as the last child.
+ */
+void
+xml_replace_child(struct xml_node *parent, struct xml_node *replacement)
+{
+       struct xml_node *old;
+
+       /* Callers shouldn't pass an already-linked node, but tolerate it. */
+       xml_unlink_node(replacement);
+
+       xml_node_for_each_child(parent, old) {
+               if (old->type != replacement->type)
+                       continue;
+               if (tstrcmp(old->name, replacement->name) != 0)
+                       continue;
+               list_replace(&old->sibling_link, &replacement->sibling_link);
+               replacement->parent = parent;
+               /* The old node is already off the list; mark it unlinked so
+                * that xml_free_node() doesn't try to unlink it again. */
+               old->parent = NULL;
+               xml_free_node(old);
+               return;
+       }
+       xml_add_child(parent, replacement);
+}
+
+/*
+ * Recursively duplicate the tree rooted at @orig.  Returns the root of the new
+ * tree, which is not linked to any parent, or NULL if out of memory.
+ */
+struct xml_node *
+xml_clone_tree(struct xml_node *orig)
+{
+       struct xml_node *copy, *child, *child_copy;
+       size_t name_len = 0;
+       size_t value_len = 0;
+
+       if (orig->name)
+               name_len = tstrlen(orig->name);
+       if (orig->value)
+               value_len = tstrlen(orig->value);
+
+       copy = xml_new_node(NULL, orig->type, orig->name, name_len,
+                           orig->value, value_len);
+       if (copy == NULL)
+               return NULL;
+
+       xml_node_for_each_child(orig, child) {
+               child_copy = xml_clone_tree(child);
+               if (child_copy == NULL) {
+                       /* This also frees the children cloned so far. */
+                       xml_free_node(copy);
+                       return NULL;
+               }
+               xml_add_child(copy, child_copy);
+       }
+       return copy;
+}
+
+/*----------------------------------------------------------------------------*
+ *                           XML string validation                            *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Functions that check for legal names and values in XML 1.0.  These are
+ * currently slightly over-lenient, as they allow everything non-ASCII.  These
+ * are also not currently used by the XML parser to reject non-well-formed
+ * documents, but rather just by the user of the XML processor (xml.c) in order
+ * to avoid introducing illegal names and values into the document.
+ */
+
+/* Return true if @c is a space, newline, carriage return, or tab. */
+static inline bool
+is_whitespace(tchar c)
+{
+       switch (c) {
+       case ' ':
+       case '\n':
+       case '\r':
+       case '\t':
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Return true if @c may begin an XML name: an ASCII letter, ':' or '_', or
+ * anything outside the 7-bit ASCII range.
+ */
+static inline bool
+is_name_start_char(tchar c)
+{
+       if ((c & 0x7f) != c)
+               return true; /* non-ASCII: overly lenient for now */
+       if (c >= 'A' && c <= 'Z')
+               return true;
+       if (c >= 'a' && c <= 'z')
+               return true;
+       return c == ':' || c == '_';
+}
+
+/*
+ * Return true if @c may appear in an XML name after the first character:
+ * anything that may start a name, plus ASCII digits, '-' and '.'.
+ */
+static inline bool
+is_name_char(tchar c)
+{
+       if (c == '-' || c == '.')
+               return true;
+       if (c >= '0' && c <= '9')
+               return true;
+       return is_name_start_char(c);
+}
+
+/*
+ * Return true if the NUL-terminated string @p is acceptable as an XML name.
+ * The empty string is rejected, since NUL is not a name start character.
+ */
+bool
+xml_legal_name(const tchar *p)
+{
+       if (!is_name_start_char(*p))
+               return false;
+       p++;
+       while (*p != '\0') {
+               if (!is_name_char(*p))
+                       return false;
+               p++;
+       }
+       return true;
+}
+
+/*
+ * Return true if the NUL-terminated string @p may be used as an XML value.
+ * ASCII control characters other than tab, newline and carriage return are
+ * rejected; everything else, including all non-ASCII, is accepted.
+ */
+bool
+xml_legal_value(const tchar *p)
+{
+       for (; *p; p++) {
+               /*
+                * Careful: tchar can be a signed type, in which case the code
+                * units of non-ASCII characters are negative.  Require
+                * '*p > 0' so that they aren't mistaken for control characters.
+                */
+               if (*p > 0 && *p < 0x20 && !is_whitespace(*p))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * The byte order mark (U+FEFF) as a tchar string: a single 0xfeff code unit
+ * when tchar is UTF-16LE, otherwise the three bytes 0xEF 0xBB 0xBF.
+ */
+#if TCHAR_IS_UTF16LE
+#define BYTE_ORDER_MARK        (tchar[]){ 0xfeff, 0 }
+#else
+#define BYTE_ORDER_MARK        "\xEF\xBB\xBF"
+#endif
+
+/*----------------------------------------------------------------------------*
+ *                               XML parsing                                  *
+ *----------------------------------------------------------------------------*/
+
+/* Jump to the 'bad' label of the enclosing function if @cond is false. */
+#define CHECK(cond)    if (!(cond)) goto bad
+
+/* Advance *pp past any whitespace characters it currently points to. */
+static inline void
+skip_whitespace(const tchar **pp)
+{
+       while (is_whitespace(**pp))
+               (*pp)++;
+}
+
+/*
+ * If the text at *pp begins with @str, advance *pp past it and return true.
+ * Otherwise return false and leave *pp unchanged.
+ */
+static inline bool
+skip_string(const tchar **pp, const tchar *str)
+{
+       size_t len = tstrlen(str);
+
+       if (tstrncmp(*pp, str, len) != 0)
+               return false;
+       *pp += len;
+       return true;
+}
+
+/*
+ * Search forward from *pp for the next occurrence of @str.  If one is found,
+ * advance *pp to just past it and return true.  Otherwise return false and
+ * leave *pp unchanged.
+ */
+static inline bool
+find_and_skip(const tchar **pp, const tchar *str)
+{
+       const tchar *match = tstrstr(*pp, str);
+
+       if (match == NULL)
+               return false;
+       *pp = match + tstrlen(str);
+       return true;
+}
+
+/*
+ * Advance *pp past any run of whitespace, processing instructions (including
+ * the XML declaration), DOCTYPE declarations, and comments.  Returns false,
+ * leaving *pp unchanged, if one of these constructs is not terminated;
+ * otherwise returns true.
+ */
+static bool
+skip_misc(const tchar **pp)
+{
+       const tchar *cur = *pp;
+       const tchar *start;
+
+       /* Keep going until a full pass makes no progress. */
+       do {
+               start = cur;
+               skip_whitespace(&cur);
+               /* Discard XML declaration and top-level PIs for now. */
+               if (skip_string(&cur, T("<?"))) {
+                       if (!find_and_skip(&cur, T("?>")))
+                               return false;
+               }
+               /* Discard DOCTYPE declaration for now. */
+               if (skip_string(&cur, T("<!DOCTYPE"))) {
+                       if (!find_and_skip(&cur, T(">")))
+                               return false;
+               }
+               /* Discard top-level comments for now. */
+               if (skip_string(&cur, T("<!--"))) {
+                       if (!find_and_skip(&cur, T("-->")))
+                               return false;
+               }
+       } while (cur != start);
+       *pp = cur;
+       return true;
+}
+
+/*
+ * Return the escape sequence that stands for the character @c, or NULL if @c
+ * is not one of the five characters that have one.
+ */
+static inline const tchar *
+get_escape_seq(tchar c)
+{
+       if (c == '<')
+               return T("&lt;");
+       if (c == '>')
+               return T("&gt;");
+       if (c == '&')
+               return T("&amp;");
+       if (c == '\'')
+               return T("&apos;");
+       if (c == '"')
+               return T("&quot;");
+       return NULL;
+}
+
+/*
+ * Copy the first 'len' characters of 'str' into a newly allocated string,
+ * replacing the escape sequences &lt; &gt; &amp; &apos; and &quot; with the
+ * characters they stand for.  On success, returns 0 and stores the new string
+ * in *unescaped_ret.  Returns WIMLIB_ERR_NOMEM if the allocation fails, or
+ * WIMLIB_ERR_XML if a '&' does not begin one of those five sequences or if a
+ * sequence extends past 'len'.
+ *
+ * Note: 'str' must be NUL-terminated, but only 'len' chars are used.
+ */
+static int
+unescape_string(const tchar *str, size_t len, tchar **unescaped_ret)
+{
+       const tchar *in_p = str;
+       tchar *unescaped, *out_p;
+
+       /* Each step below writes one character and consumes at least one, so
+        * the output never needs more than 'len' characters plus one more. */
+       unescaped = CALLOC(len + 1, sizeof(str[0]));
+       if (!unescaped)
+               return WIMLIB_ERR_NOMEM;
+       out_p = unescaped;
+       while (in_p < &str[len]) {
+               if (*in_p != '&')
+                       *out_p++ = *in_p++;
+               else if (skip_string(&in_p, T("&lt;")))
+                       *out_p++ = '<';
+               else if (skip_string(&in_p, T("&gt;")))
+                       *out_p++ = '>';
+               else if (skip_string(&in_p, T("&amp;")))
+                       *out_p++ = '&';
+               else if (skip_string(&in_p, T("&apos;")))
+                       *out_p++ = '\'';
+               else if (skip_string(&in_p, T("&quot;")))
+                       *out_p++ = '"';
+               else
+                       goto bad;
+       }
+       /* skip_string() isn't limited to 'len', so a sequence that began
+        * inside the range may have matched characters beyond its end. */
+       if (in_p > &str[len])
+               goto bad;
+       *unescaped_ret = unescaped;
+       return 0;
+
+bad:
+       ERROR("Error unescaping string '%.*"TS"'", (int)len, str);
+       FREE(unescaped);
+       return WIMLIB_ERR_XML;
+}
+
+/* Forward declaration: parse_contents() calls parse_element(), which is
+ * defined after it. */
+static int
+parse_element(const tchar **pp, struct xml_node *parent, int depth,
+             struct xml_node **element_ret);
+
+/*
+ * Parse the contents of @element, starting at *pp and stopping at the "</"
+ * that begins an end tag.  Character data is unescaped and appended to
+ * @element as text, CDATA sections are appended verbatim, processing
+ * instructions and comments are discarded, and any other '<' is parsed as a
+ * child element at @depth + 1.
+ *
+ * On success, returns 0 and sets *pp to the '<' of the end tag.  Returns
+ * WIMLIB_ERR_XML if the input ends prematurely or contains an unsupported "<!"
+ * construct, or the error code of a helper that failed.  *pp is left unchanged
+ * on failure.
+ */
+static int
+parse_contents(const tchar **pp, struct xml_node *element, int depth)
+{
+       const tchar *p = *pp;
+       int ret;
+
+       for (;;) {
+               const tchar *raw_text = p;
+               tchar *text;
+
+               /* Scan the run of character data up to the next '<'.  Reaching
+                * the end of the input first means the end tag is missing. */
+               for (; *p != '<'; p++) {
+                       if (*p == '\0')
+                               return WIMLIB_ERR_XML;
+               }
+               if (p > raw_text) {
+                       ret = unescape_string(raw_text, p - raw_text, &text);
+                       if (ret)
+                               return ret;
+                       ret = xml_element_append_text(element, text,
+                                                     tstrlen(text));
+                       FREE(text);
+                       if (ret)
+                               return ret;
+               }
+               if (p[1] == '/') {
+                       break; /* Reached the end tag of @element */
+               } else if (p[1] == '?') {
+                       /* Discard processing instructions for now. */
+                       p += 2;
+                       if (!find_and_skip(&p, T("?>")))
+                               return WIMLIB_ERR_XML;
+                       continue;
+               } else if (p[1] == '!') {
+                       if (skip_string(&p, T("<![CDATA["))) {
+                               raw_text = p;
+                               if (!find_and_skip(&p, T("]]>")))
+                                       return WIMLIB_ERR_XML;
+                               /* 'p' is now just past the "]]>" terminator,
+                                * hence the '- 3'.  CDATA is not unescaped. */
+                               ret = xml_element_append_text(element, raw_text,
+                                                             p - 3 - raw_text);
+                               if (ret)
+                                       return ret;
+                               continue;
+                       } else if (skip_string(&p, T("<!--"))) {
+                               /* Discard comments for now. */
+                               if (!find_and_skip(&p, T("-->")))
+                                       return WIMLIB_ERR_XML;
+                               continue;
+                       }
+                       /* Any other "<!" construct is not supported here. */
+                       return WIMLIB_ERR_XML;
+               }
+               /* Otherwise this is the start tag of a child element. */
+               ret = parse_element(&p, element, depth + 1, NULL);
+               if (ret)
+                       return ret;
+       }
+       *pp = p;
+       return 0;
+}
+
+/*
+ * Parse one element: its start tag (with attributes), then either the "/>" of
+ * an empty-element tag or the element's contents followed by its end tag.
+ *
+ * @pp: in/out cursor into the NUL-terminated document.  On entry it must point
+ *     at the '<' of the start tag.  On success it is advanced past the '>'
+ *     that closes the element; on failure it is left unchanged.
+ * @parent: passed to xml_new_node() as the parent of the new element; the
+ *     document parser passes NULL for the root element.
+ * @depth: nesting depth of this element.  Depths of 50 or more are rejected,
+ *     which bounds the recursion through parse_contents().
+ * @element_ret: if non-NULL, receives the new element on success.
+ *
+ * Returns 0 on success, WIMLIB_ERR_XML on malformed input, WIMLIB_ERR_NOMEM if
+ * a node cannot be allocated, or the error code of a failing helper.  On
+ * failure the partially built element is released with xml_free_node().
+ *
+ * NOTE(review): CHECK() is defined elsewhere in this file; the 'bad' label at
+ * the bottom is presumably where it jumps when its condition is false.
+ */
+static int
+parse_element(const tchar **pp, struct xml_node *parent, int depth,
+             struct xml_node **element_ret)
+{
+       const tchar *p = *pp;
+       struct xml_node *element = NULL;
+       const tchar *name_start;
+       size_t name_len;
+       int ret;
+
+       /* Parse the start tag. */
+       CHECK(depth < 50);
+       CHECK(*p == '<');
+       p++;
+       name_start = p;
+       /*
+        * The name also ends at '/', so that an empty-element tag written
+        * without whitespace before "/>" (e.g. "<NAME/>", which XML 1.0
+        * allows) is not misread as a start tag for an element named "NAME/".
+        */
+       while (!is_whitespace(*p) && *p != '>' && *p != '/' && *p != '\0')
+               p++;
+       name_len = p - name_start;
+       CHECK(name_len > 0);
+       element = xml_new_node(parent, XML_ELEMENT_NODE, name_start, name_len,
+                              NULL, 0);
+       if (!element) {
+               ret = WIMLIB_ERR_NOMEM;
+               goto error;
+       }
+       /* Parse the attributes list within the start tag. */
+       while (is_whitespace(*p)) {
+               const tchar *attr_name_start, *attr_value_start;
+               size_t attr_name_len, attr_value_len;
+               tchar *attr_value;
+               tchar quote;
+
+               skip_whitespace(&p);
+               if (*p == '/' || *p == '>')
+                       break;
+               /* name, optional whitespace, '=', optional whitespace */
+               attr_name_start = p;
+               while (*p != '=' && !is_whitespace(*p) && *p != '\0')
+                       p++;
+               attr_name_len = p - attr_name_start;
+               skip_whitespace(&p);
+               CHECK(attr_name_len > 0 && *p == '=');
+               p++;
+               skip_whitespace(&p);
+               /* The value runs up to the matching quote character. */
+               quote = *p;
+               CHECK(quote == '\'' || quote == '"');
+               attr_value_start = ++p;
+               while (*p != quote && *p != '\0')
+                       p++;
+               CHECK(*p == quote);
+               attr_value_len = p - attr_value_start;
+               p++;
+               ret = unescape_string(attr_value_start, attr_value_len,
+                                     &attr_value);
+               if (ret)
+                       goto error;
+               /* The attribute becomes a child node of the element. */
+               ret = xml_new_node(element, XML_ATTRIBUTE_NODE,
+                                  attr_name_start, attr_name_len,
+                                  attr_value, tstrlen(attr_value))
+                       ? 0 : WIMLIB_ERR_NOMEM;
+               FREE(attr_value);
+               if (ret)
+                       goto error;
+       }
+       if (*p == '/') {
+               /* Closing an empty element tag */
+               p++;
+               CHECK(*p == '>');
+               p++;
+       } else {
+               /* Closing the start tag */
+               CHECK(*p == '>');
+               p++;
+               /* Parse the contents, then the end tag. */
+               ret = parse_contents(&p, element, depth);
+               if (ret)
+                       goto error;
+               CHECK(*p == '<');
+               p++;
+               CHECK(*p == '/');
+               p++;
+               /*
+                * The end tag must repeat the start tag's name.  A longer name
+                * sharing the same prefix fails the '>' check below.
+                */
+               CHECK(!tstrncmp(p, name_start, name_len));
+               p += name_len;
+               skip_whitespace(&p);
+               CHECK(*p == '>');
+               p++;
+       }
+       *pp = p;
+       if (element_ret)
+               *element_ret = element;
+       return 0;
+
+error:
+       xml_free_node(element);
+       return ret;
+
+bad:
+       ret = WIMLIB_ERR_XML;
+       goto error;
+}
+
+/*
+ * Parse the XML document in the NUL-terminated 'tchar' string @p (UTF-16LE in
+ * Windows builds, UTF-8 everywhere else) and store its root node in @doc_ret.
+ *
+ * A leading byte order mark is optional.  Exactly one root element is parsed;
+ * it may be preceded and followed only by whatever skip_misc() accepts, and
+ * the string must end after that.
+ *
+ * Returns 0 on success; otherwise WIMLIB_ERR_XML or the error code from
+ * parse_element(), in which case *@doc_ret is not written.
+ */
+int
+xml_parse_document(const tchar *p, struct xml_node **doc_ret)
+{
+       struct xml_node *root;
+       int ret;
+
+       /* The BOM is optional, so the result is deliberately ignored. */
+       skip_string(&p, BYTE_ORDER_MARK);
+
+       if (!skip_misc(&p))
+               return WIMLIB_ERR_XML;
+
+       ret = parse_element(&p, NULL, 0, &root);
+       if (ret != 0)
+               return ret;
+
+       /* Nothing but "misc" may follow the root element. */
+       if (skip_misc(&p) && *p == '\0') {
+               *doc_ret = root;
+               return 0;
+       }
+
+       xml_free_node(root);
+       return WIMLIB_ERR_XML;
+}
+
+/*----------------------------------------------------------------------------*
+ *                               XML writing                                  *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Append @len tchars from @str to @buf, growing the buffer if necessary.
+ *
+ * The buffer is always sized to hold the data plus one extra tchar, which
+ * leaves room for the NUL terminator that xml_write_document() stores at the
+ * end.  Capacity at least doubles (minimum 4096 tchars) on each growth, and is
+ * raised further when a single write needs more than that.
+ *
+ * If the reallocation fails, @buf->oom is set and nothing is appended; the
+ * existing buffer is left intact.
+ */
+static void
+xml_write(struct xml_out_buf *buf, const tchar *str, size_t len)
+{
+       /* Space required: existing data, new data, and the terminator. */
+       size_t needed = buf->count + len + 1;
+
+       if (needed > buf->capacity) {
+               size_t new_capacity = max(buf->capacity * 2, 4096);
+               tchar *new_buf;
+
+               /*
+                * Doubling alone is not enough when @len exceeds the current
+                * capacity (e.g. the first write of a long string); without
+                * this check the copy below would overrun the allocation.
+                */
+               if (new_capacity < needed)
+                       new_capacity = needed;
+               new_buf = REALLOC(buf->buf, new_capacity * sizeof(str[0]));
+               if (!new_buf) {
+                       buf->oom = true;
+                       return;
+               }
+               buf->buf = new_buf;
+               buf->capacity = new_capacity;
+       }
+       tmemcpy(&buf->buf[buf->count], str, len);
+       buf->count += len;
+}
+
+/* Append the NUL-terminated string @str to @buf, without its terminator. */
+static void
+xml_puts(struct xml_out_buf *buf, const tchar *str)
+{
+       const size_t nchars = tstrlen(str);
+
+       xml_write(buf, str, nchars);
+}
+
+/*
+ * Append the NUL-terminated string @str to @buf, replacing every character for
+ * which get_escape_seq() returns a non-NULL string with that string.  Runs of
+ * characters that need no escaping are copied through unchanged.
+ */
+static void
+xml_escape_and_puts(struct xml_out_buf *buf, const tchar *str)
+{
+       const tchar *run = str; /* start of the pending unescaped run */
+       const tchar *p;
+
+       for (p = str; *p != '\0'; p++) {
+               const tchar *seq = get_escape_seq(*p);
+
+               if (seq == NULL)
+                       continue;
+               /* Flush the run before this character, then its escape. */
+               xml_write(buf, run, p - run);
+               xml_puts(buf, seq);
+               run = p + 1;
+       }
+       /* Flush whatever follows the last escaped character. */
+       xml_write(buf, run, p - run);
+}
+
+/*
+ * Serialize the element @node and everything beneath it into @buf.
+ *
+ * The children of @node are walked twice: first to emit attribute children
+ * inside the start tag, then to emit text children (escaped) and element
+ * children (recursively) as the contents.  An explicit end tag is always
+ * written, even when there are no contents.
+ */
+static void
+xml_write_element(struct xml_node *node, struct xml_out_buf *buf)
+{
+       struct xml_node *cur;
+
+       /* Start tag: <name attr="value" ...> */
+       xml_puts(buf, T("<"));
+       xml_puts(buf, node->name);
+       xml_node_for_each_child(node, cur) {
+               if (cur->type == XML_ATTRIBUTE_NODE) {
+                       xml_puts(buf, T(" "));
+                       xml_puts(buf, cur->name);
+                       xml_puts(buf, T("=\""));
+                       xml_escape_and_puts(buf, cur->value);
+                       xml_puts(buf, T("\""));
+               }
+       }
+       xml_puts(buf, T(">"));
+
+       /* Contents: text and child elements, in order. */
+       xml_node_for_each_child(node, cur) {
+               switch (cur->type) {
+               case XML_TEXT_NODE:
+                       xml_escape_and_puts(buf, cur->value);
+                       break;
+               case XML_ELEMENT_NODE:
+                       xml_write_element(cur, buf);
+                       break;
+               default:
+                       /* Attributes were already written above. */
+                       break;
+               }
+       }
+
+       /* End tag: </name> */
+       xml_puts(buf, T("</"));
+       xml_puts(buf, node->name);
+       xml_puts(buf, T(">"));
+}
+
+/*
+ * Serialize the document rooted at @doc into @buf as a NUL-terminated string
+ * of 'tchar' (UTF-16LE in Windows builds, UTF-8 everywhere else).  The output
+ * starts with a byte order mark (BOM), which is needed for compatibility with
+ * WIMGAPI.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if any write into @buf ran out of
+ * memory; in that case the buffer is not NUL-terminated.
+ */
+int
+xml_write_document(struct xml_node *doc, struct xml_out_buf *buf)
+{
+       xml_puts(buf, BYTE_ORDER_MARK);
+       xml_write_element(doc, buf);
+
+       if (!buf->oom) {
+               /* xml_write() budgets one tchar beyond the data for this. */
+               buf->buf[buf->count] = '\0';
+               return 0;
+       }
+       return WIMLIB_ERR_NOMEM;
+}
+
+/*----------------------------------------------------------------------------*
+ *                              Test support                                  *
+ *----------------------------------------------------------------------------*/
+
+#ifdef ENABLE_TEST_SUPPORT
+/*
+ * Test helper: parse the XML document @in, then serialize the resulting tree
+ * again.
+ *
+ * On success, returns 0 and stores the newly allocated, NUL-terminated output
+ * string in *@out_ret (presumably to be released by the caller).  If parsing
+ * fails, the error code is returned and *@out_ret is not written.  If
+ * serialization fails, the partial output is freed here and *@out_ret is set
+ * to NULL, so the caller never receives an unterminated buffer together with
+ * an error code.
+ */
+WIMLIBAPI int
+wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret)
+{
+       struct xml_node *doc;
+       struct xml_out_buf buf = {};
+       int ret;
+
+       ret = xml_parse_document(in, &doc);
+       if (ret)
+               return ret;
+       ret = xml_write_document(doc, &buf);
+       xml_free_node(doc);
+       if (ret) {
+               /* Don't leak, or hand back, a half-written document. */
+               FREE(buf.buf);
+               buf.buf = NULL;
+       }
+       *out_ret = buf.buf;
+       return ret;
+}
+#endif /* ENABLE_TEST_SUPPORT */
index d95c6986100ff28848e7bfc395193b7d0e1cd363..e233909e5201f9bf8b186b68a556cc0fc40ad819 100755 (executable)
@@ -31,14 +31,6 @@ VERSION=$(tools/get-version-number)
 DESTDIR=wimlib-${VERSION}-windows-${ARCH}-bin
 ZIPFILE=wimlib-${VERSION}-windows-${ARCH}-bin.zip
 MAKE="make -j $(grep -c processor /proc/cpuinfo)"
-WINDEPDIR=./tools/windeps
-SYSROOT=$WINDEPDIR/sysroot_${ARCH}
-
-# Prepare third party libraries
-
-if [ ! -e $SYSROOT ]; then
-       $MAKE -C $WINDEPDIR sysroot_${ARCH}
-fi
 
 # Compile wimlib
 
@@ -50,16 +42,8 @@ then
        # Note: putting -static-libgcc in CC is a workaround for libtool
        # stripping it:
        # http://www.gnu.org/software/libtool/manual/libtool.html#Stripped-link-flags
-       #
-       # We also need to override the MinGW pkg-config with the "native" one in
-       # order for it to correctly restrict the include path to our $SYSROOT.
        ./configure --host=${ARCH}-w64-mingw32 --disable-static         \
-               CC="${ARCH}-w64-mingw32-gcc -static-libgcc"             \
-               CPPFLAGS="-I$SYSROOT/include"                           \
-               LDFLAGS="-L$SYSROOT/lib"                                \
-               PKG_CONFIG=pkg-config                                   \
-               PKG_CONFIG_LIBDIR="$SYSROOT/lib/pkgconfig"              \
-               "$@"
+               CC="${ARCH}-w64-mingw32-gcc -static-libgcc" "$@"
        $MAKE clean
 fi
 $MAKE
@@ -77,7 +61,6 @@ ${ARCH}-w64-mingw32-strip $DESTDIR/*.{dll,exe}
 # Install text files
 
 cp NEWS README* COPYING* $DESTDIR
-cp $WINDEPDIR/COPYING* $DESTDIR
 
 sed -n '/^#/q; s/^[\/\* ]*//; p' src/divsufsort.c > $DESTDIR/COPYING.libdivsufsort-lite
 if ! grep -q 'Copyright' $DESTDIR/COPYING.libdivsufsort-lite; then
index dafad10ec621772fbfbf9a8c628012b187b002f4..828aebe774661ad1ce7e8617eb9f17e40c747fd8 100755 (executable)
@@ -3,6 +3,5 @@
 for fil in src/*.c programs/imagex.c; do
        sparse $fil -gcc-base-dir `gcc --print-file-name=`              \
                -D_FILE_OFFSET_BITS=64 -DHAVE_CONFIG_H -D_GNU_SOURCE    \
-               -I. -Iinclude -I/usr/include/libxml2                    \
-               -Wbitwise -Wpointer-subtraction-blows
+               -I. -Iinclude -Wbitwise -Wpointer-subtraction-blows
 done
diff --git a/tools/windeps/Makefile b/tools/windeps/Makefile
deleted file mode 100644 (file)
index 8e88327..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# This Makefile builds the third-party libraries needed to build a standalone
-# libwim.dll for Windows.  We build these third-party libraries ourselves mainly
-# to cut down on bloat.  They are automatically downloaded from the URLs
-# declared below and verified against the checksums given in the 'sha256sums'
-# file.
-#
-# This Makefile requires a GNU toolchain with MinGW-w64 (i686 and x86_64
-# versions).
-#
-
-ARCHITECTURES          := i686 x86_64
-
-LIBXML2_VERSION                := 2.10.3
-LIBXML_URL             := https://download.gnome.org/sources/libxml2/2.10/libxml2-$(LIBXML2_VERSION).tar.xz
-LIBXML_SRCDIR          := libxml2-$(LIBXML2_VERSION)
-LIBXML_DIST            := $(LIBXML_SRCDIR).tar.xz
-SRCDIR_TARGETS         += $(LIBXML_SRCDIR)
-DIST_TARGETS           += $(LIBXML_DIST)
-$(LIBXML_DIST):
-       wget $(LIBXML_URL)
-$(LIBXML_SRCDIR):$(LIBXML_DIST) checksums_verified
-       tar xvf $<
-       cp $@/Copyright COPYING.libxml2
-MAKE_CLEAN_FILES += $(LIBXML_SRCDIR) COPYING.libxml2
-
-checksums_verified:$(DIST_TARGETS)
-       sha256sum -c sha256sums
-
-#
-# declare_libxml_target(arch)
-#
-define declare_libxml_target
-libxml_$(1):$(LIBXML_SRCDIR)
-       builddir=build_libxml_$(1);                             \
-       rm -rf $$$$builddir;                                    \
-       mkdir $$$$builddir;                                     \
-       cd $$$$builddir;                                        \
-       ../$(LIBXML_SRCDIR)/configure                           \
-               --host=$(1)-w64-mingw32                         \
-               --enable-static                                 \
-               --disable-shared                                \
-               --prefix=$$$$PWD/../sysroot_$(1)                \
-               CFLAGS=-Os                                      \
-               --with-minimum                                  \
-               --without-lzma                                  \
-               --with-tree                                     \
-               --with-writer;                                  \
-       $(MAKE) install;                                        \
-       rm -f ../sysroot_$(1)/lib/libxml2.la;
-
-$(1)_BUILD_TARGETS += libxml_$(1)
-MAKE_CLEAN_FILES += build_libxml_$(1)
-endef
-
-#
-# declare_arch_targets(arch)
-#
-define declare_arch_targets
-$(eval $(call declare_libxml_target,$(1)))
-
-sysroot_$(1): $($(1)_BUILD_TARGETS)
-
-ALL_SYSROOTS += sysroot_$(1)
-MAKE_CLEAN_FILES += sysroot_$(1)
-endef
-
-$(foreach arch,$(ARCHITECTURES),$(eval $(call declare_arch_targets,$(arch))))
-
-all: $(ALL_SYSROOTS)
-
-clean:
-       rm -rf $(MAKE_CLEAN_FILES) $(DIST_TARGETS)
-
-.PHONY: all clean $(SRCDIR_TARGETS) checksums_verified
-
-.DEFAULT_GOAL = all
diff --git a/tools/windeps/sha256sums b/tools/windeps/sha256sums
deleted file mode 100644 (file)
index 2318951..0000000
+++ /dev/null
@@ -1 +0,0 @@
-5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c  libxml2-2.10.3.tar.xz