Eliminate the dependency on libxml2

author Eric Biggers <ebiggers3@gmail.com>

Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)

committer Eric Biggers <ebiggers3@gmail.com>

Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
author Eric Biggers <ebiggers3@gmail.com>
Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml

index 02fa6ea5b3d254ac00274fb9711c7530ada9b0b5..95d88aaccad6efa0f8dacc9b0d95bf7865516a2e 100644 (file)
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -2,7 +2,7 @@ name: CI
  on: [pull_request, push]
  env:
    DEF_CFLAGS: -O2 -g -Wall -Werror
-  DEPENDENCIES: autoconf automake libtool pkg-config libxml2-dev libfuse-dev ntfs-3g-dev
+  DEPENDENCIES: autoconf automake libtool pkg-config libfuse-dev ntfs-3g-dev
  
  jobs:
    gcc-build-and-test:
@@ -54,7 +54,7 @@ jobs:
        run: |
          sudo dpkg --add-architecture i386
          sudo apt-get update
-        sudo apt-get install -y gcc-multilib $DEPENDENCIES libxml2-dev:i386
+        sudo apt-get install -y gcc-multilib $DEPENDENCIES
      - run: ./bootstrap
      - run: ./configure CC=gcc CFLAGS="-m32 $DEF_CFLAGS" --without-fuse --without-ntfs-3g
      - run: make -j8 check V=1
@@ -109,7 +109,7 @@ jobs:
      - uses: actions/checkout@v3
      - name: Install dependencies
        run: |
-        brew install autoconf automake libtool pkg-config libxml2
+        brew install autoconf automake libtool pkg-config
      - run: ./bootstrap
      - run: ./configure CFLAGS="$DEF_CFLAGS" --without-fuse --without-ntfs-3g
      - run: make -j8 check V=1
@@ -145,7 +145,6 @@ jobs:
            pkg-config
            make
            mingw-w64-${{matrix.env}}-cc
-          mingw-w64-${{matrix.env}}-libxml2
      - run: ./bootstrap
      - run: ./configure CFLAGS="$DEF_CFLAGS" --without-fuse --without-ntfs-3g
      - run: make -j8
diff --git a/.gitignore b/.gitignore

index ef29a1d11853db45cd1e870ccf14b527ba8cd9e4..feb1eaba93b7c19acbd965f201bbcb1ca216b82b 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -45,12 +45,6 @@
  /tests/tree-cmp
  /tests/wlfuzz
  /tests/wlfuzz.exe
-/tools/windeps/*.tar.*
-/tools/windeps/COPYING.*
-/tools/windeps/build_*
-/tools/windeps/libxml2*
-/tools/windeps/mingw*
-/tools/windeps/sysroot_*
  /wimlib-*-bin/
  /wimlib-*.tar
  /wimlib-*.tar.*
diff --git a/Makefile.am b/Makefile.am

index bb801a523856e7604ce3b89df751a356fce801f5..d784d46a9949e3102976956457c58e0b4142becc 100644 (file)
--- a/Makefile.am
+++ b/Makefile.am
@@ -92,6 +92,7 @@ libwim_la_SOURCES =           \
         src/write.c             \
         src/xml.c               \
         src/xml_windows.c       \
+       src/xmlproc.c           \
         src/xpress_compress.c   \
         src/xpress_decompress.c \
         include/wimlib/alloca.h         \
@@ -156,6 +157,7 @@ libwim_la_SOURCES =         \
         include/wimlib/xattr.h          \
         include/wimlib/xml.h            \
         include/wimlib/xml_windows.h    \
+       include/wimlib/xmlproc.h        \
         include/wimlib/xpress_constants.h
  
  if WITH_NTFS_3G
@@ -191,7 +193,6 @@ endif
  libwim_la_CFLAGS =             \
         $(AM_CFLAGS)            \
         $(PTHREAD_CFLAGS)       \
-       $(LIBXML2_CFLAGS)       \
         $(LIBNTFS_3G_CFLAGS)    \
         $(LIBFUSE_CFLAGS)
  
@@ -199,7 +200,6 @@ libwim_la_LDFLAGS = $(AM_LDFLAGS) -version-info 36:0:21
  
  libwim_la_LIBADD =             \
         $(PTHREAD_LIBS)         \
-       $(LIBXML2_LIBS)         \
         $(LIBNTFS_3G_LIBS)      \
         $(LIBFUSE_LIBS)         \
         $(LIBRT_LIBS)           \
diff --git a/NEWS b/NEWS

index 7447e5e52896cff6f7bba550a1eee0b7046d83d0..df8603359f02eb0c1705908ba952903022e7af3b 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,13 +1,12 @@
  Version 1.14.0-BETA1:
-       Removed OpenSSL (libcrypto) as a dependency of wimlib.
+       Removed libxml2 and libcrypto (OpenSSL) as dependencies of wimlib.
+       Also removed winpthreads as a dependency of wimlib on Windows.
  
         Improved the performance of the Windows binaries on CPUs that have SHA-1
         instructions.
  
         Removed support for Windows XP.
  
-       On Windows, wimlib no longer depends on winpthreads.
-
         Fixed a bug in 'wimsplit' where it didn't accept part sizes of 4 GiB or
         larger on Windows and on 32-bit platforms.
  
diff --git a/README b/README

index 6ea31854848e6b0a288f98e7341053e940fd763f..7f382cfe422b39cb76b573d74d4c8cd2ca8e13db 100644 (file)
--- a/README
+++ b/README
@@ -122,12 +122,6 @@ with it, when building for a UNIX-like system from source.  If you have
  downloaded the Windows binary distribution of wimlib and wimlib-imagex then all
  dependencies were already included and this section is irrelevant.
  
-* libxml2 (required)
-       This is a commonly used free library to read and write XML documents.
-       Almost all Linux distributions should include this; however, you may
-       need to install the header files, which might be in a package named
-       "libxml2-dev" or similar.  For more information see http://xmlsoft.org/.
-
  * libfuse (optional but recommended)
         Unless configured --without-fuse, wimlib requires a non-ancient version
         of libfuse.  Most Linux distributions already include this, but make
diff --git a/README.WINDOWS b/README.WINDOWS

index 0308a2edc9b54f8aa9e2a1751c547ca7383a79c0..28d8d7b4e9fffe2a6b82a43314e50752701712a0 100644 (file)
--- a/README.WINDOWS
+++ b/README.WINDOWS
@@ -103,7 +103,6 @@ packages from category "Devel":
      - make
      - mingw64-x86_64-binutils
      - mingw64-x86_64-gcc-g++
-    - mingw64-x86_64-libxml2
      - pkg-config
  
  Download wimlib's source code from https://wimlib.net/downloads/wimlib-1.13.6.tar.gz.
@@ -122,17 +121,6 @@ have been produced in the .libs directory.
  By default the binaries are built with debug symbols.  If desired, you can use
  x86_64-w64-mingw32-strip to strip them.
  
-libwim-15.dll will be linked to several other DLLs which you will need as well:
-
-    - libxml2-2.dll, which also requires:
-        - iconv.dll
-        - liblzma-5.dll
-        - zlib1.dll
-
-These DLLs can be found in "C:\cygwin\usr\x86_64-w64-mingw32\sys-root\mingw\bin"
-and must be placed alongside libwim-15.dll for it to run portably.  But see
-below for an alternative.
-
  Building 32-bit binaries is very similar, but you'll need to replace "x86_64"
  with "i686" everywhere in the above instructions, and libwim-15.dll will also
  depend on libgcc_s_sjlj-1.dll.  Note that you can build both 32-bit and 64-bit
@@ -163,8 +151,7 @@ bootstrap the repository, and run the Windows release script:
      ./bootstrap
      ./tools/make-windows-release x86_64
  
-The release script will download and build libxml2 as a static library, then
-build wimlib, then do some final tasks and bundle the resulting files up into a
-ZIP archive.  If successful you'll end up with a file like
-"wimlib-1.13.6-windows-x86_64-bin.zip", just like the official releases.  For
-32-bit binaries just use "i686" instead of "x86_64".
+The release script will build wimlib, then do some final tasks and bundle the
+resulting files up into a ZIP archive.  If successful you'll end up with a file
+like "wimlib-1.13.6-windows-x86_64-bin.zip", just like the official releases.
+For 32-bit binaries just use "i686" instead of "x86_64".
diff --git a/configure.ac b/configure.ac

index 1d53735331136c9c612ea36b3cd7145e53839a1f..34b2e184d383ccfef848d1c8f96eecd44bb9570a 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -103,10 +103,6 @@ if test "$WINDOWS_NATIVE_BUILD" != "yes"; then
         AX_PTHREAD([], [AC_MSG_ERROR(["cannot find pthreads library"])])
  fi
  
-# ------------------------------ libxml2 --------------------------------------
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0])
-PKGCONFIG_PRIVATE_REQUIRES="$PKGCONFIG_PRIVATE_REQUIRES libxml-2.0"
-
  ###############################################################################
  #                        Configuration options                               #
  ###############################################################################
diff --git a/include/wimlib/test_support.h b/include/wimlib/test_support.h

index 8c19a01a7c8a926e26b146c82930442470dea45a..4d6df5578673f448244913d29eb249ec75ff4cda 100644 (file)
--- a/include/wimlib/test_support.h
+++ b/include/wimlib/test_support.h
@@ -17,6 +17,9 @@ extern int
  wimlib_compare_images(WIMStruct *wim1, int image1,
                       WIMStruct *wim2, int image2, int cmp_flags);
  
+extern int
+wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret);
+
  #endif /* ENABLE_TEST_SUPPORT */
  
  #endif /* _WIMLIB_TEST_SUPPORT_H */
diff --git a/include/wimlib/xml.h b/include/wimlib/xml.h

index 9ce52104093ea646686775448733672f43794613..a8d8f91f5ca40e61a8da38757c5ee7ea7b5db196 100644 (file)
--- a/include/wimlib/xml.h
+++ b/include/wimlib/xml.h
@@ -71,17 +71,4 @@ write_wim_xml_data(WIMStruct *wim, int image,
                    u64 total_bytes, struct wim_reshdr *out_reshdr,
                    int write_resource_flags);
  
-/*****************************************************************************/
-
-extern void
-xml_global_init(void);
-
-extern void
-xml_global_cleanup(void);
-
-extern void
-xml_set_memory_allocator(void *(*malloc_func)(size_t),
-                        void (*free_func)(void *),
-                        void *(*realloc_func)(void *, size_t));
-
  #endif /* _WIMLIB_XML_H */
diff --git a/include/wimlib/xmlproc.h b/include/wimlib/xmlproc.h

new file mode 100644 (file)

index 0000000..374d27e
--- /dev/null
+++ b/include/wimlib/xmlproc.h
@@ -0,0 +1,94 @@
+#ifndef _WIMLIB_XMLPROC_H
+#define _WIMLIB_XMLPROC_H
+
+#include "wimlib/list.h"
+#include "wimlib/types.h"
+
+/*****************************************************************************/
+
+/* The kinds of node that a 'struct xml_node' can represent. */
+enum xml_node_type {
+       XML_ELEMENT_NODE,       /* uses 'name' and 'children' */
+       XML_TEXT_NODE,          /* uses 'value' */
+       XML_ATTRIBUTE_NODE,     /* uses 'name' and 'value' */
+};
+
+/*
+ * A node of an XML document in tree form.  Which of 'name', 'value', and
+ * 'children' are meaningful depends on 'type'.
+ */
+struct xml_node {
+       enum xml_node_type type;        /* type of node */
+       tchar *name;                    /* name of ELEMENT or ATTRIBUTE */
+       tchar *value;                   /* value of TEXT or ATTRIBUTE */
+       struct xml_node *parent;        /* parent, or NULL if none */
+       struct list_head children;      /* children; only used for ELEMENT */
+       struct list_head sibling_link;  /* link in the parent's 'children' list */
+};
+
+/*
+ * Iterate through the children of an xml_node.  Does nothing if passed NULL.
+ *
+ * The NULL check is written as "if (!(parent)) {} else <loop>" rather than
+ * "if (parent) <loop>" so that an 'else' written by the caller after the loop
+ * body cannot silently bind to the macro's hidden 'if'.  'parent' is evaluated
+ * more than once, so it must not have side effects.
+ */
+#define xml_node_for_each_child(parent, child) \
+       if (!(parent)) {} else                  \
+               list_for_each_entry(child, &(parent)->children, sibling_link)
+
+/* Is the specified node an element with the specified name? */
+static inline bool
+xml_node_is_element(const struct xml_node *node, const tchar *name)
+{
+       /* Only elements are compared by name; other node types never match. */
+       if (node->type != XML_ELEMENT_NODE)
+               return false;
+       return tstrcmp(node->name, name) == 0;
+}
+
+struct xml_node *
+xml_new_element(struct xml_node *parent, const tchar *name);
+
+struct xml_node *
+xml_new_element_with_text(struct xml_node *parent, const tchar *name,
+                         const tchar *text);
+
+struct xml_node *
+xml_new_attrib(struct xml_node *parent, const tchar *name, const tchar *value);
+
+void
+xml_add_child(struct xml_node *parent, struct xml_node *child);
+
+void
+xml_unlink_node(struct xml_node *node);
+
+void
+xml_free_node(struct xml_node *node);
+
+const tchar *
+xml_element_get_text(const struct xml_node *element);
+
+int
+xml_element_set_text(struct xml_node *element, const tchar *text);
+
+struct xml_node *
+xml_get_attrib(const struct xml_node *element, const tchar *name);
+
+int
+xml_set_attrib(struct xml_node *element, const tchar *name, const tchar *value);
+
+void
+xml_replace_child(struct xml_node *parent, struct xml_node *replacement);
+
+struct xml_node *
+xml_clone_tree(struct xml_node *orig);
+
+bool
+xml_legal_name(const tchar *name);
+
+bool
+xml_legal_value(const tchar *value);
+
+/*****************************************************************************/
+
+int
+xml_parse_document(const tchar *p, struct xml_node **doc_ret);
+
+/*****************************************************************************/
+
+struct xml_out_buf {
+       tchar *buf;
+       size_t count;
+       size_t capacity;
+       bool oom;
+};
+
+int
+xml_write_document(struct xml_node *doc, struct xml_out_buf *buf);
+
+#endif /* _WIMLIB_XMLPROC_H */
diff --git a/include/wimlib_tchar.h b/include/wimlib_tchar.h

index 339b46d6bdbdfe40db4d731f2112de7f8f0a3fc4..781f370c7d42760ebb3391ee6adc7b6daea2e702 100644 (file)
--- a/include/wimlib_tchar.h
+++ b/include/wimlib_tchar.h
@@ -39,6 +39,7 @@ typedef wchar_t tchar;
  #  define tstrchr      wcschr
  #  define tstrpbrk     wcspbrk
  #  define tstrrchr     wcsrchr
+#  define tstrstr      wcsstr
  #  define tstrlen      wcslen
  #  define tmemcmp      wmemcmp
  #  define tstrcasecmp   _wcsicmp
@@ -102,6 +103,7 @@ typedef char tchar;
  #  define tstrchr      strchr
  #  define tstrpbrk     strpbrk
  #  define tstrrchr     strrchr
+#  define tstrstr      strstr
  #  define tstrlen      strlen
  #  define tmemcmp      memcmp
  #  define tstrcasecmp   strcasecmp
diff --git a/src/util.c b/src/util.c

index ef4554efc7c19e189cebfe7528585370c5f6869d..1f9f779282c9834a017e47abefb1ca958520b7c4 100644 (file)
--- a/src/util.c
+++ b/src/util.c
@@ -3,7 +3,7 @@
   */
  
  /*
- * Copyright (C) 2012-2016 Eric Biggers
+ * Copyright 2012-2023 Eric Biggers
   *
   * This file is free software; you can redistribute it and/or modify it under
   * the terms of the GNU Lesser General Public License as published by the Free
@@ -43,7 +43,6 @@
  #include "wimlib/error.h"
  #include "wimlib/timestamp.h"
  #include "wimlib/util.h"
-#include "wimlib/xml.h"
  
  /*******************
   * Memory allocation
@@ -153,9 +152,6 @@ wimlib_set_memory_allocator(void *(*malloc_func)(size_t),
         wimlib_malloc_func  = malloc_func  ? malloc_func  : malloc;
         wimlib_free_func    = free_func    ? free_func    : free;
         wimlib_realloc_func = realloc_func ? realloc_func : realloc;
-
-       xml_set_memory_allocator(wimlib_malloc_func, wimlib_free_func,
-                                wimlib_realloc_func);
         return 0;
  }
  
diff --git a/src/wim.c b/src/wim.c

index e9a6c8eba1ca665d619272e82c9eb92c1ed2faf8..97125c4b10549769d65c2792040f4b6fb4f4e6ee 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -979,7 +979,6 @@ wimlib_global_init(int init_flags)
                 goto out_unlock;
  
         init_cpu_features();
-       xml_global_init();
  #ifdef _WIN32
         ret = win32_global_init(init_flags);
         if (ret)
@@ -1010,7 +1009,6 @@ wimlib_global_cleanup(void)
         if (!lib_initialized)
                 goto out_unlock;
  
-       xml_global_cleanup();
  #ifdef _WIN32
         win32_global_cleanup();
  #endif
diff --git a/src/xml.c b/src/xml.c

index 3812ce1e19d7926e499a9c230e0bddb9664d02ea..3194978b015289df871daf5af8b828ae0dbe7d41 100644 (file)
--- a/src/xml.c
+++ b/src/xml.c
@@ -1,11 +1,9 @@
  /*
- * xml.c
- *
- * Deals with the XML information in WIM files.  Uses the C library libxml2.
+ * xml.c - deals with the XML information in WIM files
   */
  
  /*
- * Copyright (C) 2012-2016 Eric Biggers
+ * Copyright 2012-2023 Eric Biggers
   *
   * This file is free software; you can redistribute it and/or modify it under
   * the terms of the GNU Lesser General Public License as published by the Free
@@ -25,9 +23,7 @@
  #  include "config.h"
  #endif
  
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <libxml/xmlsave.h>
+#include <stdlib.h>
  #include <string.h>
  
  #include "wimlib/blob_table.h"
@@ -39,6 +35,7 @@
  #include "wimlib/resource.h"
  #include "wimlib/timestamp.h"
  #include "wimlib/xml.h"
+#include "wimlib/xmlproc.h"
  #include "wimlib/write.h"
  
  /*
@@ -48,314 +45,107 @@
   */
  struct wim_xml_info {
  
-       /* The parsed XML document as a libxml2 document tree  */
-       xmlDocPtr doc;
-
-       /* The root element of the document.  This is a cached value, equal to
-        * xmlDocGetRootElement(doc).  */
-       xmlNode *root;
+       /* The XML document in tree form */
+       struct xml_node *root;
  
         /* A malloc()ed array containing a pointer to the IMAGE element for each
          * WIM image.  The image with 1-based index 'i' is at index 'i - 1' in
          * this array.  Note: these pointers are cached values, since they could
          * also be found by searching the document.  */
-       xmlNode **images;
+       struct xml_node **images;
  
         /* The number of WIM images (the length of 'images')  */
         int image_count;
-
-#if TCHAR_IS_UTF16LE
-       /* Temporary memory for UTF-8 => 'tchar' string translations.  When an
-        * API function needs to return a 'tchar' string, it uses one of these
-        * array slots to hold the string and returns a pointer to it.  */
-       tchar *strings[128];
-       size_t next_string_idx;
-       size_t num_strings;
-#endif
  };
  
-/*----------------------------------------------------------------------------*
- *                            Internal functions                              *
- *----------------------------------------------------------------------------*/
-
-/* Iterate through the children of an xmlNode.  */
-#define node_for_each_child(parent, child)     \
-       for (child = (parent)->children; child != NULL; child = child->next)
-
-/* Is the specified node an element of the specified name?  */
-static bool
-node_is_element(const xmlNode *node, const xmlChar *name)
-{
-       return node->type == XML_ELEMENT_NODE && xmlStrEqual(node->name, name);
-}
-
-/* Retrieve a pointer to the UTF-8 text contents of the specified node, or NULL
- * if the node has no text contents.  This assumes the simple case where the
- * node has a single TEXT child node.  */
-static const xmlChar *
-node_get_text(const xmlNode *node)
-{
-       const xmlNode *child;
-
-       if (!node)
-               return NULL;
-       node_for_each_child(node, child)
-               if (child->type == XML_TEXT_NODE && child->content)
-                       return child->content;
-       return NULL;
-}
-
-/* Retrieve an unsigned integer from the contents of the specified node,
- * decoding it using the specified base.  If the node has no contents or does
- * not contain a valid number, returns 0.  */
  static u64
-node_get_number(const xmlNode *node, int base)
+parse_number(const tchar *str, int base)
  {
-       const xmlChar *str = node_get_text(node);
-       char *end;
+       tchar *end;
         unsigned long long v;
  
         if (!str)
                 return 0;
-       v = strtoull(str, &end, base);
-       if ((xmlChar *)end == str || *end || v >= UINT64_MAX)
+       v = tstrtoull(str, &end, base);
+       if (end == str || *end || v >= UINT64_MAX)
                 return 0;
         return v;
  }
  
-/* Retrieve the timestamp from a time node.  This node should have child
- * elements HIGHPART and LOWPART; these elements will be used to construct a
- * Windows-style timestamp.  */
+/*
+ * Retrieve an unsigned integer from the contents of the specified element,
+ * decoding it using the specified base.  If the element has no contents or does
+ * not contain a valid number, returns 0.
+ */
  static u64
-node_get_timestamp(const xmlNode *node)
-{
-       u64 timestamp = 0;
-       xmlNode *child;
-
-       if (!node)
-               return 0;
-       node_for_each_child(node, child) {
-               if (node_is_element(child, "HIGHPART"))
-                       timestamp |= node_get_number(child, 16) << 32;
-               else if (node_is_element(child, "LOWPART"))
-                       timestamp |= node_get_number(child, 16);
-       }
-       return timestamp;
-}
-
-static int
-tstr_get_utf8(const tchar *tstr, const xmlChar **utf8_ret)
-{
-#if TCHAR_IS_UTF16LE
-       return utf16le_to_utf8(tstr, tstrlen(tstr) * sizeof(tchar),
-                              (char **)utf8_ret, NULL);
-#else
-       *utf8_ret = (const xmlChar *)tstr;
-       return 0;
-#endif
-}
-
-static void
-tstr_put_utf8(const xmlChar *utf8)
-{
-#if TCHAR_IS_UTF16LE
-       FREE((char *)utf8);
-#endif
-}
-
-/* Retrieve the text contents of an XML element as a 'tchar' string.  If not
- * found or if the text could not be translated, returns NULL.  */
-static const tchar *
-node_get_ttext(struct wim_xml_info *info, xmlNode *node)
-{
-       const xmlChar *text = node_get_text(node);
-
-#if TCHAR_IS_UTF16LE
-       tchar **ttext_p;
-
-       if (!text)
-               return NULL;
-
-       ttext_p = &info->strings[info->next_string_idx];
-       if (info->num_strings >= ARRAY_LEN(info->strings)) {
-               FREE(*ttext_p);
-               *ttext_p = NULL;
-       }
-       if (utf8_to_tstr(text, strlen(text), ttext_p, NULL))
-               return NULL;
-       if (info->num_strings < ARRAY_LEN(info->strings))
-               info->num_strings++;
-       info->next_string_idx++;
-       info->next_string_idx %= ARRAY_LEN(info->strings);
-       return *ttext_p;
-#else
-       return text;
-#endif
-}
-
-/* Unlink the specified node from its parent, then free it (recursively).  */
-static void
-unlink_and_free_tree(xmlNode *node)
+xml_element_get_number(const struct xml_node *element, int base)
  {
-       xmlUnlinkNode(node);
-       xmlFreeNode(node);
+       return parse_number(xml_element_get_text(element), base);
  }
  
-/* Unlink and free (recursively) all children of the specified node.  */
-static void
-unlink_and_free_children(xmlNode *node)
-{
-       xmlNode *child;
-
-       while ((child = node->last) != NULL)
-               unlink_and_free_tree(child);
-}
-
-/* Add the new child element 'replacement' to 'parent', replacing any same-named
- * element that may already exist.  */
-static void
-node_replace_child_element(xmlNode *parent, xmlNode *replacement)
-{
-       xmlNode *child;
-
-       node_for_each_child(parent, child) {
-               if (node_is_element(child, replacement->name)) {
-                       xmlReplaceNode(child, replacement);
-                       xmlFreeNode(child);
-                       return;
-               }
-       }
-
-       xmlAddChild(parent, replacement);
-}
-
-/* Set the text contents of the specified element to the specified string,
- * replacing the existing contents (if any).  The string is "raw" and is
- * permitted to contain characters that have special meaning in XML.  */
-static int
-node_set_text(xmlNode *node, const xmlChar *text)
-{
-       xmlNode *text_node = xmlNewText(text);
-       if (!text_node)
-               return WIMLIB_ERR_NOMEM;
-       unlink_and_free_children(node);
-       xmlAddChild(node, text_node);
-       return 0;
-}
-
-/* Like 'node_set_text()', but takes in a 'tchar' string.  */
-static int
-node_set_ttext(xmlNode *node, const tchar *ttext)
-{
-       const xmlChar *text;
-       int ret;
-
-       ret = tstr_get_utf8(ttext, &text);
-       if (ret)
-               return ret;
-       ret = node_set_text(node, text);
-       tstr_put_utf8(text);
-       return ret;
-}
-
-/* Create a new element containing text and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_text(xmlNode *parent, const xmlChar *name, const xmlChar *text)
+/*
+ * Retrieve the timestamp from a time element.  This element should have child
+ * elements HIGHPART and LOWPART; these elements will be used to construct a
+ * Windows-style timestamp.
+ */
+static u64
+xml_element_get_timestamp(const struct xml_node *element)
  {
-       xmlNode *node;
-
-       node = xmlNewNode(NULL, name);
-       if (!node)
-               return NULL;
+       u64 timestamp = 0;
+       const struct xml_node *child;
  
-       if (node_set_text(node, text)) {
-               xmlFreeNode(node);
-               return NULL;
+       xml_node_for_each_child(element, child) {
+               if (xml_node_is_element(child, T("HIGHPART")))
+                       timestamp |= xml_element_get_number(child, 16) << 32;
+               else if (xml_node_is_element(child, T("LOWPART")))
+                       timestamp |= xml_element_get_number(child, 16);
         }
-
-       if (parent)
-               xmlAddChild(parent, node);
-       return node;
-}
-
-/* Create a new element containing text and optionally link it into a tree.  */
-static int
-new_element_with_ttext(xmlNode *parent, const xmlChar *name, const tchar *ttext,
-                      xmlNode **node_ret)
-{
-       const xmlChar *text;
-       int ret;
-       xmlNode *node;
-
-       ret = tstr_get_utf8(ttext, &text);
-       if (ret)
-               return ret;
-       node = new_element_with_text(parent, name, text);
-       tstr_put_utf8(text);
-       if (!node)
-               return WIMLIB_ERR_NOMEM;
-       if (node_ret)
-               *node_ret = node;
-       return 0;
+       return timestamp;
  }
  
  /* Create a new timestamp element and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_timestamp(xmlNode *parent, const xmlChar *name, u64 timestamp)
+static struct xml_node *
+xml_new_element_with_timestamp(struct xml_node *parent, const tchar *name,
+                              u64 timestamp)
  {
-       xmlNode *node;
-       char buf[32];
+       struct xml_node *element;
+       tchar buf[32];
  
-       node = xmlNewNode(NULL, name);
-       if (!node)
+       element = xml_new_element(NULL, name);
+       if (!element)
                 goto err;
  
-       sprintf(buf, "0x%08"PRIX32, (u32)(timestamp >> 32));
-       if (!new_element_with_text(node, "HIGHPART", buf))
+       tsprintf(buf, T("0x%08"PRIX32), (u32)(timestamp >> 32));
+       if (!xml_new_element_with_text(element, T("HIGHPART"), buf))
                 goto err;
  
-       sprintf(buf, "0x%08"PRIX32, (u32)timestamp);
-       if (!new_element_with_text(node, "LOWPART", buf))
+       tsprintf(buf, T("0x%08"PRIX32), (u32)timestamp);
+       if (!xml_new_element_with_text(element, T("LOWPART"), buf))
                 goto err;
  
         if (parent)
-               xmlAddChild(parent, node);
-       return node;
+               xml_add_child(parent, element);
+       return element;
  
  err:
-       xmlFreeNode(node);
+       xml_free_node(element);
         return NULL;
  }
  
  /* Create a new number element and optionally link it into a tree.  */
-static xmlNode *
-new_element_with_u64(xmlNode *parent, const xmlChar *name, u64 value)
+static struct xml_node *
+xml_new_element_with_u64(struct xml_node *parent, const tchar *name, u64 value)
  {
-       char buf[32];
+       tchar buf[32];
  
-       sprintf(buf, "%"PRIu64, value);
-       return new_element_with_text(parent, name, buf);
-}
-
-/* Allocate a 'struct wim_xml_info'.  The caller is responsible for initializing
- * the document and the images array.  */
-static struct wim_xml_info *
-alloc_wim_xml_info(void)
-{
-       struct wim_xml_info *info = MALLOC(sizeof(*info));
-#if TCHAR_IS_UTF16LE
-       if (info) {
-               info->next_string_idx = 0;
-               info->num_strings = 0;
-       }
-#endif
-       return info;
+       tsprintf(buf, T("%"PRIu64), value);
+       return xml_new_element_with_text(parent, name, buf);
  }
  
  static bool
-parse_index(xmlChar **pp, u32 *index_ret)
+parse_index(tchar **pp, u32 *index_ret)
  {
-       xmlChar *p = *pp;
+       tchar *p = *pp;
         u32 index = 0;
  
         *p++ = '\0'; /* overwrite '[' */
@@ -379,21 +169,21 @@ parse_index(xmlChar **pp, u32 *index_ret)
  }
  
  static int
-do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
-                xmlNode **result_ret)
+do_xml_path_walk(struct xml_node *element, const tchar *path, bool create,
+                struct xml_node **result_ret)
  {
-       size_t n = strlen(path) + 1;
-       xmlChar buf[n];
-       xmlChar *p;
-       xmlChar c;
+       size_t n = tstrlen(path) + 1;
+       tchar buf[n];
+       tchar *p;
+       tchar c;
  
         *result_ret = NULL;
  
-       if (!node)
+       if (!element)
                 return 0;
  
         /* Copy the path to a temporary buffer.  */
-       memcpy(buf, path, n);
+       tmemcpy(buf, path, n);
         p = buf;
  
         if (*p == '/')
@@ -401,8 +191,8 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
         c = *p;
  
         while (c != '\0') {
-               const xmlChar *name;
-               xmlNode *child;
+               const tchar *name;
+               struct xml_node *child;
                 u32 index = 1;
  
                 /* We have another path component.  */
@@ -422,8 +212,8 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
                 *p = '\0';
  
                 /* Look for a matching child.  */
-               node_for_each_child(node, child)
-                       if (node_is_element(child, name) && !--index)
+               xml_node_for_each_child(element, child)
+                       if (xml_node_is_element(child, name) && !--index)
                                 goto next_step;
  
                 /* No child matched the path.  If create=false, the lookup
@@ -436,99 +226,99 @@ do_xml_path_walk(xmlNode *node, const xmlChar *path, bool create,
                 if (index != 1)
                         return WIMLIB_ERR_INVALID_PARAM;
  
-               child = xmlNewChild(node, NULL, name, NULL);
+               child = xml_new_element(element, name);
                 if (!child)
                         return WIMLIB_ERR_NOMEM;
         next_step:
                 /* Continue to the next path component, if there is one.  */
-               node = child;
+               element = child;
                 p++;
         }
  
-       *result_ret = node;
+       *result_ret = element;
         return 0;
  
  bad_syntax:
-       ERROR("The XML path \"%s\" has invalid syntax.", path);
+       ERROR("The XML path \"%"TS"\" has invalid syntax.", path);
         return WIMLIB_ERR_INVALID_PARAM;
  }
  
  /* Retrieve the XML element, if any, at the specified 'path'.  This supports a
   * simple filesystem-like syntax.  If the element was found, returns a pointer
   * to it; otherwise returns NULL.  */
-static xmlNode *
-xml_get_node_by_path(xmlNode *root, const xmlChar *path)
+static struct xml_node *
+xml_get_element_by_path(struct xml_node *root, const tchar *path)
  {
-       xmlNode *node;
-       do_xml_path_walk(root, path, false, &node);
-       return node;
+       struct xml_node *element;
+
+       do_xml_path_walk(root, path, false, &element);
+       return element;
  }
  
-/* Similar to xml_get_node_by_path(), but creates the element and any requisite
- * ancestor elements as needed.   If successful, 0 is returned and *node_ret is
- * set to a pointer to the resulting element.  If unsuccessful, an error code is
- * returned and *node_ret is set to NULL.  */
+/*
+ * Similar to xml_get_element_by_path(), but creates the element and any
+ * requisite ancestor elements as needed.  If successful, 0 is returned and
+ * *element_ret is set to a pointer to the resulting element.  If unsuccessful,
+ * an error code is returned and *element_ret is set to NULL.
+ */
  static int
-xml_ensure_node_by_path(xmlNode *root, const xmlChar *path, xmlNode **node_ret)
+xml_ensure_element_by_path(struct xml_node *root, const tchar *path,
+                          struct xml_node **element_ret)
  {
-       return do_xml_path_walk(root, path, true, node_ret);
+       return do_xml_path_walk(root, path, true, element_ret);
  }
  
  static u64
-xml_get_number_by_path(xmlNode *root, const xmlChar *path)
+xml_get_number_by_path(struct xml_node *root, const tchar *path)
  {
-       return node_get_number(xml_get_node_by_path(root, path), 10);
+       return xml_element_get_number(xml_get_element_by_path(root, path), 10);
  }
  
  static u64
-xml_get_timestamp_by_path(xmlNode *root, const xmlChar *path)
+xml_get_timestamp_by_path(struct xml_node *root, const tchar *path)
  {
-       return node_get_timestamp(xml_get_node_by_path(root, path));
-}
-
-static const xmlChar *
-xml_get_text_by_path(xmlNode *root, const xmlChar *path)
-{
-       return node_get_text(xml_get_node_by_path(root, path));
+       return xml_element_get_timestamp(xml_get_element_by_path(root, path));
  }
  
  static const tchar *
-xml_get_ttext_by_path(struct wim_xml_info *info, xmlNode *root,
-                     const xmlChar *path)
+xml_get_text_by_path(struct xml_node *root, const tchar *path)
  {
-       return node_get_ttext(info, xml_get_node_by_path(root, path));
+       return xml_element_get_text(xml_get_element_by_path(root, path));
  }
  
-/* Creates/replaces (if ttext is not NULL and not empty) or removes (if ttext is
- * NULL or empty) an element containing text.  */
+/*
+ * Create/replace (if text is not NULL and not empty) or remove (if text is NULL
+ * or empty) an element containing text.
+ */
  static int
-xml_set_ttext_by_path(xmlNode *root, const xmlChar *path, const tchar *ttext)
+xml_set_text_by_path(struct xml_node *root, const tchar *path, const tchar *text)
  {
         int ret;
-       xmlNode *node;
+       struct xml_node *element;
  
-       if (ttext && *ttext) {
+       if (text && *text) {
                 /* Create or replace  */
-               ret = xml_ensure_node_by_path(root, path, &node);
+               ret = xml_ensure_element_by_path(root, path, &element);
                 if (ret)
                         return ret;
-               return node_set_ttext(node, ttext);
+               return xml_element_set_text(element, text);
         } else {
                 /* Remove  */
-               node = xml_get_node_by_path(root, path);
-               if (node)
-                       unlink_and_free_tree(node);
+               element = xml_get_element_by_path(root, path);
+               if (element)
+                       xml_free_node(element);
                 return 0;
         }
  }
  
  /* Unlink and return the node which represents the INDEX attribute of the
   * specified IMAGE element.  */
-static xmlAttr *
-unlink_index_attribute(xmlNode *image_node)
+static struct xml_node *
+unlink_index_attribute(struct xml_node *image_node)
  {
-       xmlAttr *attr = xmlHasProp(image_node, "INDEX");
-       xmlUnlinkNode((xmlNode *)attr);
+       struct xml_node *attr = xml_get_attrib(image_node, T("INDEX"));
+
+       xml_unlink_node(attr);
         return attr;
  }
  
@@ -550,19 +340,21 @@ inode_sum_stream_sizes(const struct wim_inode *inode,
  }
  
  static int
-append_image_node(struct wim_xml_info *info, xmlNode *image_node)
+append_image_node(struct wim_xml_info *info, struct xml_node *image_node)
  {
-       char buf[32];
-       xmlNode **images;
+       tchar buf[32];
+       struct xml_node **images;
+       int ret;
  
         /* Limit exceeded?  */
         if (unlikely(info->image_count >= MAX_IMAGES))
                 return WIMLIB_ERR_IMAGE_COUNT;
  
-       /* Add the INDEX attribute.  */
-       sprintf(buf, "%d", info->image_count + 1);
-       if (!xmlNewProp(image_node, "INDEX", buf))
-               return WIMLIB_ERR_NOMEM;
+       /* Set the INDEX attribute. */
+       tsprintf(buf, T("%d"), info->image_count + 1);
+       ret = xml_set_attrib(image_node, T("INDEX"), buf);
+       if (ret)
+               return ret;
  
         /* Append the IMAGE element to the 'images' array.  */
         images = REALLOC(info->images,
@@ -573,7 +365,7 @@ append_image_node(struct wim_xml_info *info, xmlNode *image_node)
         images[info->image_count++] = image_node;
  
         /* Add the IMAGE element to the document.  */
-       xmlAddChild(info->root, image_node);
+       xml_add_child(info->root, image_node);
         return 0;
  }
  
@@ -585,31 +377,17 @@ append_image_node(struct wim_xml_info *info, xmlNode *image_node)
  struct wim_xml_info *
  xml_new_info_struct(void)
  {
-       struct wim_xml_info *info;
+       struct wim_xml_info *info = CALLOC(1, sizeof(*info));
  
-       info = alloc_wim_xml_info();
         if (!info)
-               goto err;
-
-       info->doc = xmlNewDoc("1.0");
-       if (!info->doc)
-               goto err_free_info;
-
-       info->root = xmlNewNode(NULL, "WIM");
-       if (!info->root)
-               goto err_free_doc;
-       xmlDocSetRootElement(info->doc, info->root);
+               return NULL;
  
-       info->images = NULL;
-       info->image_count = 0;
+       info->root = xml_new_element(NULL, T("WIM"));
+       if (!info->root) {
+               FREE(info);
+               return NULL;
+       }
         return info;
-
-err_free_doc:
-       xmlFreeDoc(info->doc);
-err_free_info:
-       FREE(info);
-err:
-       return NULL;
  }
  
  /* Free a 'struct wim_xml_info'.  */
@@ -617,12 +395,8 @@ void
  xml_free_info_struct(struct wim_xml_info *info)
  {
         if (info) {
-               xmlFreeDoc(info->doc);
+               xml_free_node(info->root);
                 FREE(info->images);
-       #if TCHAR_IS_UTF16LE
-               for (size_t i = 0; i < info->num_strings; i++)
-                       FREE(info->strings[i]);
-       #endif
                 FREE(info);
         }
  }
@@ -640,7 +414,7 @@ xml_get_image_count(const struct wim_xml_info *info)
  u64
  xml_get_total_bytes(const struct wim_xml_info *info)
  {
-       return xml_get_number_by_path(info->root, "TOTALBYTES");
+       return xml_get_number_by_path(info->root, T("TOTALBYTES"));
  }
  
  /* Retrieve the TOTALBYTES value for the specified image, or 0 if this value is
@@ -648,7 +422,7 @@ xml_get_total_bytes(const struct wim_xml_info *info)
  u64
  xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
  {
-       return xml_get_number_by_path(info->images[image - 1], "TOTALBYTES");
+       return xml_get_number_by_path(info->images[image - 1], T("TOTALBYTES"));
  }
  
  /* Retrieve the HARDLINKBYTES value for the specified image, or 0 if this value
@@ -656,7 +430,8 @@ xml_get_image_total_bytes(const struct wim_xml_info *info, int image)
  u64
  xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
  {
-       return xml_get_number_by_path(info->images[image - 1], "HARDLINKBYTES");
+       return xml_get_number_by_path(info->images[image - 1],
+                                     T("HARDLINKBYTES"));
  }
  
  /* Retrieve the WIMBOOT value for the specified image, or false if this value is
@@ -664,7 +439,7 @@ xml_get_image_hard_link_bytes(const struct wim_xml_info *info, int image)
  bool
  xml_get_wimboot(const struct wim_xml_info *info, int image)
  {
-       return xml_get_number_by_path(info->images[image - 1], "WIMBOOT");
+       return xml_get_number_by_path(info->images[image - 1], T("WIMBOOT"));
  }
  
  /* Retrieve the Windows build number for the specified image, or 0 if this
@@ -673,14 +448,15 @@ u64
  xml_get_windows_build_number(const struct wim_xml_info *info, int image)
  {
         return xml_get_number_by_path(info->images[image - 1],
-                                     "WINDOWS/VERSION/BUILD");
+                                     T("WINDOWS/VERSION/BUILD"));
  }
  
  /* Set the WIMBOOT value for the specified image.  */
  int
  xml_set_wimboot(struct wim_xml_info *info, int image)
  {
-       return xml_set_ttext_by_path(info->images[image - 1], "WIMBOOT", T("1"));
+       return xml_set_text_by_path(info->images[image - 1],
+                                   T("WIMBOOT"), T("1"));
  }
  
  /*
@@ -694,18 +470,18 @@ int
  xml_update_image_info(WIMStruct *wim, int image)
  {
         const struct wim_image_metadata *imd = wim->image_metadata[image - 1];
-       xmlNode *image_node = wim->xml_info->images[image - 1];
+       struct xml_node *image_node = wim->xml_info->images[image - 1];
         const struct wim_inode *inode;
         u64 dir_count = 0;
         u64 file_count = 0;
         u64 total_bytes = 0;
         u64 hard_link_bytes = 0;
         u64 size;
-       xmlNode *dircount_node;
-       xmlNode *filecount_node;
-       xmlNode *totalbytes_node;
-       xmlNode *hardlinkbytes_node;
-       xmlNode *lastmodificationtime_node;
+       struct xml_node *dircount_node;
+       struct xml_node *filecount_node;
+       struct xml_node *totalbytes_node;
+       struct xml_node *hardlinkbytes_node;
+       struct xml_node *lastmodificationtime_node;
  
         image_for_each_inode(inode, imd) {
                 if (inode_is_directory(inode))
@@ -717,30 +493,32 @@ xml_update_image_info(WIMStruct *wim, int image)
                 hard_link_bytes += size * (inode->i_nlink - 1);
         }
  
-       dircount_node = new_element_with_u64(NULL, "DIRCOUNT", dir_count);
-       filecount_node = new_element_with_u64(NULL, "FILECOUNT", file_count);
-       totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES", total_bytes);
-       hardlinkbytes_node = new_element_with_u64(NULL, "HARDLINKBYTES",
-                                                 hard_link_bytes);
-       lastmodificationtime_node =
-               new_element_with_timestamp(NULL, "LASTMODIFICATIONTIME",
-                                          now_as_wim_timestamp());
+       dircount_node = xml_new_element_with_u64(NULL, T("DIRCOUNT"),
+                                                dir_count);
+       filecount_node = xml_new_element_with_u64(NULL, T("FILECOUNT"),
+                                                 file_count);
+       totalbytes_node = xml_new_element_with_u64(NULL, T("TOTALBYTES"),
+                                                  total_bytes);
+       hardlinkbytes_node = xml_new_element_with_u64(NULL, T("HARDLINKBYTES"),
+                                                     hard_link_bytes);
+       lastmodificationtime_node = xml_new_element_with_timestamp(NULL,
+                       T("LASTMODIFICATIONTIME"), now_as_wim_timestamp());
  
         if (unlikely(!dircount_node || !filecount_node || !totalbytes_node ||
                      !hardlinkbytes_node || !lastmodificationtime_node)) {
-               xmlFreeNode(dircount_node);
-               xmlFreeNode(filecount_node);
-               xmlFreeNode(totalbytes_node);
-               xmlFreeNode(hardlinkbytes_node);
-               xmlFreeNode(lastmodificationtime_node);
+               xml_free_node(dircount_node);
+               xml_free_node(filecount_node);
+               xml_free_node(totalbytes_node);
+               xml_free_node(hardlinkbytes_node);
+               xml_free_node(lastmodificationtime_node);
                 return WIMLIB_ERR_NOMEM;
         }
  
-       node_replace_child_element(image_node, dircount_node);
-       node_replace_child_element(image_node, filecount_node);
-       node_replace_child_element(image_node, totalbytes_node);
-       node_replace_child_element(image_node, hardlinkbytes_node);
-       node_replace_child_element(image_node, lastmodificationtime_node);
+       xml_replace_child(image_node, dircount_node);
+       xml_replace_child(image_node, filecount_node);
+       xml_replace_child(image_node, totalbytes_node);
+       xml_replace_child(image_node, hardlinkbytes_node);
+       xml_replace_child(image_node, lastmodificationtime_node);
         return 0;
  }
  
@@ -749,31 +527,33 @@ int
  xml_add_image(struct wim_xml_info *info, const tchar *name)
  {
         const u64 now = now_as_wim_timestamp();
-       xmlNode *image_node;
+       struct xml_node *image_node;
         int ret;
  
+       if (name && !xml_legal_value(name)) {
+               ERROR("Name of new image contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
         ret = WIMLIB_ERR_NOMEM;
-       image_node = xmlNewNode(NULL, "IMAGE");
+       image_node = xml_new_element(NULL, T("IMAGE"));
         if (!image_node)
                 goto err;
-
-       if (name && *name) {
-               ret = new_element_with_ttext(image_node, "NAME", name, NULL);
-               if (ret)
-                       goto err;
-       }
-       ret = WIMLIB_ERR_NOMEM;
-       if (!new_element_with_u64(image_node, "DIRCOUNT", 0))
+       if (name && *name &&
+           !xml_new_element_with_text(image_node, T("NAME"), name))
+               goto err;
+       if (!xml_new_element_with_u64(image_node, T("DIRCOUNT"), 0))
                 goto err;
-       if (!new_element_with_u64(image_node, "FILECOUNT", 0))
+       if (!xml_new_element_with_u64(image_node, T("FILECOUNT"), 0))
                 goto err;
-       if (!new_element_with_u64(image_node, "TOTALBYTES", 0))
+       if (!xml_new_element_with_u64(image_node, T("TOTALBYTES"), 0))
                 goto err;
-       if (!new_element_with_u64(image_node, "HARDLINKBYTES", 0))
+       if (!xml_new_element_with_u64(image_node, T("HARDLINKBYTES"), 0))
                 goto err;
-       if (!new_element_with_timestamp(image_node, "CREATIONTIME", now))
+       if (!xml_new_element_with_timestamp(image_node, T("CREATIONTIME"), now))
                 goto err;
-       if (!new_element_with_timestamp(image_node, "LASTMODIFICATIONTIME", now))
+       if (!xml_new_element_with_timestamp(image_node,
+                                           T("LASTMODIFICATIONTIME"), now))
                 goto err;
         ret = append_image_node(info, image_node);
         if (ret)
@@ -781,7 +561,7 @@ xml_add_image(struct wim_xml_info *info, const tchar *name)
         return 0;
  
  err:
-       xmlFreeNode(image_node);
+       xml_free_node(image_node);
         return ret;
  }
  
@@ -799,39 +579,46 @@ xml_export_image(const struct wim_xml_info *src_info, int src_image,
                  struct wim_xml_info *dest_info, const tchar *dest_image_name,
                  const tchar *dest_image_description, bool wimboot)
  {
-       xmlNode *dest_node;
+       struct xml_node *dest_node;
         int ret;
  
+       if (dest_image_name && !xml_legal_value(dest_image_name)) {
+               ERROR("Destination image name contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+       if (dest_image_description &&
+           !xml_legal_value(dest_image_description)) {
+               ERROR("Destination image description contains illegal characters");
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
         ret = WIMLIB_ERR_NOMEM;
-       dest_node = xmlDocCopyNode(src_info->images[src_image - 1],
-                                  dest_info->doc, 1);
+       dest_node = xml_clone_tree(src_info->images[src_image - 1]);
         if (!dest_node)
                 goto err;
  
-       ret = xml_set_ttext_by_path(dest_node, "NAME", dest_image_name);
+       ret = xml_set_text_by_path(dest_node, T("NAME"), dest_image_name);
         if (ret)
                 goto err;
  
-       ret = xml_set_ttext_by_path(dest_node, "DESCRIPTION",
-                                   dest_image_description);
+       ret = xml_set_text_by_path(dest_node, T("DESCRIPTION"),
+                                  dest_image_description);
         if (ret)
                 goto err;
  
         if (wimboot) {
-               ret = xml_set_ttext_by_path(dest_node, "WIMBOOT", T("1"));
+               ret = xml_set_text_by_path(dest_node, T("WIMBOOT"), T("1"));
                 if (ret)
                         goto err;
         }
  
-       xmlFreeProp(unlink_index_attribute(dest_node));
-
         ret = append_image_node(dest_info, dest_node);
         if (ret)
                 goto err;
         return 0;
  
  err:
-       xmlFreeNode(dest_node);
+       xml_free_node(dest_node);
         return ret;
  }
  
@@ -839,8 +626,8 @@ err:
  void
  xml_delete_image(struct wim_xml_info *info, int image)
  {
-       xmlNode *next_image;
-       xmlAttr *index_attr, *next_index_attr;
+       struct xml_node *next_image;
+       struct xml_node *index_attr, *next_index_attr;
  
         /* Free the IMAGE element for the deleted image.  Then, shift all
          * higher-indexed IMAGE elements down by 1, in the process re-assigning
@@ -848,18 +635,18 @@ xml_delete_image(struct wim_xml_info *info, int image)
  
         next_image = info->images[image - 1];
         next_index_attr = unlink_index_attribute(next_image);
-       unlink_and_free_tree(next_image);
+       xml_free_node(next_image);
  
         while (image < info->image_count) {
                 index_attr = next_index_attr;
                 next_image = info->images[image];
                 next_index_attr = unlink_index_attribute(next_image);
-               xmlAddChild(next_image, (xmlNode *)index_attr);
+               xml_add_child(next_image, index_attr);
                 info->images[image - 1] = next_image;
                 image++;
         }
  
-       xmlFreeProp(next_index_attr);
+       xml_free_node(next_index_attr);
         info->image_count--;
  }
  
@@ -897,80 +684,80 @@ describe_arch(u64 arch)
  
  /* Print information from the WINDOWS element, if present.  */
  static void
-print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
+print_windows_info(struct xml_node *image_node)
  {
-       xmlNode *windows_node;
-       xmlNode *langs_node;
-       xmlNode *version_node;
+       struct xml_node *windows_node;
+       struct xml_node *langs_node;
+       struct xml_node *version_node;
         const tchar *text;
  
-       windows_node = xml_get_node_by_path(image_node, "WINDOWS");
+       windows_node = xml_get_element_by_path(image_node, T("WINDOWS"));
         if (!windows_node)
                 return;
  
         tprintf(T("Architecture:           %"TS"\n"),
-               describe_arch(xml_get_number_by_path(windows_node, "ARCH")));
+               describe_arch(xml_get_number_by_path(windows_node, T("ARCH"))));
  
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTNAME");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTNAME"));
         if (text)
                 tprintf(T("Product Name:           %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, windows_node, "EDITIONID");
+       text = xml_get_text_by_path(windows_node, T("EDITIONID"));
         if (text)
                 tprintf(T("Edition ID:             %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, windows_node, "INSTALLATIONTYPE");
+       text = xml_get_text_by_path(windows_node, T("INSTALLATIONTYPE"));
         if (text)
                 tprintf(T("Installation Type:      %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, windows_node, "HAL");
+       text = xml_get_text_by_path(windows_node, T("HAL"));
         if (text)
                 tprintf(T("HAL:                    %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTTYPE");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTTYPE"));
         if (text)
                 tprintf(T("Product Type:           %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, windows_node, "PRODUCTSUITE");
+       text = xml_get_text_by_path(windows_node, T("PRODUCTSUITE"));
         if (text)
                 tprintf(T("Product Suite:          %"TS"\n"), text);
  
-       langs_node = xml_get_node_by_path(windows_node, "LANGUAGES");
+       langs_node = xml_get_element_by_path(windows_node, T("LANGUAGES"));
         if (langs_node) {
-               xmlNode *lang_node;
+               struct xml_node *lang_node;
  
                 tprintf(T("Languages:              "));
-               node_for_each_child(langs_node, lang_node) {
-                       if (!node_is_element(lang_node, "LANGUAGE"))
+               xml_node_for_each_child(langs_node, lang_node) {
+                       if (!xml_node_is_element(lang_node, T("LANGUAGE")))
                                 continue;
-                       text = node_get_ttext(info, lang_node);
+                       text = xml_element_get_text(lang_node);
                         if (!text)
                                 continue;
                         tprintf(T("%"TS" "), text);
                 }
                 tputchar(T('\n'));
  
-               text = xml_get_ttext_by_path(info, langs_node, "DEFAULT");
+               text = xml_get_text_by_path(langs_node, T("DEFAULT"));
                 if (text)
                         tprintf(T("Default Language:       %"TS"\n"), text);
         }
  
-       text = xml_get_ttext_by_path(info, windows_node, "SYSTEMROOT");
+       text = xml_get_text_by_path(windows_node, T("SYSTEMROOT"));
         if (text)
                 tprintf(T("System Root:            %"TS"\n"), text);
  
-       version_node = xml_get_node_by_path(windows_node, "VERSION");
+       version_node = xml_get_element_by_path(windows_node, T("VERSION"));
         if (version_node) {
                 tprintf(T("Major Version:          %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "MAJOR"));
+                       xml_get_number_by_path(version_node, T("MAJOR")));
                 tprintf(T("Minor Version:          %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "MINOR"));
+                       xml_get_number_by_path(version_node, T("MINOR")));
                 tprintf(T("Build:                  %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "BUILD"));
+                       xml_get_number_by_path(version_node, T("BUILD")));
                 tprintf(T("Service Pack Build:     %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "SPBUILD"));
+                       xml_get_number_by_path(version_node, T("SPBUILD")));
                 tprintf(T("Service Pack Level:     %"PRIu64"\n"),
-                       xml_get_number_by_path(version_node, "SPLEVEL"));
+                       xml_get_number_by_path(version_node, T("SPLEVEL")));
         }
  }
  
@@ -978,7 +765,7 @@ print_windows_info(struct wim_xml_info *info, xmlNode *image_node)
  void
  xml_print_image_info(struct wim_xml_info *info, int image)
  {
-       xmlNode * const image_node = info->images[image - 1];
+       struct xml_node * const image_node = info->images[image - 1];
         const tchar *text;
         tchar timebuf[64];
  
@@ -986,49 +773,49 @@ xml_print_image_info(struct wim_xml_info *info, int image)
  
         /* Always print the Name and Description, even if the corresponding XML
          * elements are not present.  */
-       text = xml_get_ttext_by_path(info, image_node, "NAME");
+       text = xml_get_text_by_path(image_node, T("NAME"));
         tprintf(T("Name:                   %"TS"\n"), text ? text : T(""));
-       text = xml_get_ttext_by_path(info, image_node, "DESCRIPTION");
+       text = xml_get_text_by_path(image_node, T("DESCRIPTION"));
         tprintf(T("Description:            %"TS"\n"), text ? text : T(""));
  
-       text = xml_get_ttext_by_path(info, image_node, "DISPLAYNAME");
+       text = xml_get_text_by_path(image_node, T("DISPLAYNAME"));
         if (text)
                 tprintf(T("Display Name:           %"TS"\n"), text);
  
-       text = xml_get_ttext_by_path(info, image_node, "DISPLAYDESCRIPTION");
+       text = xml_get_text_by_path(image_node, T("DISPLAYDESCRIPTION"));
         if (text)
                 tprintf(T("Display Description:    %"TS"\n"), text);
  
         tprintf(T("Directory Count:        %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "DIRCOUNT"));
+               xml_get_number_by_path(image_node, T("DIRCOUNT")));
  
         tprintf(T("File Count:             %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "FILECOUNT"));
+               xml_get_number_by_path(image_node, T("FILECOUNT")));
  
         tprintf(T("Total Bytes:            %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "TOTALBYTES"));
+               xml_get_number_by_path(image_node, T("TOTALBYTES")));
  
         tprintf(T("Hard Link Bytes:        %"PRIu64"\n"),
-               xml_get_number_by_path(image_node, "HARDLINKBYTES"));
+               xml_get_number_by_path(image_node, T("HARDLINKBYTES")));
  
         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
-                                                      "CREATIONTIME"),
+                                                      T("CREATIONTIME")),
                              timebuf, ARRAY_LEN(timebuf));
         tprintf(T("Creation Time:          %"TS"\n"), timebuf);
  
         wim_timestamp_to_str(xml_get_timestamp_by_path(image_node,
-                                                      "LASTMODIFICATIONTIME"),
-                            timebuf, ARRAY_LEN(timebuf));
+                                       T("LASTMODIFICATIONTIME")),
+                                       timebuf, ARRAY_LEN(timebuf));
         tprintf(T("Last Modification Time: %"TS"\n"), timebuf);
  
-       print_windows_info(info, image_node);
+       print_windows_info(image_node);
  
-       text = xml_get_ttext_by_path(info, image_node, "FLAGS");
+       text = xml_get_text_by_path(image_node, T("FLAGS"));
         if (text)
                 tprintf(T("Flags:                  %"TS"\n"), text);
  
         tprintf(T("WIMBoot compatible:     %"TS"\n"),
-               xml_get_number_by_path(image_node, "WIMBOOT") ?
+               xml_get_number_by_path(image_node, T("WIMBOOT")) ?
                         T("yes") : T("no"));
  
         tputchar('\n');
@@ -1039,28 +826,28 @@ xml_print_image_info(struct wim_xml_info *info, int image)
   *----------------------------------------------------------------------------*/
  
  static int
-image_node_get_index(xmlNode *node)
+image_element_get_index(struct xml_node *element)
  {
-       u64 v = node_get_number((const xmlNode *)xmlHasProp(node, "INDEX"), 10);
-       return min(v, INT_MAX);
+       struct xml_node *attrib = xml_get_attrib(element, T("INDEX"));
+
+       if (!attrib)
+               return 0;
+       return min(INT_MAX, parse_number(attrib->value, 10));
  }
  
  /* Prepare the 'images' array from the XML document tree.  */
  static int
-setup_images(struct wim_xml_info *info, xmlNode *root)
+setup_images(struct wim_xml_info *info, struct xml_node *root)
  {
-       xmlNode *child;
+       struct xml_node *child;
         int index;
         int max_index = 0;
         int ret;
  
-       info->images = NULL;
-       info->image_count = 0;
-
-       node_for_each_child(root, child) {
-               if (!node_is_element(child, "IMAGE"))
+       xml_node_for_each_child(root, child) {
+               if (!xml_node_is_element(child, T("IMAGE")))
                         continue;
-               index = image_node_get_index(child);
+               index = image_element_get_index(child);
                 if (unlikely(index < 1 || info->image_count >= MAX_IMAGES))
                         goto err_indices;
                 max_index = max(max_index, index);
@@ -1072,10 +859,10 @@ setup_images(struct wim_xml_info *info, xmlNode *root)
         info->images = CALLOC(info->image_count, sizeof(info->images[0]));
         if (unlikely(!info->images))
                 goto err;
-       node_for_each_child(root, child) {
-               if (!node_is_element(child, "IMAGE"))
+       xml_node_for_each_child(root, child) {
+               if (!xml_node_is_element(child, T("IMAGE")))
                         continue;
-               index = image_node_get_index(child);
+               index = image_element_get_index(child);
                 if (unlikely(info->images[index - 1]))
                         goto err_indices;
                 info->images[index - 1] = child;
@@ -1091,97 +878,108 @@ err:
         return ret;
  }
  
+static int
+parse_wim_xml_document(const utf16lechar *raw_doc, size_t raw_doc_size,
+                      struct xml_node **root_ret)
+{
+       tchar *doc;
+       int ret;
+
+       ret = utf16le_to_tstr(raw_doc, raw_doc_size, &doc, NULL);
+       if (ret)
+               return ret;
+       ret = xml_parse_document(doc, root_ret);
+       FREE(doc);
+       return ret;
+}
+
  /* Reads the XML data from a WIM file.  */
  int
  read_wim_xml_data(WIMStruct *wim)
  {
         struct wim_xml_info *info;
-       void *buf;
-       size_t bufsize;
-       xmlDoc *doc;
-       xmlNode *root;
+       void *raw_doc;
+       size_t raw_doc_size;
+       struct xml_node *root;
         int ret;
  
         /* Allocate the 'struct wim_xml_info'.  */
         ret = WIMLIB_ERR_NOMEM;
-       info = alloc_wim_xml_info();
+       info = CALLOC(1, sizeof(*info));
         if (!info)
                 goto err;
  
-       /* Read the raw UTF-16LE bytes.  */
-       ret = wimlib_get_xml_data(wim, &buf, &bufsize);
+       /* Read the raw UTF-16LE XML document.  */
+       ret = wimlib_get_xml_data(wim, &raw_doc, &raw_doc_size);
         if (ret)
-               goto err_free_info;
+               goto err;
  
-       /* Parse the document with libxml2, creating the document tree.  */
-       doc = xmlReadMemory(buf, bufsize, NULL, "UTF-16LE", XML_PARSE_NONET);
-       FREE(buf);
-       buf = NULL;
-       if (!doc) {
+       /* Parse the document, creating the document tree.  */
+       ret = parse_wim_xml_document(raw_doc, raw_doc_size, &info->root);
+       FREE(raw_doc);
+       raw_doc = NULL;
+       if (ret) {
+               if (ret != WIMLIB_ERR_NOMEM)
+                       ret = WIMLIB_ERR_XML;
                 ERROR("Unable to parse the WIM file's XML document!");
-               ret = WIMLIB_ERR_XML;
-               goto err_free_info;
+               goto err;
         }
+       root = info->root;
  
         /* Verify the root element.  */
-       root = xmlDocGetRootElement(doc);
-       if (!node_is_element(root, "WIM")) {
+       if (!xml_node_is_element(root, T("WIM"))) {
                 ERROR("The WIM file's XML document has an unexpected format!");
                 ret = WIMLIB_ERR_XML;
-               goto err_free_doc;
+               goto err;
         }
  
         /* Verify the WIM file is not encrypted.  */
-       if (xml_get_node_by_path(root, "ESD/ENCRYPTED")) {
+       if (xml_get_element_by_path(root, T("ESD/ENCRYPTED"))) {
                 ret = WIMLIB_ERR_WIM_IS_ENCRYPTED;
-               goto err_free_doc;
+               goto err;
         }
  
         /* Validate the image elements and set up the images[] array.  */
         ret = setup_images(info, root);
         if (ret)
-               goto err_free_doc;
+               goto err;
  
-       /* Save the document and return.  */
-       info->doc = doc;
-       info->root = root;
+       /* Success!  */
         wim->xml_info = info;
         return 0;
  
-err_free_doc:
-       xmlFreeDoc(doc);
-err_free_info:
-       FREE(info);
  err:
+       xml_free_info_struct(info);
         return ret;
  }
  
  /* Swap the INDEX attributes of two IMAGE elements.  */
  static void
-swap_index_attributes(xmlNode *image_node_1, xmlNode *image_node_2)
+swap_index_attributes(struct xml_node *image_element_1,
+                     struct xml_node *image_element_2)
  {
-       xmlAttr *attr_1, *attr_2;
+       struct xml_node *attr_1, *attr_2;
  
-       if (image_node_1 != image_node_2) {
-               attr_1 = unlink_index_attribute(image_node_1);
-               attr_2 = unlink_index_attribute(image_node_2);
-               xmlAddChild(image_node_1, (xmlNode *)attr_2);
-               xmlAddChild(image_node_2, (xmlNode *)attr_1);
+       if (image_element_1 != image_element_2) {
+               attr_1 = unlink_index_attribute(image_element_1);
+               attr_2 = unlink_index_attribute(image_element_2);
+               xml_add_child(image_element_1, attr_2);
+               xml_add_child(image_element_2, attr_1);
         }
  }
  
  static int
  prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes,
-                          xmlNode **orig_totalbytes_node_ret)
+                          struct xml_node **orig_totalbytes_element_ret)
  {
-       xmlNode *totalbytes_node = NULL;
+       struct xml_node *totalbytes_element = NULL;
  
         /* Allocate the new TOTALBYTES element if needed.  */
         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING &&
             total_bytes != WIM_TOTALBYTES_OMIT) {
-               totalbytes_node = new_element_with_u64(NULL, "TOTALBYTES",
-                                                      total_bytes);
-               if (!totalbytes_node)
+               totalbytes_element = xml_new_element_with_u64(
+                                       NULL, T("TOTALBYTES"), total_bytes);
+               if (!totalbytes_element)
                         return WIMLIB_ERR_NOMEM;
         }
  
@@ -1191,7 +989,7 @@ prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes
                  * other IMAGE elements from the document.  */
                 for (int i = 0; i < info->image_count; i++)
                         if (i + 1 != image)
-                               xmlUnlinkNode(info->images[i]);
+                               xml_unlink_node(info->images[i]);
  
                 /* Temporarily set the INDEX attribute of the needed IMAGE
                  * element to 1.  */
@@ -1199,24 +997,24 @@ prepare_document_for_write(struct wim_xml_info *info, int image, u64 total_bytes
         }
  
         /* Adjust (add, change, or remove) the TOTALBYTES element if needed.  */
-       *orig_totalbytes_node_ret = NULL;
+       *orig_totalbytes_element_ret = NULL;
         if (total_bytes != WIM_TOTALBYTES_USE_EXISTING) {
                 /* Unlink the previous TOTALBYTES element, if any.  */
-               *orig_totalbytes_node_ret = xml_get_node_by_path(info->root,
-                                                                "TOTALBYTES");
-               if (*orig_totalbytes_node_ret)
-                       xmlUnlinkNode(*orig_totalbytes_node_ret);
+               *orig_totalbytes_element_ret = xml_get_element_by_path(
+                                               info->root, T("TOTALBYTES"));
+               if (*orig_totalbytes_element_ret)
+                       xml_unlink_node(*orig_totalbytes_element_ret);
  
                 /* Link in the new TOTALBYTES element, if any.  */
-               if (totalbytes_node)
-                       xmlAddChild(info->root, totalbytes_node);
+               if (totalbytes_element)
+                       xml_add_child(info->root, totalbytes_element);
         }
         return 0;
  }
  
  static void
  restore_document_after_write(struct wim_xml_info *info, int image,
-                            xmlNode *orig_totalbytes_node)
+                            struct xml_node *orig_totalbytes_element)
  {
         /* Restore the IMAGE elements if needed.  */
         if (image != WIMLIB_ALL_IMAGES) {
@@ -1224,15 +1022,15 @@ restore_document_after_write(struct wim_xml_info *info, int image,
                  * elements to the document.  */
                 for (int i = 0; i < info->image_count; i++)
                         if (i + 1 != image)
-                               xmlAddChild(info->root, info->images[i]);
+                               xml_add_child(info->root, info->images[i]);
  
                 /* Restore the original INDEX attributes.  */
                 swap_index_attributes(info->images[0], info->images[image - 1]);
         }
  
         /* Restore the original TOTALBYTES element if needed.  */
-       if (orig_totalbytes_node)
-               node_replace_child_element(info->root, orig_totalbytes_node);
+       if (orig_totalbytes_element)
+               xml_replace_child(info->root, orig_totalbytes_element);
  }
  
  /*
@@ -1250,45 +1048,29 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
                    struct wim_reshdr *out_reshdr, int write_resource_flags)
  {
         struct wim_xml_info *info = wim->xml_info;
-       long ret;
-       long ret2;
-       xmlBuffer *buffer;
-       xmlNode *orig_totalbytes_node;
-       xmlSaveCtxt *save_ctx;
+       int ret;
+       struct xml_node *orig_totalbytes_element;
+       struct xml_out_buf buf = {};
+       const utf16lechar *raw_doc;
+       size_t raw_doc_size;
  
         /* Make any needed temporary changes to the document.  */
         ret = prepare_document_for_write(info, image, total_bytes,
-                                        &orig_totalbytes_node);
+                                        &orig_totalbytes_element);
         if (ret)
                 goto out;
  
-       /* Create an in-memory buffer to hold the encoded document.  */
-       ret = WIMLIB_ERR_NOMEM;
-       buffer = xmlBufferCreate();
-       if (!buffer)
+       ret = xml_write_document(info->root, &buf);
+       if (ret)
                 goto out_restore_document;
  
-       /* Encode the document in UTF-16LE, with a byte order mark, and with no
-        * XML declaration.  Some other WIM software requires all of these
-        * characteristics.  */
-       ret = WIMLIB_ERR_NOMEM;
-       if (xmlBufferCat(buffer, "\xff\xfe"))
-               goto out_free_buffer;
-       save_ctx = xmlSaveToBuffer(buffer, "UTF-16LE", XML_SAVE_NO_DECL);
-       if (!save_ctx)
-               goto out_free_buffer;
-       ret = xmlSaveDoc(save_ctx, info->doc);
-       ret2 = xmlSaveClose(save_ctx);
-       if (ret < 0 || ret2 < 0) {
-               ERROR("Unable to serialize the WIM file's XML document!");
-               ret = WIMLIB_ERR_NOMEM;
-               goto out_free_buffer;
-       }
+       ret = tstr_get_utf16le_and_len(buf.buf, &raw_doc, &raw_doc_size);
+       if (ret)
+               goto out_restore_document;
  
         /* Write the XML data uncompressed.  Although wimlib can handle
          * compressed XML data, some other WIM software cannot.  */
-       ret = write_wim_resource_from_buffer(xmlBufferContent(buffer),
-                                            xmlBufferLength(buffer),
+       ret = write_wim_resource_from_buffer(raw_doc, raw_doc_size,
                                              true,
                                              &wim->out_fd,
                                              WIMLIB_COMPRESSION_TYPE_NONE,
@@ -1296,39 +1078,15 @@ write_wim_xml_data(WIMStruct *wim, int image, u64 total_bytes,
                                              out_reshdr,
                                              NULL,
                                              write_resource_flags);
-out_free_buffer:
-       xmlBufferFree(buffer);
+       tstr_put_utf16le(raw_doc);
  out_restore_document:
         /* Revert any temporary changes we made to the document.  */
-       restore_document_after_write(info, image, orig_totalbytes_node);
+       restore_document_after_write(info, image, orig_totalbytes_element);
+       FREE(buf.buf);
  out:
         return ret;
  }
  
-/*----------------------------------------------------------------------------*
- *                           Global setup functions                           *
- *----------------------------------------------------------------------------*/
-
-void
-xml_global_init(void)
-{
-       xmlInitParser();
-}
-
-void
-xml_global_cleanup(void)
-{
-       xmlCleanupParser();
-}
-
-void
-xml_set_memory_allocator(void *(*malloc_func)(size_t),
-                        void (*free_func)(void *),
-                        void *(*realloc_func)(void *, size_t))
-{
-       xmlMemSetup(free_func, malloc_func, realloc_func, wimlib_strdup);
-}
-
  /*----------------------------------------------------------------------------*
   *                           Library API functions                            *
   *----------------------------------------------------------------------------*/
@@ -1373,24 +1131,22 @@ static bool
  image_name_in_use(const WIMStruct *wim, const tchar *name, int excluded_image)
  {
         const struct wim_xml_info *info = wim->xml_info;
-       const xmlChar *name_utf8;
-       bool found = false;
+       const tchar *existing_name;
  
         /* Any number of images can have "no name".  */
         if (!name || !*name)
                 return false;
  
         /* Check for images that have the specified name.  */
-       if (tstr_get_utf8(name, &name_utf8))
-               return false;
-       for (int i = 0; i < info->image_count && !found; i++) {
+       for (int i = 0; i < info->image_count; i++) {
                 if (i + 1 == excluded_image)
                         continue;
-               found = xmlStrEqual(name_utf8, xml_get_text_by_path(
-                                                   info->images[i], "NAME"));
+               existing_name = xml_get_text_by_path(info->images[i],
+                                                    T("NAME"));
+               if (existing_name && !tstrcmp(existing_name, name))
+                       return true;
         }
-       tstr_put_utf8(name_utf8);
-       return found;
+       return false;
  }
  
  WIMLIBAPI bool
@@ -1421,19 +1177,13 @@ WIMLIBAPI const tchar *
  wimlib_get_image_property(const WIMStruct *wim, int image,
                           const tchar *property_name)
  {
-       const xmlChar *name;
-       const tchar *value;
-       struct wim_xml_info *info = wim->xml_info;
+       const struct wim_xml_info *info = wim->xml_info;
  
         if (!property_name || !*property_name)
                 return NULL;
         if (image < 1 || image > info->image_count)
                 return NULL;
-       if (tstr_get_utf8(property_name, &name))
-               return NULL;
-       value = xml_get_ttext_by_path(info, info->images[image - 1], name);
-       tstr_put_utf8(name);
-       return value;
+       return xml_get_text_by_path(info->images[image - 1], property_name);
  }
  
  WIMLIBAPI int
@@ -1445,7 +1195,8 @@ wimlib_set_image_name(WIMStruct *wim, int image, const tchar *name)
  WIMLIBAPI int
  wimlib_set_image_descripton(WIMStruct *wim, int image, const tchar *description)
  {
-       return wimlib_set_image_property(wim, image, T("DESCRIPTION"), description);
+       return wimlib_set_image_property(wim, image, T("DESCRIPTION"),
+                                        description);
  }
  
  WIMLIBAPI int
@@ -1458,13 +1209,22 @@ WIMLIBAPI int
  wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
                           const tchar *property_value)
  {
-       const xmlChar *name;
         struct wim_xml_info *info = wim->xml_info;
-       int ret;
  
         if (!property_name || !*property_name)
                 return WIMLIB_ERR_INVALID_PARAM;
  
+       if (!xml_legal_name(property_name)) {
+               ERROR("Property name '%"TS"' is illegal in XML", property_name);
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
+       if (property_value && !xml_legal_value(property_value)) {
+               WARNING("Value of property '%"TS"' contains illegal characters",
+                       property_name);
+               return WIMLIB_ERR_INVALID_PARAM;
+       }
+
         if (image < 1 || image > info->image_count)
                 return WIMLIB_ERR_INVALID_IMAGE;
  
@@ -1472,10 +1232,6 @@ wimlib_set_image_property(WIMStruct *wim, int image, const tchar *property_name,
             image_name_in_use(wim, property_value, image))
                 return WIMLIB_ERR_IMAGE_NAME_COLLISION;
  
-       ret = tstr_get_utf8(property_name, &name);
-       if (ret)
-               return ret;
-       ret = xml_set_ttext_by_path(info->images[image - 1], name, property_value);
-       tstr_put_utf8(name);
-       return ret;
+       return xml_set_text_by_path(info->images[image - 1], property_name,
+                                   property_value);
  }
diff --git a/src/xmlproc.c b/src/xmlproc.c

new file mode 100644 (file)

index 0000000..529fd69
--- /dev/null
+++ b/src/xmlproc.c
@@ -0,0 +1,766 @@
+/*
+ * xmlproc.c
+ *
+ * A simple XML 1.0 processor.  This handles all XML features that are used in
+ * WIM files, plus a bit more for futureproofing.  It omits problematic
+ * features, such as expansion of entities other than simple escape sequences.
+ */
+
+/*
+ * Copyright 2023 Eric Biggers
+ *
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <string.h>
+
+#include "wimlib/error.h"
+#include "wimlib/test_support.h"
+#include "wimlib/util.h"
+#include "wimlib/xmlproc.h"
+
+/*----------------------------------------------------------------------------*
+ *                         XML node utility functions                         *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Duplicate the first @len characters of @str into a newly allocated,
+ * NUL-terminated string.  Returns NULL if the allocation fails.
+ */
+static tchar *
+tstrdupz(const tchar *str, size_t len)
+{
+       tchar *copy = CALLOC(len + 1, sizeof(str[0]));
+
+       if (!copy)
+               return NULL;
+       /* The terminator comes from CALLOC's zero-fill of the extra slot. */
+       tmemcpy(copy, str, len);
+       return copy;
+}
+
+/*
+ * Allocate a node of the given @type.  If @name is non-NULL, its first
+ * @name_len characters are copied as the node's name; likewise for @value and
+ * @value_len.  If @parent is non-NULL, the new node is appended to its
+ * children.  Returns NULL if any allocation fails.
+ */
+static struct xml_node *
+xml_new_node(struct xml_node *parent, enum xml_node_type type,
+            const tchar *name, size_t name_len,
+            const tchar *value, size_t value_len)
+{
+       struct xml_node *new_node = CALLOC(1, sizeof(*new_node));
+
+       if (!new_node)
+               return NULL;
+       new_node->type = type;
+       INIT_LIST_HEAD(&new_node->children);
+       if (name && !(new_node->name = tstrdupz(name, name_len)))
+               goto fail;
+       if (value && !(new_node->value = tstrdupz(value, value_len)))
+               goto fail;
+       if (parent)
+               xml_add_child(parent, new_node);
+       return new_node;
+
+fail:
+       /* Also releases whichever string was already duplicated. */
+       xml_free_node(new_node);
+       return NULL;
+}
+
+/*
+ * Allocate an ELEMENT node called @name.  When @parent (expected to be an
+ * ELEMENT itself) is non-NULL, the new node is appended to its children.
+ * Returns NULL on allocation failure.
+ */
+struct xml_node *
+xml_new_element(struct xml_node *parent, const tchar *name)
+{
+       size_t name_len = tstrlen(name);
+
+       return xml_new_node(parent, XML_ELEMENT_NODE, name, name_len, NULL, 0);
+}
+
+/*
+ * Like xml_new_element(), but the new ELEMENT also receives a TEXT child
+ * holding @text.  On failure, NULL is returned and the partially built element
+ * is freed (which also detaches it from @parent).
+ */
+struct xml_node *
+xml_new_element_with_text(struct xml_node *parent, const tchar *name,
+                         const tchar *text)
+{
+       struct xml_node *element = xml_new_element(parent, name);
+
+       if (!element)
+               return NULL;
+       if (xml_element_set_text(element, text) == 0)
+               return element;
+       xml_free_node(element);
+       return NULL;
+}
+
+/* Make @child the last entry in the children list of @parent. */
+void
+xml_add_child(struct xml_node *parent, struct xml_node *child)
+{
+       /* Callers are expected to pass an unlinked node; detach it anyway. */
+       xml_unlink_node(child);
+       list_add_tail(&child->sibling_link, &parent->children);
+       child->parent = parent;
+}
+
+/* Detach @node from its parent.  Does nothing if @node has no parent. */
+void
+xml_unlink_node(struct xml_node *node)
+{
+       if (!node->parent)
+               return;
+       list_del(&node->sibling_link);
+       node->parent = NULL;
+}
+
+/* Recursively free every node in the children list of @parent. */
+static void
+xml_free_children(struct xml_node *parent)
+{
+       struct xml_node *cur, *next;
+
+       /* The _safe iterator is needed since xml_free_node() unlinks and frees
+        * the current entry. */
+       list_for_each_entry_safe(cur, next, &parent->children, sibling_link)
+               xml_free_node(cur);
+}
+
+/*
+ * Free @node together with its whole subtree, detaching it from its parent
+ * first.  A NULL @node is ignored.
+ */
+void
+xml_free_node(struct xml_node *node)
+{
+       if (!node)
+               return;
+       xml_unlink_node(node);
+       xml_free_children(node);
+       FREE(node->value);
+       FREE(node->name);
+       FREE(node);
+}
+
+/*
+ * Look up the first TEXT child of @element and return its string, or NULL when
+ * there is no TEXT child.  @element may be NULL (this relies on
+ * xml_node_for_each_child() tolerating a NULL parent).
+ */
+const tchar *
+xml_element_get_text(const struct xml_node *element)
+{
+       const struct xml_node *cur;
+
+       xml_node_for_each_child(element, cur) {
+               if (cur->type == XML_TEXT_NODE)
+                       return cur->value;
+       }
+       return NULL;
+}
+
+/*
+ * Replace all existing children of @element with a single TEXT node holding
+ * @text.  Note that attributes live in the same children list, so they are
+ * discarded too.  Returns 0 on success, or WIMLIB_ERR_NOMEM with @element left
+ * untouched.
+ */
+int
+xml_element_set_text(struct xml_node *element, const tchar *text)
+{
+       size_t text_len = tstrlen(text);
+       struct xml_node *new_text;
+
+       /* Allocate first so that a failure has no side effects. */
+       new_text = xml_new_node(NULL, XML_TEXT_NODE, NULL, 0, text, text_len);
+       if (!new_text)
+               return WIMLIB_ERR_NOMEM;
+       xml_free_children(element);
+       xml_add_child(element, new_text);
+       return 0;
+}
+
+/*
+ * Append @text_len characters of @text to the contents of @element.  When the
+ * last child of @element is already a TEXT node, the new text is concatenated
+ * onto it; otherwise a new TEXT child is created.  Returns 0 on success or
+ * WIMLIB_ERR_NOMEM.
+ */
+static int
+xml_element_append_text(struct xml_node *element,
+                       const tchar *text, size_t text_len)
+{
+       struct xml_node *tail = NULL;
+       size_t old_len;
+       tchar *merged;
+
+       if (!list_empty(&element->children))
+               tail = list_last_entry(&element->children, struct xml_node,
+                                      sibling_link);
+
+       if (!tail || tail->type != XML_TEXT_NODE) {
+               /* Nothing to merge with, so add a fresh TEXT node. */
+               if (!xml_new_node(element, XML_TEXT_NODE, NULL, 0,
+                                 text, text_len))
+                       return WIMLIB_ERR_NOMEM;
+               return 0;
+       }
+
+       /*
+        * The new TEXT would directly follow another TEXT (reachable e.g. via
+        * CDATA sections), so keep the tree simple by extending the existing
+        * node instead.
+        */
+       old_len = tstrlen(tail->value);
+       merged = CALLOC(old_len + text_len + 1, sizeof(merged[0]));
+       if (!merged)
+               return WIMLIB_ERR_NOMEM;
+       tmemcpy(merged, tail->value, old_len);
+       tmemcpy(&merged[old_len], text, text_len);
+       FREE(tail->value);
+       tail->value = merged;
+       return 0;
+}
+
+/*
+ * Return the ATTRIBUTE child of @element whose name equals @name, or NULL if
+ * there is no such attribute.
+ */
+struct xml_node *
+xml_get_attrib(const struct xml_node *element, const tchar *name)
+{
+       struct xml_node *attr;
+
+       xml_node_for_each_child(element, attr) {
+               if (attr->type == XML_ATTRIBUTE_NODE) {
+                       if (tstrcmp(attr->name, name) == 0)
+                               return attr;
+               }
+       }
+       return NULL;
+}
+
+/*
+ * Set the attribute @name to @value on @element, replacing any existing
+ * attribute of the same name.  Returns 0 on success or WIMLIB_ERR_NOMEM.
+ */
+int
+xml_set_attrib(struct xml_node *element, const tchar *name, const tchar *value)
+{
+       size_t name_len = tstrlen(name);
+       size_t value_len = tstrlen(value);
+       struct xml_node *new_attr;
+
+       new_attr = xml_new_node(NULL, XML_ATTRIBUTE_NODE, name, name_len,
+                               value, value_len);
+       if (!new_attr)
+               return WIMLIB_ERR_NOMEM;
+       xml_replace_child(element, new_attr);
+       return 0;
+}
+
+/*
+ * Add the ELEMENT or ATTRIBUTE node @replacement under the ELEMENT @parent,
+ * replacing any node with the same type and name that already exists.
+ *
+ * If a match is found, @replacement takes over its position in the children
+ * list and the old node (with its subtree) is freed.  Otherwise @replacement is
+ * appended at the end.  Only the first match is replaced.  Names are compared
+ * with tstrcmp(), so both nodes must have a non-NULL name.
+ */
+void
+xml_replace_child(struct xml_node *parent, struct xml_node *replacement)
+{
+       struct xml_node *child;
+
+       xml_unlink_node(replacement); /* Shouldn't be needed, but be safe. */
+
+       xml_node_for_each_child(parent, child) {
+               if (child->type == replacement->type &&
+                   !tstrcmp(child->name, replacement->name)) {
+                       /* Splice @replacement into @child's list position. */
+                       list_replace(&child->sibling_link,
+                                    &replacement->sibling_link);
+                       replacement->parent = parent;
+                       /* Mark @child as unlinked so that xml_free_node()
+                        * doesn't try to remove it from the list again. */
+                       child->parent = NULL;
+                       xml_free_node(child);
+                       return;
+               }
+       }
+       xml_add_child(parent, replacement);
+}
+
+/*
+ * Make a deep copy of the tree rooted at @orig.  The returned root is not
+ * linked to any parent.  Returns NULL if an allocation fails, in which case
+ * any partially built copy is freed.
+ */
+struct xml_node *
+xml_clone_tree(struct xml_node *orig)
+{
+       size_t name_len = orig->name ? tstrlen(orig->name) : 0;
+       size_t value_len = orig->value ? tstrlen(orig->value) : 0;
+       struct xml_node *copy, *src_child, *dst_child;
+
+       copy = xml_new_node(NULL, orig->type, orig->name, name_len,
+                           orig->value, value_len);
+       if (!copy)
+               return NULL;
+       xml_node_for_each_child(orig, src_child) {
+               dst_child = xml_clone_tree(src_child);
+               if (!dst_child) {
+                       xml_free_node(copy);
+                       return NULL;
+               }
+               xml_add_child(copy, dst_child);
+       }
+       return copy;
+}
+
+/*----------------------------------------------------------------------------*
+ *                           XML string validation                            *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Functions that check for legal names and values in XML 1.0.  These are
+ * currently slightly over-lenient, as they allow everything non-ASCII.  These
+ * are also not currently used by the XML parser to reject non-well-formed
+ * documents, but rather just by the user of the XML processor (xml.c) in order
+ * to avoid introducing illegal names and values into the document.
+ */
+
+/* Return true if @c is a space, tab, carriage return, or line feed. */
+static inline bool
+is_whitespace(tchar c)
+{
+       switch (c) {
+       case ' ':
+       case '\t':
+       case '\r':
+       case '\n':
+               return true;
+       default:
+               return false;
+       }
+}
+
+/*
+ * Return true if @c may begin an XML name: an ASCII letter, ':' or '_'.  Any
+ * character outside the 7-bit ASCII range is accepted as well.
+ */
+static inline bool
+is_name_start_char(tchar c)
+{
+       if ((c & 0x7f) != c)
+               return true; /* overly lenient for now */
+       if (c == ':' || c == '_')
+               return true;
+       return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
+/*
+ * Return true if @c may appear after the first character of an XML name:
+ * anything accepted by is_name_start_char(), plus digits, '-' and '.'.
+ */
+static inline bool
+is_name_char(tchar c)
+{
+       if (c == '-' || c == '.' || (c >= '0' && c <= '9'))
+               return true;
+       return is_name_start_char(c);
+}
+
+/*
+ * Return true if the NUL-terminated string @p is acceptable as an XML name.
+ * The empty string is rejected, since NUL is not a name start character.
+ */
+bool
+xml_legal_name(const tchar *p)
+{
+       if (!is_name_start_char(*p))
+               return false;
+       while (*++p) {
+               if (!is_name_char(*p))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * Return true if the NUL-terminated string @p contains no characters that are
+ * illegal in XML 1.0 text, i.e. no ASCII control characters other than tab,
+ * line feed and carriage return.  Everything at or above 0x20, including all
+ * non-ASCII characters, is accepted.
+ */
+bool
+xml_legal_value(const tchar *p)
+{
+       for (; *p; p++) {
+               /*
+                * Careful: tchar can be a signed type (plain 'char' in UTF-8
+                * builds), in which case non-ASCII bytes are negative.  Those
+                * must not be mistaken for control characters, so require a
+                * positive value before applying the '< 0x20' test.
+                */
+               if (*p > 0 && *p < 0x20 && !is_whitespace(*p))
+                       return false;
+       }
+       return true;
+}
+
+/*
+ * The byte order mark (U+FEFF) as a NUL-terminated tchar string: a single
+ * UTF-16 code unit when TCHAR_IS_UTF16LE is set, otherwise the three-byte
+ * UTF-8 encoding EF BB BF.
+ */
+#if TCHAR_IS_UTF16LE
+#define BYTE_ORDER_MARK        (tchar[]){ 0xfeff, 0 }
+#else
+#define BYTE_ORDER_MARK        "\xEF\xBB\xBF"
+#endif
+
+/*----------------------------------------------------------------------------*
+ *                               XML parsing                                  *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Jump to the enclosing function's 'bad' label unless @cond holds.  This
+ * expands to a bare 'if' statement, so don't use it as the body of an
+ * unbraced if/else.
+ */
+#define CHECK(cond)    if (!(cond)) goto bad
+
+/* Advance *pp past any leading whitespace characters. */
+static inline void
+skip_whitespace(const tchar **pp)
+{
+       while (is_whitespace(**pp))
+               (*pp)++;
+}
+
+/*
+ * If the text at *pp begins with @str, advance *pp past it and return true.
+ * Otherwise leave *pp unchanged and return false.
+ */
+static inline bool
+skip_string(const tchar **pp, const tchar *str)
+{
+       size_t str_len = tstrlen(str);
+
+       if (tstrncmp(*pp, str, str_len) != 0)
+               return false;
+       *pp += str_len;
+       return true;
+}
+
+/*
+ * Search for the first occurrence of @str at or after *pp.  If found, advance
+ * *pp to just past it and return true; otherwise leave *pp unchanged and
+ * return false.
+ */
+static inline bool
+find_and_skip(const tchar **pp, const tchar *str)
+{
+       const tchar *match = tstrstr(*pp, str);
+
+       if (!match)
+               return false;
+       *pp = match + tstrlen(str);
+       return true;
+}
+
+/*
+ * Advance *pp past any sequence of whitespace, "<?...?>" constructs (XML
+ * declaration and processing instructions), a "<!DOCTYPE...>" declaration, and
+ * "<!--...-->" comments.  Returns false, without updating *pp, if one of these
+ * constructs is opened but its terminator is never found.
+ *
+ * Note that the DOCTYPE is considered to end at the first '>' following it.
+ */
+static bool
+skip_misc(const tchar **pp)
+{
+       const tchar *p = *pp, *prev_p;
+
+       /* Keep going until a full pass makes no progress. */
+       do {
+               prev_p = p;
+               skip_whitespace(&p);
+               /* Discard XML declaration and top-level PIs for now. */
+               if (skip_string(&p, T("<?")) && !find_and_skip(&p, T("?>")))
+                       return false;
+               /* Discard DOCTYPE declaration for now. */
+               if (skip_string(&p, T("<!DOCTYPE")) && !find_and_skip(&p, T(">")))
+                       return false;
+               /* Discard top-level comments for now. */
+               if (skip_string(&p, T("<!--")) && !find_and_skip(&p, T("-->")))
+                       return false;
+       } while (p != prev_p);
+       *pp = p;
+       return true;
+}
+
+/*
+ * Return the predefined entity reference that must be written in place of @c,
+ * or NULL if @c can be written as-is.
+ */
+static inline const tchar *
+get_escape_seq(tchar c)
+{
+       if (c == '<')
+               return T("&lt;");
+       if (c == '>')
+               return T("&gt;");
+       if (c == '&')
+               return T("&amp;");
+       if (c == '\'')
+               return T("&apos;");
+       if (c == '"')
+               return T("&quot;");
+       return NULL;
+}
+
+/*
+ * Expand the five predefined entity references (&lt; &gt; &amp; &apos; &quot;)
+ * in the first @len characters of @str.  On success, a newly allocated
+ * NUL-terminated string is stored in @unescaped_ret and 0 is returned.
+ * Returns WIMLIB_ERR_NOMEM on allocation failure, or WIMLIB_ERR_XML if an '&'
+ * does not begin one of the supported sequences or if a sequence extends past
+ * @len.  No other kinds of references are expanded.
+ *
+ * Note: 'str' must be NUL-terminated, but only 'len' chars are used.
+ */
+static int
+unescape_string(const tchar *str, size_t len, tchar **unescaped_ret)
+{
+       const tchar *in_p = str;
+       tchar *unescaped, *out_p;
+
+       /* Unescaping never lengthens the string, so len + 1 always suffices;
+        * the terminator is left over from the zeroed allocation. */
+       unescaped = CALLOC(len + 1, sizeof(str[0]));
+       if (!unescaped)
+               return WIMLIB_ERR_NOMEM;
+       out_p = unescaped;
+       while (in_p < &str[len]) {
+               if (*in_p != '&')
+                       *out_p++ = *in_p++;
+               else if (skip_string(&in_p, T("&lt;")))
+                       *out_p++ = '<';
+               else if (skip_string(&in_p, T("&gt;")))
+                       *out_p++ = '>';
+               else if (skip_string(&in_p, T("&amp;")))
+                       *out_p++ = '&';
+               else if (skip_string(&in_p, T("&apos;")))
+                       *out_p++ = '\'';
+               else if (skip_string(&in_p, T("&quot;")))
+                       *out_p++ = '"';
+               else
+                       goto bad;
+       }
+       /* skip_string() compares against the NUL-terminated input, so a
+        * sequence may have run past the first 'len' characters. */
+       if (in_p > &str[len])
+               goto bad;
+       *unescaped_ret = unescaped;
+       return 0;
+
+bad:
+       ERROR("Error unescaping string '%.*"TS"'", (int)len, str);
+       FREE(unescaped);
+       return WIMLIB_ERR_XML;
+}
+
+static int
+parse_element(const tchar **pp, struct xml_node *parent, int depth,
+             struct xml_node **node_ret);
+
+/*
+ * Parse the contents of @element, starting just after its start tag at *pp.
+ * Character data and CDATA sections are appended to @element as text, child
+ * elements are parsed recursively at @depth + 1, and processing instructions
+ * and comments are discarded.  On success, *pp is left pointing at the '<' of
+ * the "</" that terminates the contents and 0 is returned.  On failure, an
+ * error code is returned and *pp is not updated; nodes already added to
+ * @element are left for the caller to free.
+ */
+static int
+parse_contents(const tchar **pp, struct xml_node *element, int depth)
+{
+       const tchar *p = *pp;
+       int ret;
+
+       for (;;) {
+               const tchar *raw_text = p;
+               tchar *text;
+
+               /* Scan the character data up to the next markup. */
+               for (; *p != '<'; p++) {
+                       if (*p == '\0')
+                               return WIMLIB_ERR_XML;
+               }
+               if (p > raw_text) {
+                       ret = unescape_string(raw_text, p - raw_text, &text);
+                       if (ret)
+                               return ret;
+                       ret = xml_element_append_text(element, text,
+                                                     tstrlen(text));
+                       FREE(text);
+                       if (ret)
+                               return ret;
+               }
+               /* Here *p == '<'; dispatch on the character after it. */
+               if (p[1] == '/') {
+                       break; /* Reached the end tag of @element */
+               } else if (p[1] == '?') {
+                       /* Discard processing instructions for now. */
+                       p += 2;
+                       if (!find_and_skip(&p, T("?>")))
+                               return WIMLIB_ERR_XML;
+                       continue;
+               } else if (p[1] == '!') {
+                       if (skip_string(&p, T("<![CDATA["))) {
+                               /* CDATA is taken literally (no unescaping);
+                                * the "- 3" excludes the "]]>" terminator. */
+                               raw_text = p;
+                               if (!find_and_skip(&p, T("]]>")))
+                                       return WIMLIB_ERR_XML;
+                               ret = xml_element_append_text(element, raw_text,
+                                                             p - 3 - raw_text);
+                               if (ret)
+                                       return ret;
+                               continue;
+                       } else if (skip_string(&p, T("<!--"))) {
+                               /* Discard comments for now. */
+                               if (!find_and_skip(&p, T("-->")))
+                                       return WIMLIB_ERR_XML;
+                               continue;
+                       }
+                       /* Any other "<!" construct is unsupported here. */
+                       return WIMLIB_ERR_XML;
+               }
+               /* Anything else must be the start tag of a child element. */
+               ret = parse_element(&p, element, depth + 1, NULL);
+               if (ret)
+                       return ret;
+       }
+       *pp = p;
+       return 0;
+}
+
+/*
+ * Parse one element (start tag, attributes, contents, and end tag; or an
+ * empty-element tag) beginning at *pp, which must point at its '<'.  The new
+ * ELEMENT node is added under @parent if @parent is non-NULL, and is also
+ * returned in @element_ret if that is non-NULL.  @depth is the current nesting
+ * level; nesting of 50 or more levels is rejected.
+ *
+ * On success, *pp is advanced past the element and 0 is returned.  On failure,
+ * the partially built element is freed, *pp is not updated, and
+ * WIMLIB_ERR_XML or WIMLIB_ERR_NOMEM is returned.
+ */
+static int
+parse_element(const tchar **pp, struct xml_node *parent, int depth,
+             struct xml_node **element_ret)
+{
+       const tchar *p = *pp;
+       struct xml_node *element = NULL;
+       const tchar *name_start;
+       size_t name_len;
+       int ret;
+
+       /* Parse the start tag. */
+       CHECK(depth < 50);
+       CHECK(*p == '<');
+       p++;
+       name_start = p;
+       /*
+        * The name also ends at '/', so that an empty-element tag with no
+        * whitespace before the slash (e.g. <NAME/>) is recognized as such
+        * rather than as the start tag of an element named "NAME/".
+        */
+       while (!is_whitespace(*p) && *p != '>' && *p != '/' && *p != '\0')
+               p++;
+       name_len = p - name_start;
+       CHECK(name_len > 0);
+       element = xml_new_node(parent, XML_ELEMENT_NODE, name_start, name_len,
+                              NULL, 0);
+       if (!element) {
+               ret = WIMLIB_ERR_NOMEM;
+               goto error;
+       }
+       /* Parse the attributes list within the start tag. */
+       while (is_whitespace(*p)) {
+               const tchar *attr_name_start, *attr_value_start;
+               size_t attr_name_len, attr_value_len;
+               tchar *attr_value;
+               tchar quote;
+
+               skip_whitespace(&p);
+               if (*p == '/' || *p == '>')
+                       break;
+               attr_name_start = p;
+               while (*p != '=' && !is_whitespace(*p) && *p != '\0')
+                       p++;
+               attr_name_len = p - attr_name_start;
+               skip_whitespace(&p);
+               CHECK(attr_name_len > 0 && *p == '=');
+               p++;
+               skip_whitespace(&p);
+               /* The value may be delimited by either kind of quote. */
+               quote = *p;
+               CHECK(quote == '\'' || quote == '"');
+               attr_value_start = ++p;
+               while (*p != quote && *p != '\0')
+                       p++;
+               CHECK(*p == quote);
+               attr_value_len = p - attr_value_start;
+               p++;
+               ret = unescape_string(attr_value_start, attr_value_len,
+                                     &attr_value);
+               if (ret)
+                       goto error;
+               ret = xml_new_node(element, XML_ATTRIBUTE_NODE,
+                                  attr_name_start, attr_name_len,
+                                  attr_value, tstrlen(attr_value))
+                       ? 0 : WIMLIB_ERR_NOMEM;
+               FREE(attr_value);
+               if (ret)
+                       goto error;
+       }
+       if (*p == '/') {
+               /* Closing an empty element tag */
+               p++;
+               CHECK(*p == '>');
+               p++;
+       } else {
+               /* Closing the start tag */
+               CHECK(*p == '>');
+               p++;
+               /* Parse the contents, then the end tag. */
+               ret = parse_contents(&p, element, depth);
+               if (ret)
+                       goto error;
+               CHECK(*p == '<');
+               p++;
+               CHECK(*p == '/');
+               p++;
+               /* The end tag's name must match the start tag's name. */
+               CHECK(!tstrncmp(p, name_start, name_len));
+               p += name_len;
+               skip_whitespace(&p);
+               CHECK(*p == '>');
+               p++;
+       }
+       *pp = p;
+       if (element_ret)
+               *element_ret = element;
+       return 0;
+
+error:
+       /* This also unlinks @element from @parent.  NULL is a no-op. */
+       xml_free_node(element);
+       return ret;
+
+bad:
+       ret = WIMLIB_ERR_XML;
+       goto error;
+}
+
+/*
+ * Parse a whole XML document and store its root element in @doc_ret.  The
+ * input must be a NUL-terminated 'tchar' string, i.e. UTF-16LE in Windows
+ * builds and UTF-8 everywhere else.  A leading byte order mark is optional.
+ * Returns 0 on success; on failure an error code is returned and @doc_ret is
+ * not written.
+ */
+int
+xml_parse_document(const tchar *p, struct xml_node **doc_ret)
+{
+       struct xml_node *root;
+       int ret;
+
+       /* The BOM is optional, so the result is deliberately ignored. */
+       skip_string(&p, BYTE_ORDER_MARK);
+       if (!skip_misc(&p))
+               return WIMLIB_ERR_XML;
+       ret = parse_element(&p, NULL, 0, &root);
+       if (ret != 0)
+               return ret;
+       /* Only "misc" items may follow the root element. */
+       if (skip_misc(&p) && *p == '\0') {
+               *doc_ret = root;
+               return 0;
+       }
+       xml_free_node(root);
+       return WIMLIB_ERR_XML;
+}
+
+/*----------------------------------------------------------------------------*
+ *                               XML writing                                  *
+ *----------------------------------------------------------------------------*/
+
+/*
+ * Append @len characters of @str to @buf, growing the buffer as needed.  Room
+ * is always reserved for one extra character so that the caller can
+ * NUL-terminate the result.  On allocation failure, @buf->oom is set and
+ * nothing is appended.
+ */
+static void
+xml_write(struct xml_out_buf *buf, const tchar *str, size_t len)
+{
+       size_t needed = buf->count + len + 1;
+
+       if (needed > buf->capacity) {
+               size_t new_capacity = max(buf->capacity * 2, 4096);
+               tchar *new_buf;
+
+               /*
+                * Doubling alone is not enough when a single write is larger
+                * than the grown capacity (e.g. a long text value); make sure
+                * the data actually fits before copying it in.
+                */
+               if (new_capacity < needed)
+                       new_capacity = needed;
+               new_buf = REALLOC(buf->buf, new_capacity * sizeof(str[0]));
+               if (!new_buf) {
+                       buf->oom = true;
+                       return;
+               }
+               buf->buf = new_buf;
+               buf->capacity = new_capacity;
+       }
+       tmemcpy(&buf->buf[buf->count], str, len);
+       buf->count += len;
+}
+
+/* Append the NUL-terminated string @str (without its terminator) to @buf. */
+static void
+xml_puts(struct xml_out_buf *buf, const tchar *str)
+{
+       size_t len = tstrlen(str);
+
+       xml_write(buf, str, len);
+}
+
+/*
+ * Append @str to @buf, substituting the corresponding entity reference for
+ * each character that get_escape_seq() says must be escaped.
+ */
+static void
+xml_escape_and_puts(struct xml_out_buf *buf, const tchar *str)
+{
+       const tchar *run_start = str;
+       const tchar *p;
+
+       for (p = str; *p; p++) {
+               const tchar *seq = get_escape_seq(*p);
+
+               if (seq) {
+                       /* Flush the literal run, then the replacement. */
+                       xml_write(buf, run_start, p - run_start);
+                       xml_puts(buf, seq);
+                       run_start = p + 1;
+               }
+       }
+       /* Flush whatever follows the last escaped character. */
+       xml_write(buf, run_start, p - run_start);
+}
+
+/*
+ * Serialize the ELEMENT @node and everything beneath it into @buf.  Attributes
+ * are written inside the start tag, always with double quotes; TEXT and
+ * ELEMENT children are then written in order.  An explicit end tag is always
+ * written, even when the element has no contents.
+ */
+static void
+xml_write_element(struct xml_node *node, struct xml_out_buf *buf)
+{
+       struct xml_node *cur;
+
+       /* Start tag: the name followed by each attribute as name="value". */
+       xml_puts(buf, T("<"));
+       xml_puts(buf, node->name);
+       xml_node_for_each_child(node, cur) {
+               if (cur->type == XML_ATTRIBUTE_NODE) {
+                       xml_puts(buf, T(" "));
+                       xml_puts(buf, cur->name);
+                       xml_puts(buf, T("=\""));
+                       xml_escape_and_puts(buf, cur->value);
+                       xml_puts(buf, T("\""));
+               }
+       }
+       xml_puts(buf, T(">"));
+
+       /* Contents: text is escaped, child elements recurse. */
+       xml_node_for_each_child(node, cur) {
+               switch (cur->type) {
+               case XML_TEXT_NODE:
+                       xml_escape_and_puts(buf, cur->value);
+                       break;
+               case XML_ELEMENT_NODE:
+                       xml_write_element(cur, buf);
+                       break;
+               default:
+                       /* Attributes were already written above. */
+                       break;
+               }
+       }
+
+       /* End tag. */
+       xml_puts(buf, T("</"));
+       xml_puts(buf, node->name);
+       xml_puts(buf, T(">"));
+}
+
+/*
+ * Serialize the document rooted at @doc into @buf as a NUL-terminated string
+ * of 'tchar', i.e. UTF-16LE in Windows builds and UTF-8 everywhere else.  The
+ * output starts with a byte order mark (BOM), as this is needed for
+ * compatibility with WIMGAPI.  Returns 0 on success, or WIMLIB_ERR_NOMEM if
+ * any write ran out of memory; in that case the caller still owns whatever is
+ * in @buf->buf.
+ */
+int
+xml_write_document(struct xml_node *doc, struct xml_out_buf *buf)
+{
+       xml_puts(buf, BYTE_ORDER_MARK);
+       xml_write_element(doc, buf);
+       if (!buf->oom) {
+               /* xml_write() always leaves room for the terminator. */
+               buf->buf[buf->count] = '\0';
+               return 0;
+       }
+       return WIMLIB_ERR_NOMEM;
+}
+
+/*----------------------------------------------------------------------------*
+ *                              Test support                                  *
+ *----------------------------------------------------------------------------*/
+
+#ifdef ENABLE_TEST_SUPPORT
+/*
+ * Test helper: parse the XML document @in and serialize it again.  On success,
+ * 0 is returned and *out_ret is set to the newly allocated, NUL-terminated
+ * output string.  On failure, an error code is returned and *out_ret is set to
+ * NULL, so the caller never sees a partial, unterminated buffer.
+ */
+WIMLIBAPI int
+wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret)
+{
+       struct xml_node *doc;
+       struct xml_out_buf buf = {};
+       int ret;
+
+       *out_ret = NULL;
+       ret = xml_parse_document(in, &doc);
+       if (ret)
+               return ret;
+       ret = xml_write_document(doc, &buf);
+       xml_free_node(doc);
+       if (ret) {
+               /* Don't leak or expose the incomplete output. */
+               FREE(buf.buf);
+               return ret;
+       }
+       *out_ret = buf.buf;
+       return 0;
+}
+#endif /* ENABLE_TEST_SUPPORT */
diff --git a/tools/make-windows-release b/tools/make-windows-release

index d95c6986100ff28848e7bfc395193b7d0e1cd363..e233909e5201f9bf8b186b68a556cc0fc40ad819 100755 (executable)
--- a/tools/make-windows-release
+++ b/tools/make-windows-release
@@ -31,14 +31,6 @@ VERSION=$(tools/get-version-number)
  DESTDIR=wimlib-${VERSION}-windows-${ARCH}-bin
  ZIPFILE=wimlib-${VERSION}-windows-${ARCH}-bin.zip
  MAKE="make -j $(grep -c processor /proc/cpuinfo)"
-WINDEPDIR=./tools/windeps
-SYSROOT=$WINDEPDIR/sysroot_${ARCH}
-
-# Prepare third party libraries
-
-if [ ! -e $SYSROOT ]; then
-       $MAKE -C $WINDEPDIR sysroot_${ARCH}
-fi
  
  # Compile wimlib
  
@@ -50,16 +42,8 @@ then
         # Note: putting -static-libgcc in CC is a workaround for libtool
         # stripping it:
         # http://www.gnu.org/software/libtool/manual/libtool.html#Stripped-link-flags
-       #
-       # We also need to override the MinGW pkg-config with the "native" one in
-       # order for it to correctly restrict the include path to our $SYSROOT.
         ./configure --host=${ARCH}-w64-mingw32 --disable-static         \
-               CC="${ARCH}-w64-mingw32-gcc -static-libgcc"             \
-               CPPFLAGS="-I$SYSROOT/include"                           \
-               LDFLAGS="-L$SYSROOT/lib"                                \
-               PKG_CONFIG=pkg-config                                   \
-               PKG_CONFIG_LIBDIR="$SYSROOT/lib/pkgconfig"              \
-               "$@"
+               CC="${ARCH}-w64-mingw32-gcc -static-libgcc" "$@"
         $MAKE clean
  fi
  $MAKE
@@ -77,7 +61,6 @@ ${ARCH}-w64-mingw32-strip $DESTDIR/*.{dll,exe}
  # Install text files
  
  cp NEWS README* COPYING* $DESTDIR
-cp $WINDEPDIR/COPYING* $DESTDIR
  
  sed -n '/^#/q; s/^[\/\* ]*//; p' src/divsufsort.c > $DESTDIR/COPYING.libdivsufsort-lite
  if ! grep -q 'Copyright' $DESTDIR/COPYING.libdivsufsort-lite; then
diff --git a/tools/run-sparse b/tools/run-sparse

index dafad10ec621772fbfbf9a8c628012b187b002f4..828aebe774661ad1ce7e8617eb9f17e40c747fd8 100755 (executable)
--- a/tools/run-sparse
+++ b/tools/run-sparse
@@ -3,6 +3,5 @@
  for fil in src/*.c programs/imagex.c; do
         sparse $fil -gcc-base-dir `gcc --print-file-name=`              \
                 -D_FILE_OFFSET_BITS=64 -DHAVE_CONFIG_H -D_GNU_SOURCE    \
-               -I. -Iinclude -I/usr/include/libxml2                    \
-               -Wbitwise -Wpointer-subtraction-blows
+               -I. -Iinclude -Wbitwise -Wpointer-subtraction-blows
  done
diff --git a/tools/windeps/Makefile b/tools/windeps/Makefile

deleted file mode 100644 (file)

index 8e88327..0000000
--- a/tools/windeps/Makefile
+++ /dev/null
@@ -1,77 +0,0 @@
-#
-# This Makefile builds the third-party libraries needed to build a standalone
-# libwim.dll for Windows.  We build these third-party libraries ourselves mainly
-# to cut down on bloat.  They are automatically downloaded from the URLs
-# declared below and verified against the checksums given in the 'sha256sums'
-# file.
-#
-# This Makefile requires a GNU toolchain with MinGW-w64 (i686 and x86_64
-# versions).
-#
-
-ARCHITECTURES          := i686 x86_64
-
-LIBXML2_VERSION                := 2.10.3
-LIBXML_URL             := https://download.gnome.org/sources/libxml2/2.10/libxml2-$(LIBXML2_VERSION).tar.xz
-LIBXML_SRCDIR          := libxml2-$(LIBXML2_VERSION)
-LIBXML_DIST            := $(LIBXML_SRCDIR).tar.xz
-SRCDIR_TARGETS         += $(LIBXML_SRCDIR)
-DIST_TARGETS           += $(LIBXML_DIST)
-$(LIBXML_DIST):
-       wget $(LIBXML_URL)
-$(LIBXML_SRCDIR):$(LIBXML_DIST) checksums_verified
-       tar xvf $<
-       cp $@/Copyright COPYING.libxml2
-MAKE_CLEAN_FILES += $(LIBXML_SRCDIR) COPYING.libxml2
-
-checksums_verified:$(DIST_TARGETS)
-       sha256sum -c sha256sums
-
-#
-# declare_libxml_target(arch)
-#
-define declare_libxml_target
-libxml_$(1):$(LIBXML_SRCDIR)
-       builddir=build_libxml_$(1);                             \
-       rm -rf $$$$builddir;                                    \
-       mkdir $$$$builddir;                                     \
-       cd $$$$builddir;                                        \
-       ../$(LIBXML_SRCDIR)/configure                           \
-               --host=$(1)-w64-mingw32                         \
-               --enable-static                                 \
-               --disable-shared                                \
-               --prefix=$$$$PWD/../sysroot_$(1)                \
-               CFLAGS=-Os                                      \
-               --with-minimum                                  \
-               --without-lzma                                  \
-               --with-tree                                     \
-               --with-writer;                                  \
-       $(MAKE) install;                                        \
-       rm -f ../sysroot_$(1)/lib/libxml2.la;
-
-$(1)_BUILD_TARGETS += libxml_$(1)
-MAKE_CLEAN_FILES += build_libxml_$(1)
-endef
-
-#
-# declare_arch_targets(arch)
-#
-define declare_arch_targets
-$(eval $(call declare_libxml_target,$(1)))
-
-sysroot_$(1): $($(1)_BUILD_TARGETS)
-
-ALL_SYSROOTS += sysroot_$(1)
-MAKE_CLEAN_FILES += sysroot_$(1)
-endef
-
-$(foreach arch,$(ARCHITECTURES),$(eval $(call declare_arch_targets,$(arch))))
-
-all: $(ALL_SYSROOTS)
-
-clean:
-       rm -rf $(MAKE_CLEAN_FILES) $(DIST_TARGETS)
-
-.PHONY: all clean $(SRCDIR_TARGETS) checksums_verified
-
-.DEFAULT_GOAL = all
diff --git a/tools/windeps/sha256sums b/tools/windeps/sha256sums

deleted file mode 100644 (file)

index 2318951..0000000
--- a/tools/windeps/sha256sums
+++ /dev/null
@@ -1 +0,0 @@
-5d2cc3d78bec3dbe212a9d7fa629ada25a7da928af432c93060ff5c17ee28a9c  libxml2-2.10.3.tar.xz
author	Eric Biggers <ebiggers3@gmail.com>
	Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
committer	Eric Biggers <ebiggers3@gmail.com>
	Mon, 27 Mar 2023 00:25:46 +0000 (17:25 -0700)
.github/workflows/ci.yml		patch \| blob \| history
.gitignore		patch \| blob \| history
Makefile.am		patch \| blob \| history
NEWS		patch \| blob \| history
README		patch \| blob \| history
README.WINDOWS		patch \| blob \| history
configure.ac		patch \| blob \| history
include/wimlib/test_support.h		patch \| blob \| history
include/wimlib/xml.h		patch \| blob \| history
include/wimlib/xmlproc.h	[new file with mode: 0644]	patch \| blob
include/wimlib_tchar.h		patch \| blob \| history
src/util.c		patch \| blob \| history
src/wim.c		patch \| blob \| history
src/xml.c		patch \| blob \| history
src/xmlproc.c	[new file with mode: 0644]	patch \| blob
tools/make-windows-release		patch \| blob \| history
tools/run-sparse		patch \| blob \| history
tools/windeps/Makefile	[deleted file]	patch \| blob \| history
tools/windeps/sha256sums	[deleted file]	patch \| blob \| history