wimlib.net Git - wimlib/blob - src/xmlproc.c

   1 /*
   2  * xmlproc.c
   3  *
   4  * A simple XML 1.0 processor.  This handles all XML features that are used in
   5  * WIM files, plus a bit more for futureproofing.  It omits problematic
   6  * features, such as expansion of entities other than simple escape sequences.
   7  */
   8
   9 /*
  10  * Copyright 2023 Eric Biggers
  11  *
  12  * This file is free software; you can redistribute it and/or modify it under
  13  * the terms of the GNU Lesser General Public License as published by the Free
  14  * Software Foundation; either version 3 of the License, or (at your option) any
  15  * later version.
  16  *
  17  * This file is distributed in the hope that it will be useful, but WITHOUT
  18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  19  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  20  * details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public License
  23  * along with this file; if not, see https://www.gnu.org/licenses/.
  24  */
  25
  26 #ifdef HAVE_CONFIG_H
  27 #  include "config.h"
  28 #endif
  29
  30 #include <string.h>
  31
  32 #include "wimlib/error.h"
  33 #include "wimlib/test_support.h"
  34 #include "wimlib/util.h"
  35 #include "wimlib/xmlproc.h"
  36
  37 /*----------------------------------------------------------------------------*
  38  *                         XML node utility functions                         *
  39  *----------------------------------------------------------------------------*/
  40
  41 static tchar *
  42 tstrdupz(const tchar *str, size_t len)
  43 {
  44         tchar *new_str = CALLOC(len + 1, sizeof(str[0]));
  45
  46         if (new_str)
  47                 tmemcpy(new_str, str, len);
  48         return new_str;
  49 }
  50
  51 static struct xml_node *
  52 xml_new_node(struct xml_node *parent, enum xml_node_type type,
  53              const tchar *name, size_t name_len,
  54              const tchar *value, size_t value_len)
  55 {
  56         struct xml_node *node = CALLOC(1, sizeof(*node));
  57
  58         if (!node)
  59                 return NULL;
  60         node->type = type;
  61         INIT_LIST_HEAD(&node->children);
  62         if (name) {
  63                 node->name = tstrdupz(name, name_len);
  64                 if (!node->name)
  65                         goto oom;
  66         }
  67         if (value) {
  68                 node->value = tstrdupz(value, value_len);
  69                 if (!node->value)
  70                         goto oom;
  71         }
  72         if (parent)
  73                 xml_add_child(parent, node);
  74         return node;
  75
  76 oom:
  77         xml_free_node(node);
  78         return NULL;
  79 }
  80
  81 /*
  82  * Create a new ELEMENT node, and if @parent is non-NULL add the new node under
  83  * @parent which should be another ELEMENT.
  84  */
  85 struct xml_node *
  86 xml_new_element(struct xml_node *parent, const tchar *name)
  87 {
  88         return xml_new_node(parent, XML_ELEMENT_NODE, name, tstrlen(name),
  89                             NULL, 0);
  90 }
  91
  92 /*
  93  * Create a new ELEMENT node with an attached TEXT node, and if @parent is
  94  * non-NULL add the new ELEMENT under @parent which should be another ELEMENT.
  95  */
  96 struct xml_node *
  97 xml_new_element_with_text(struct xml_node *parent, const tchar *name,
  98                           const tchar *text)
  99 {
 100         struct xml_node *element = xml_new_element(parent, name);
 101
 102         if (element && xml_element_set_text(element, text) != 0) {
 103                 xml_free_node(element);
 104                 return NULL;
 105         }
 106         return element;
 107 }
 108
 109 /* Append @child to the children list of @parent. */
 110 void
 111 xml_add_child(struct xml_node *parent, struct xml_node *child)
 112 {
 113         xml_unlink_node(child); /* Shouldn't be needed, but be safe. */
 114         child->parent = parent;
 115         list_add_tail(&child->sibling_link, &parent->children);
 116 }
 117
 118 /* Unlink @node from its parent, if it has one. */
 119 void
 120 xml_unlink_node(struct xml_node *node)
 121 {
 122         if (node->parent) {
 123                 list_del(&node->sibling_link);
 124                 node->parent = NULL;
 125         }
 126 }
 127
 128 static void
 129 xml_free_children(struct xml_node *parent)
 130 {
 131         struct xml_node *child, *tmp;
 132
 133         list_for_each_entry_safe(child, tmp, &parent->children, sibling_link)
 134                 xml_free_node(child);
 135 }
 136
 137 /* Recursively free @node, first unlinking it if needed.  @node may be NULL. */
 138 void
 139 xml_free_node(struct xml_node *node)
 140 {
 141         if (node) {
 142                 xml_unlink_node(node);
 143                 xml_free_children(node);
 144                 FREE(node->name);
 145                 FREE(node->value);
 146                 FREE(node);
 147         }
 148 }
 149
 150 /*
 151  * Return the text from the first TEXT child node of @element, or NULL if no
 152  * such node exists.  @element may be NULL.
 153  */
 154 const tchar *
 155 xml_element_get_text(const struct xml_node *element)
 156 {
 157         const struct xml_node *child;
 158
 159         xml_node_for_each_child(element, child)
 160                 if (child->type == XML_TEXT_NODE)
 161                         return child->value;
 162         return NULL;
 163 }
 164
 165 /*
 166  * Set the contents of the given @element to the given @text, replacing the
 167  * entire existing contents if any.
 168  */
 169 int
 170 xml_element_set_text(struct xml_node *element, const tchar *text)
 171 {
 172         struct xml_node *text_node = xml_new_node(NULL, XML_TEXT_NODE, NULL, 0,
 173                                                   text, tstrlen(text));
 174         if (!text_node)
 175                 return WIMLIB_ERR_NOMEM;
 176         xml_free_children(element);
 177         xml_add_child(element, text_node);
 178         return 0;
 179 }
 180
 181 static int
 182 xml_element_append_text(struct xml_node *element,
 183                         const tchar *text, size_t text_len)
 184 {
 185         struct xml_node *last_child;
 186
 187         if (!list_empty(&element->children) &&
 188             (last_child =
 189              list_last_entry(&element->children, struct xml_node,
 190                              sibling_link))->type == XML_TEXT_NODE) {
 191                 /*
 192                  * The new TEXT would directly follow another TEXT, so simplify
 193                  * the tree by just appending to the existing TEXT.  (This case
 194                  * can theoretically be reached via the use of CDATA...)
 195                  */
 196                 size_t old_len = tstrlen(last_child->value);
 197                 tchar *new_value = CALLOC(old_len + text_len + 1,
 198                                           sizeof(new_value[0]));
 199                 if (!new_value)
 200                         return WIMLIB_ERR_NOMEM;
 201                 tmemcpy(new_value, last_child->value, old_len);
 202                 tmemcpy(&new_value[old_len], text, text_len);
 203                 FREE(last_child->value);
 204                 last_child->value = new_value;
 205                 return 0;
 206         }
 207         if (!xml_new_node(element, XML_TEXT_NODE, NULL, 0, text, text_len))
 208                 return WIMLIB_ERR_NOMEM;
 209         return 0;
 210 }
 211
 212 /* Find the attribute with the given @name on @element. */
 213 struct xml_node *
 214 xml_get_attrib(const struct xml_node *element, const tchar *name)
 215 {
 216         struct xml_node *child;
 217
 218         xml_node_for_each_child(element, child) {
 219                 if (child->type == XML_ATTRIBUTE_NODE &&
 220                     !tstrcmp(child->name, name))
 221                         return child;
 222         }
 223         return NULL;
 224 }
 225
 226 /* Set the attribute @name=@value on the given @element. */
 227 int
 228 xml_set_attrib(struct xml_node *element, const tchar *name, const tchar *value)
 229 {
 230         struct xml_node *attrib = xml_new_node(NULL, XML_ATTRIBUTE_NODE,
 231                                                name, tstrlen(name),
 232                                                value, tstrlen(value));
 233         if (!attrib)
 234                 return WIMLIB_ERR_NOMEM;
 235         xml_replace_child(element, attrib);
 236         return 0;
 237 }
 238
 239 /*
 240  * Add the ELEMENT or ATTRIBUTE node @replacement under the ELEMENT @parent,
 241  * replacing any node with the same type and name that already exists.
 242  */
 243 void
 244 xml_replace_child(struct xml_node *parent, struct xml_node *replacement)
 245 {
 246         struct xml_node *child;
 247
 248         xml_unlink_node(replacement); /* Shouldn't be needed, but be safe. */
 249
 250         xml_node_for_each_child(parent, child) {
 251                 if (child->type == replacement->type &&
 252                     !tstrcmp(child->name, replacement->name)) {
 253                         list_replace(&child->sibling_link,
 254                                      &replacement->sibling_link);
 255                         replacement->parent = parent;
 256                         child->parent = NULL;
 257                         xml_free_node(child);
 258                         return;
 259                 }
 260         }
 261         xml_add_child(parent, replacement);
 262 }
 263
 264 struct xml_node *
 265 xml_clone_tree(struct xml_node *orig)
 266 {
 267         struct xml_node *clone, *orig_child, *clone_child;
 268
 269         clone = xml_new_node(NULL, orig->type,
 270                         orig->name, orig->name ? tstrlen(orig->name) : 0,
 271                         orig->value, orig->value ? tstrlen(orig->value) : 0);
 272         if (!clone)
 273                 return NULL;
 274         xml_node_for_each_child(orig, orig_child) {
 275                 clone_child = xml_clone_tree(orig_child);
 276                 if (!clone_child)
 277                         goto oom;
 278                 xml_add_child(clone, clone_child);
 279         }
 280         return clone;
 281
 282 oom:
 283         xml_free_node(clone);
 284         return NULL;
 285 }
 286
 287 /*----------------------------------------------------------------------------*
 288  *                           XML string validation                            *
 289  *----------------------------------------------------------------------------*/
 290
 291 /*
 292  * Functions that check for legal names and values in XML 1.0.  These are
 293  * currently slightly over-lenient, as they allow everything non-ASCII.  These
 294  * are also not currently used by the XML parser to reject non-well-formed
 295  * documents, but rather just by the user of the XML processor (xml.c) in order
 296  * to avoid introducing illegal names and values into the document.
 297  */
 298
 299 static inline bool
 300 is_whitespace(tchar c)
 301 {
 302         return c == ' ' || c == '\n' || c == '\r' || c == '\t';
 303 }
 304
 305 static inline bool
 306 is_name_start_char(tchar c)
 307 {
 308         return (c & 0x7f) != c /* overly lenient for now */ ||
 309                 (c >= 'A' && c <= 'Z') ||
 310                 (c >= 'a' && c <= 'z') ||
 311                 c == ':' || c == '_';
 312 }
 313
 314 static inline bool
 315 is_name_char(tchar c)
 316 {
 317         return is_name_start_char(c) ||
 318                 (c >= '0' && c <= '9') || c == '-' || c == '.';
 319 }
 320
 321 /* Allow characters used in element "paths"; see do_xml_path_walk() */
 322 static inline bool
 323 is_path_char(tchar c)
 324 {
 325         return c == '/' || c == '[' || c == ']';
 326 }
 327
 328 bool
 329 xml_legal_path(const tchar *p)
 330 {
 331         if (!is_name_start_char(*p) && !is_path_char(*p))
 332                 return false;
 333         for (p = p + 1; *p; p++) {
 334                 if (!is_name_char(*p) && !is_path_char(*p))
 335                         return false;
 336         }
 337         return true;
 338 }
 339
 340 bool
 341 xml_legal_value(const tchar *p)
 342 {
 343         for (; *p; p++) {
 344                 if (*p < 0x20 && !is_whitespace(*p))
 345                         return false;
 346         }
 347         return true;
 348 }
 349
 350 #if TCHAR_IS_UTF16LE
 351 #define BYTE_ORDER_MARK (tchar[]){ 0xfeff, 0 }
 352 #else
 353 #define BYTE_ORDER_MARK "\xEF\xBB\xBF"
 354 #endif
 355
 356 /*----------------------------------------------------------------------------*
 357  *                               XML parsing                                  *
 358  *----------------------------------------------------------------------------*/
 359
 360 #define CHECK(cond)     if (!(cond)) goto bad
 361
 362 static inline void
 363 skip_whitespace(const tchar **pp)
 364 {
 365         const tchar *p = *pp;
 366
 367         while (is_whitespace(*p))
 368                 p++;
 369         *pp = p;
 370 }
 371
 372 static inline bool
 373 skip_string(const tchar **pp, const tchar *str)
 374 {
 375         const tchar *p = *pp;
 376         size_t len = tstrlen(str);
 377
 378         if (tstrncmp(p, str, len))
 379                 return false;
 380         *pp = p + len;
 381         return true;
 382 }
 383
 384 static inline bool
 385 find_and_skip(const tchar **pp, const tchar *str)
 386 {
 387         const tchar *p = *pp;
 388
 389         p = tstrstr(p, str);
 390         if (!p)
 391                 return false;
 392         *pp = p + tstrlen(str);
 393         return true;
 394 }
 395
 396 static bool
 397 skip_misc(const tchar **pp)
 398 {
 399         const tchar *p = *pp, *prev_p;
 400
 401         do {
 402                 prev_p = p;
 403                 skip_whitespace(&p);
 404                 /* Discard XML declaration and top-level PIs for now. */
 405                 if (skip_string(&p, T("<?")) && !find_and_skip(&p, T("?>")))
 406                         return false;
 407                 /* Discard DOCTYPE declaration for now. */
 408                 if (skip_string(&p, T("<!DOCTYPE")) && !find_and_skip(&p, T(">")))
 409                         return false;
 410                 /* Discard top-level comments for now. */
 411                 if (skip_string(&p, T("<!--")) && !find_and_skip(&p, T("-->")))
 412                         return false;
 413         } while (p != prev_p);
 414         *pp = p;
 415         return true;
 416 }
 417
 418 static inline const tchar *
 419 get_escape_seq(tchar c)
 420 {
 421         switch (c) {
 422         case '<':
 423                 return T("&lt;");
 424         case '>':
 425                 return T("&gt;");
 426         case '&':
 427                 return T("&amp;");
 428         case '\'':
 429                 return T("&apos;");
 430         case '"':
 431                 return T("&quot;");
 432         }
 433         return NULL;
 434 }
 435
 436 /* Note: 'str' must be NUL-terminated, but only 'len' chars are used. */
 437 static int
 438 unescape_string(const tchar *str, size_t len, tchar **unescaped_ret)
 439 {
 440         const tchar *in_p = str;
 441         tchar *unescaped, *out_p;
 442
 443         unescaped = CALLOC(len + 1, sizeof(str[0]));
 444         if (!unescaped)
 445                 return WIMLIB_ERR_NOMEM;
 446         out_p = unescaped;
 447         while (in_p < &str[len]) {
 448                 if (*in_p != '&')
 449                         *out_p++ = *in_p++;
 450                 else if (skip_string(&in_p, T("&lt;")))
 451                         *out_p++ = '<';
 452                 else if (skip_string(&in_p, T("&gt;")))
 453                         *out_p++ = '>';
 454                 else if (skip_string(&in_p, T("&amp;")))
 455                         *out_p++ = '&';
 456                 else if (skip_string(&in_p, T("&apos;")))
 457                         *out_p++ = '\'';
 458                 else if (skip_string(&in_p, T("&quot;")))
 459                         *out_p++ = '"';
 460                 else
 461                         goto bad;
 462         }
 463         if (in_p > &str[len])
 464                 goto bad;
 465         *unescaped_ret = unescaped;
 466         return 0;
 467
 468 bad:
 469         ERROR("Error unescaping string '%.*"TS"'", (int)len, str);
 470         FREE(unescaped);
 471         return WIMLIB_ERR_XML;
 472 }
 473
 474 static int
 475 parse_element(const tchar **pp, struct xml_node *parent, int depth,
 476               struct xml_node **node_ret);
 477
 478 static int
 479 parse_contents(const tchar **pp, struct xml_node *element, int depth)
 480 {
 481         const tchar *p = *pp;
 482         int ret;
 483
 484         for (;;) {
 485                 const tchar *raw_text = p;
 486                 tchar *text;
 487
 488                 for (; *p != '<'; p++) {
 489                         if (*p == '\0')
 490                                 return WIMLIB_ERR_XML;
 491                 }
 492                 if (p > raw_text) {
 493                         ret = unescape_string(raw_text, p - raw_text, &text);
 494                         if (ret)
 495                                 return ret;
 496                         ret = xml_element_append_text(element, text,
 497                                                       tstrlen(text));
 498                         FREE(text);
 499                         if (ret)
 500                                 return ret;
 501                 }
 502                 if (p[1] == '/') {
 503                         break; /* Reached the end tag of @element */
 504                 } else if (p[1] == '?') {
 505                         /* Discard processing instructions for now. */
 506                         p += 2;
 507                         if (!find_and_skip(&p, T("?>")))
 508                                 return WIMLIB_ERR_XML;
 509                         continue;
 510                 } else if (p[1] == '!') {
 511                         if (skip_string(&p, T("<![CDATA["))) {
 512                                 raw_text = p;
 513                                 if (!find_and_skip(&p, T("]]>")))
 514                                         return WIMLIB_ERR_XML;
 515                                 ret = xml_element_append_text(element, raw_text,
 516                                                               p - 3 - raw_text);
 517                                 if (ret)
 518                                         return ret;
 519                                 continue;
 520                         } else if (skip_string(&p, T("<!--"))) {
 521                                 /* Discard comments for now. */
 522                                 if (!find_and_skip(&p, T("-->")))
 523                                         return WIMLIB_ERR_XML;
 524                                 continue;
 525                         }
 526                         return WIMLIB_ERR_XML;
 527                 }
 528                 ret = parse_element(&p, element, depth + 1, NULL);
 529                 if (ret)
 530                         return ret;
 531         }
 532         *pp = p;
 533         return 0;
 534 }
 535
 536 static int
 537 parse_element(const tchar **pp, struct xml_node *parent, int depth,
 538               struct xml_node **element_ret)
 539 {
 540         const tchar *p = *pp;
 541         struct xml_node *element = NULL;
 542         const tchar *name_start;
 543         size_t name_len;
 544         int ret;
 545
 546         /* Parse the start tag. */
 547         CHECK(depth < 50);
 548         CHECK(*p == '<');
 549         p++;
 550         name_start = p;
 551         while (!is_whitespace(*p) && *p != '>' && *p != '/' && *p != '\0')
 552                 p++;
 553         name_len = p - name_start;
 554         CHECK(name_len > 0);
 555         element = xml_new_node(parent, XML_ELEMENT_NODE, name_start, name_len,
 556                                NULL, 0);
 557         if (!element) {
 558                 ret = WIMLIB_ERR_NOMEM;
 559                 goto error;
 560         }
 561         /* Parse the attributes list within the start tag. */
 562         while (is_whitespace(*p)) {
 563                 const tchar *attr_name_start, *attr_value_start;
 564                 size_t attr_name_len, attr_value_len;
 565                 tchar *attr_value;
 566                 tchar quote;
 567
 568                 skip_whitespace(&p);
 569                 if (*p == '/' || *p == '>')
 570                         break;
 571                 attr_name_start = p;
 572                 while (*p != '=' && !is_whitespace(*p) && *p != '\0')
 573                         p++;
 574                 attr_name_len = p - attr_name_start;
 575                 skip_whitespace(&p);
 576                 CHECK(attr_name_len > 0 && *p == '=');
 577                 p++;
 578                 skip_whitespace(&p);
 579                 quote = *p;
 580                 CHECK(quote == '\'' || quote == '"');
 581                 attr_value_start = ++p;
 582                 while (*p != quote && *p != '\0')
 583                         p++;
 584                 CHECK(*p == quote);
 585                 attr_value_len = p - attr_value_start;
 586                 p++;
 587                 ret = unescape_string(attr_value_start, attr_value_len,
 588                                       &attr_value);
 589                 if (ret)
 590                         goto error;
 591                 ret = xml_new_node(element, XML_ATTRIBUTE_NODE,
 592                                    attr_name_start, attr_name_len,
 593                                    attr_value, tstrlen(attr_value))
 594                         ? 0 : WIMLIB_ERR_NOMEM;
 595                 FREE(attr_value);
 596                 if (ret)
 597                         goto error;
 598         }
 599         if (*p == '/') {
 600                 /* Closing an empty element tag */
 601                 p++;
 602         } else {
 603                 /* Closing the start tag */
 604                 CHECK(*p == '>');
 605                 p++;
 606                 /* Parse the contents, then the end tag. */
 607                 ret = parse_contents(&p, element, depth);
 608                 if (ret)
 609                         goto error;
 610                 CHECK(*p == '<');
 611                 p++;
 612                 CHECK(*p == '/');
 613                 p++;
 614                 CHECK(!tstrncmp(p, name_start, name_len));
 615                 p += name_len;
 616                 skip_whitespace(&p);
 617         }
 618         CHECK(*p == '>');
 619         p++;
 620         *pp = p;
 621         if (element_ret)
 622                 *element_ret = element;
 623         return 0;
 624
 625 error:
 626         xml_free_node(element);
 627         return ret;
 628
 629 bad:
 630         ret = WIMLIB_ERR_XML;
 631         goto error;
 632 }
 633
 634 /*
 635  * Deserialize an XML document and return its root node in @doc_ret.  The
 636  * document must be given as a NUL-terminated string of 'tchar', i.e. UTF-16LE
 637  * in Windows builds and UTF-8 everywhere else.
 638  */
 639 int
 640 xml_parse_document(const tchar *p, struct xml_node **doc_ret)
 641 {
 642         int ret;
 643         struct xml_node *doc;
 644
 645         skip_string(&p, BYTE_ORDER_MARK);
 646         if (!skip_misc(&p))
 647                 return WIMLIB_ERR_XML;
 648         ret = parse_element(&p, NULL, 0, &doc);
 649         if (ret)
 650                 return ret;
 651         if (!skip_misc(&p) || *p) {
 652                 xml_free_node(doc);
 653                 return WIMLIB_ERR_XML;
 654         }
 655         *doc_ret = doc;
 656         return 0;
 657 }
 658
 659 /*----------------------------------------------------------------------------*
 660  *                               XML writing                                  *
 661  *----------------------------------------------------------------------------*/
 662
 663 static void
 664 xml_write(struct xml_out_buf *buf, const tchar *str, size_t len)
 665 {
 666         if (buf->count + len + 1 > buf->capacity) {
 667                 size_t new_capacity = max(buf->capacity * 2, 4096);
 668                 tchar *new_buf = REALLOC(buf->buf,
 669                                          new_capacity * sizeof(str[0]));
 670                 if (!new_buf) {
 671                         buf->oom = true;
 672                         return;
 673                 }
 674                 buf->buf = new_buf;
 675                 buf->capacity = new_capacity;
 676         }
 677         tmemcpy(&buf->buf[buf->count], str, len);
 678         buf->count += len;
 679 }
 680
 681 static void
 682 xml_puts(struct xml_out_buf *buf, const tchar *str)
 683 {
 684         xml_write(buf, str, tstrlen(str));
 685 }
 686
 687 static void
 688 xml_escape_and_puts(struct xml_out_buf *buf, const tchar *str)
 689 {
 690         const tchar *p = str, *saved, *seq = NULL;
 691
 692         for (;; p++) {
 693                 for (saved = p; *p && (seq = get_escape_seq(*p)) == NULL; p++)
 694                         ;
 695                 xml_write(buf, saved, p - saved);
 696                 if (!*p)
 697                         return;
 698                 xml_puts(buf, seq);
 699         }
 700 }
 701
 702 static void
 703 xml_write_element(struct xml_node *element, struct xml_out_buf *buf)
 704 {
 705         struct xml_node *child;
 706
 707         /* Write the start tag. */
 708         xml_puts(buf, T("<"));
 709         xml_puts(buf, element->name);
 710         xml_node_for_each_child(element, child) {
 711                 if (child->type == XML_ATTRIBUTE_NODE) {
 712                         xml_puts(buf, T(" "));
 713                         xml_puts(buf, child->name);
 714                         xml_puts(buf, T("=\""));
 715                         xml_escape_and_puts(buf, child->value);
 716                         xml_puts(buf, T("\""));
 717                 }
 718         }
 719         xml_puts(buf, T(">"));
 720
 721         /* Write the contents. */
 722         xml_node_for_each_child(element, child) {
 723                 if (child->type == XML_TEXT_NODE)
 724                         xml_escape_and_puts(buf, child->value);
 725                 else if (child->type == XML_ELEMENT_NODE)
 726                         xml_write_element(child, buf);
 727         }
 728
 729         /* Write the end tag. */
 730         xml_puts(buf, T("</"));
 731         xml_puts(buf, element->name);
 732         xml_puts(buf, T(">"));
 733 }
 734
 735 /*
 736  * Serialize the document @doc into @buf as a NUL-terminated string of 'tchar',
 737  * i.e. UTF-16LE in Windows builds and UTF-8 everywhere else.  A byte order mark
 738  * (BOM) is included, as this is needed for compatibility with WIMGAPI.
 739  */
 740 int
 741 xml_write_document(struct xml_node *doc, struct xml_out_buf *buf)
 742 {
 743         xml_puts(buf, BYTE_ORDER_MARK);
 744         xml_write_element(doc, buf);
 745         if (buf->oom)
 746                 return WIMLIB_ERR_NOMEM;
 747         buf->buf[buf->count] = '\0';
 748         return 0;
 749 }
 750
 751 /*----------------------------------------------------------------------------*
 752  *                              Test support                                  *
 753  *----------------------------------------------------------------------------*/
 754
 755 #ifdef ENABLE_TEST_SUPPORT
 756 WIMLIBAPI int
 757 wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret)
 758 {
 759         struct xml_node *doc;
 760         struct xml_out_buf buf = {};
 761         int ret;
 762
 763         ret = xml_parse_document(in, &doc);
 764         if (ret)
 765                 return ret;
 766         ret = xml_write_document(doc, &buf);
 767         xml_free_node(doc);
 768         *out_ret = buf.buf;
 769         return ret;
 770 }
 771 #endif /* ENABLE_TEST_SUPPORT */