libFuzzer: add encoding fuzzer

author Eric Biggers <ebiggers3@gmail.com>

Mon, 10 Apr 2023 00:02:21 +0000 (17:02 -0700)

committer Eric Biggers <ebiggers3@gmail.com>

Mon, 10 Apr 2023 00:13:08 +0000 (17:13 -0700)
author Eric Biggers <ebiggers3@gmail.com>
Mon, 10 Apr 2023 00:02:21 +0000 (17:02 -0700)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 10 Apr 2023 00:13:08 +0000 (17:13 -0700)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml

index d7919922f016c6b55c3dd495c9b14d3913877dc0..d1d33b1fe338c4d71dd2bb2c4517c454e88a0407 100644 (file)
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -202,6 +202,8 @@ jobs:
            sanitizer:
          - target: wim
            sanitizer: --asan --ubsan
+        - target: encoding
+          sanitizer: --asan --ubsan
          - target: xmlproc
            sanitizer:
          - target: xmlproc
diff --git a/include/wimlib/test_support.h b/include/wimlib/test_support.h

index 909b643c5a425998a6b175c8518502fd9da71016..8104921ddaf5dea7ad04e7d10ed85e20a526288d 100644 (file)
--- a/include/wimlib/test_support.h
+++ b/include/wimlib/test_support.h
@@ -25,6 +25,14 @@ wimlib_compare_images(WIMStruct *wim1, int image1,
  WIMLIBAPI int
  wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret);
  
+WIMLIBAPI int
+wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+                      utf16lechar **out_ret, size_t *out_nbytes_ret);
+
+WIMLIBAPI int
+wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+                      char **out_ret, size_t *out_nbytes_ret);
+
  #endif /* ENABLE_TEST_SUPPORT */
  
  #endif /* _WIMLIB_TEST_SUPPORT_H */
diff --git a/src/encoding.c b/src/encoding.c

index fde3adb2f88e30554c8da70123106f223b6c25f6..41917c9d917cb56a51d83812b175913488388145 100644 (file)
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -462,3 +462,22 @@ utf16le_len_chars(const utf16lechar *s)
  {
         return utf16le_len_bytes(s) / sizeof(utf16lechar);
  }
+
+#ifdef ENABLE_TEST_SUPPORT
+
+#include "wimlib/test_support.h"
+
+/* Test-support export: passes its arguments straight to utf8_to_utf16le(). */
+WIMLIBAPI int wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+               utf16lechar **out_ret, size_t *out_nbytes_ret)
+{
+       return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret);
+}
+
+/* Test-support export: passes its arguments straight to utf16le_to_utf8(). */
+WIMLIBAPI int wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+               char **out_ret, size_t *out_nbytes_ret)
+{
+       return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret);
+}
+#endif /* ENABLE_TEST_SUPPORT */
diff --git a/tools/libFuzzer/encoding/corpus/0 b/tools/libFuzzer/encoding/corpus/0

new file mode 100644 (file)

index 0000000..e0d84f4

Binary files /dev/null and b/tools/libFuzzer/encoding/corpus/0 differ
diff --git a/tools/libFuzzer/encoding/corpus/1 b/tools/libFuzzer/encoding/corpus/1

new file mode 100644 (file)

index 0000000..eb05829

Binary files /dev/null and b/tools/libFuzzer/encoding/corpus/1 differ
diff --git a/tools/libFuzzer/encoding/fuzz.c b/tools/libFuzzer/encoding/fuzz.c

new file mode 100644 (file)

index 0000000..16c7d13
--- /dev/null
+++ b/tools/libFuzzer/encoding/fuzz.c
@@ -0,0 +1,83 @@
+#include "../fuzzer.h"
+
+/*
+ * Starting from bytes: "UTF-8" (really WTF-8) to "UTF-16LE" (really any
+ * sequence of 16-bit units) and back must reproduce the input exactly.
+ * The forward conversion may fail only with WIMLIB_ERR_INVALID_UTF8_STRING,
+ * which is expected when the input is not valid WTF-8.
+ */
+static void
+fuzz_utf8_roundtrip(const u8 *in, size_t insize)
+{
+       utf16lechar *wide;
+       size_t wide_nbytes;
+       char *narrow;
+       size_t narrow_nbytes;
+       int status = wimlib_utf8_to_utf16le((const char *)in, insize,
+                                           &wide, &wide_nbytes);
+
+       if (status != 0) {
+               assert(status == WIMLIB_ERR_INVALID_UTF8_STRING);
+               return;
+       }
+       status = wimlib_utf16le_to_utf8(wide, wide_nbytes,
+                                       &narrow, &narrow_nbytes);
+       assert(status == 0);
+       assert(narrow_nbytes == insize);
+       assert(memcmp(narrow, in, insize) == 0);
+       free(narrow);
+       free(wide);
+}
+
+/*
+ * "UTF-16LE" (really any 16-bit units) to "UTF-8" (really WTF-8) and back
+ * must be lossless, except that an odd input length must be rejected with
+ * WIMLIB_ERR_INVALID_UTF16_STRING.  Inputs are skipped if malloc() fails.
+ */
+static void
+fuzz_utf16_roundtrip(const u8 *in, size_t insize)
+{
+       utf16lechar *in_aligned = malloc(insize ? insize : 1);
+       char *utf8;
+       utf16lechar *result;
+       size_t utf8_size, result_size;
+       int ret;
+
+       if (in_aligned == NULL) /* real OOM: malloc(0) was never requested */
+               return;
+       memcpy(in_aligned, in, insize);
+       ret = wimlib_utf16le_to_utf8(in_aligned, insize, &utf8, &utf8_size);
+       if (insize % 2) {
+               assert(ret == WIMLIB_ERR_INVALID_UTF16_STRING);
+               free(in_aligned);
+               return;
+       }
+       assert(ret == 0);
+       ret = wimlib_utf8_to_utf16le(utf8, utf8_size, &result, &result_size);
+       assert(ret == 0);
+       assert(result_size == insize);
+       assert(memcmp(result, in, insize) == 0);
+       free(result);
+       free(utf8);
+       free(in_aligned);
+}
+
+/* Fuzz character encoding conversion; the first byte selects the direction. */
+int LLVMFuzzerTestOneInput(const u8 *in, size_t insize)
+{
+       const u8 *payload;
+       size_t payload_size;
+
+       if (insize == 0)
+               return 0;
+
+       /* Peel off the selector byte; everything after it is the string. */
+       payload = in + 1;
+       payload_size = insize - 1;
+       if (in[0] == 0)                 /* 0: start from "UTF-8" */
+               fuzz_utf8_roundtrip(payload, payload_size);
+       else if (in[0] == 1)            /* 1: start from "UTF-16LE" */
+               fuzz_utf16_roundtrip(payload, payload_size);
+       /* Any other selector value runs nothing. */
+       return 0;
+}
author	Eric Biggers <ebiggers3@gmail.com>
	Mon, 10 Apr 2023 00:02:21 +0000 (17:02 -0700)
committer	Eric Biggers <ebiggers3@gmail.com>
	Mon, 10 Apr 2023 00:13:08 +0000 (17:13 -0700)
.github/workflows/ci.yml		patch \| blob \| history
include/wimlib/test_support.h		patch \| blob \| history
src/encoding.c		patch \| blob \| history
tools/libFuzzer/encoding/corpus/0	[new file with mode: 0644]	patch \| blob
tools/libFuzzer/encoding/corpus/1	[new file with mode: 0644]	patch \| blob
tools/libFuzzer/encoding/fuzz.c	[new file with mode: 0644]	patch \| blob