wimlib.net Git - wimlib/commitdiff
libFuzzer: add encoding fuzzer
author Eric Biggers <ebiggers3@gmail.com>
Mon, 10 Apr 2023 00:02:21 +0000 (17:02 -0700)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 10 Apr 2023 00:13:08 +0000 (17:13 -0700)
.github/workflows/ci.yml
include/wimlib/test_support.h
src/encoding.c
tools/libFuzzer/encoding/corpus/0 [new file with mode: 0644]
tools/libFuzzer/encoding/corpus/1 [new file with mode: 0644]
tools/libFuzzer/encoding/fuzz.c [new file with mode: 0644]

index d7919922f016c6b55c3dd495c9b14d3913877dc0..d1d33b1fe338c4d71dd2bb2c4517c454e88a0407 100644 (file)
@@ -202,6 +202,8 @@ jobs:
           sanitizer:
         - target: wim
           sanitizer: --asan --ubsan
+        - target: encoding
+          sanitizer: --asan --ubsan
         - target: xmlproc
           sanitizer:
         - target: xmlproc
index 909b643c5a425998a6b175c8518502fd9da71016..8104921ddaf5dea7ad04e7d10ed85e20a526288d 100644 (file)
@@ -25,6 +25,14 @@ wimlib_compare_images(WIMStruct *wim1, int image1,
 WIMLIBAPI int
 wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret);
 
+WIMLIBAPI int
+wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+                      utf16lechar **out_ret, size_t *out_nbytes_ret);
+
+WIMLIBAPI int
+wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+                      char **out_ret, size_t *out_nbytes_ret);
+
 #endif /* ENABLE_TEST_SUPPORT */
 
 #endif /* _WIMLIB_TEST_SUPPORT_H */
index fde3adb2f88e30554c8da70123106f223b6c25f6..41917c9d917cb56a51d83812b175913488388145 100644 (file)
@@ -462,3 +462,22 @@ utf16le_len_chars(const utf16lechar *s)
 {
        return utf16le_len_bytes(s) / sizeof(utf16lechar);
 }
+
+#ifdef ENABLE_TEST_SUPPORT
+
+#include "wimlib/test_support.h"
+
+/*
+ * Test-support entry point: hands every argument unchanged to
+ * utf8_to_utf16le() and returns its result.
+ */
+WIMLIBAPI int
+wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+                      utf16lechar **out_ret, size_t *out_nbytes_ret)
+{
+       const int status = utf8_to_utf16le(in, in_nbytes,
+                                          out_ret, out_nbytes_ret);
+
+       return status;
+}
+
+/*
+ * Test-support entry point: hands every argument unchanged to
+ * utf16le_to_utf8() and returns its result.
+ */
+WIMLIBAPI int
+wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+                      char **out_ret, size_t *out_nbytes_ret)
+{
+       const int status = utf16le_to_utf8(in, in_nbytes,
+                                          out_ret, out_nbytes_ret);
+
+       return status;
+}
+#endif /* ENABLE_TEST_SUPPORT */
diff --git a/tools/libFuzzer/encoding/corpus/0 b/tools/libFuzzer/encoding/corpus/0
new file mode 100644 (file)
index 0000000..e0d84f4
Binary files /dev/null and b/tools/libFuzzer/encoding/corpus/0 differ
diff --git a/tools/libFuzzer/encoding/corpus/1 b/tools/libFuzzer/encoding/corpus/1
new file mode 100644 (file)
index 0000000..eb05829
Binary files /dev/null and b/tools/libFuzzer/encoding/corpus/1 differ
diff --git a/tools/libFuzzer/encoding/fuzz.c b/tools/libFuzzer/encoding/fuzz.c
new file mode 100644 (file)
index 0000000..16c7d13
--- /dev/null
@@ -0,0 +1,83 @@
+#include "../fuzzer.h"
+
+/*
+ * Round-trip check starting from "UTF-8" (really "WTF-8") input: convert it to
+ * UTF-16LE (really an arbitrary sequence of 16-bit wchars) and back again, and
+ * require the result to match the input byte for byte.  The only accepted
+ * failure is WIMLIB_ERR_INVALID_UTF8_STRING from the first conversion, which
+ * is what is expected when the input isn't valid WTF-8.
+ */
+static void
+fuzz_utf8_roundtrip(const u8 *in, size_t insize)
+{
+       utf16lechar *wide;
+       size_t wide_nbytes;
+       char *narrow;
+       size_t narrow_nbytes;
+       int err;
+
+       err = wimlib_utf8_to_utf16le((const char *)in, insize,
+                                    &wide, &wide_nbytes);
+       if (err != 0) {
+               /* Rejecting the input is fine, but only for this reason. */
+               assert(err == WIMLIB_ERR_INVALID_UTF8_STRING);
+               return;
+       }
+
+       /* The reverse conversion must succeed and reproduce the input. */
+       err = wimlib_utf16le_to_utf8(wide, wide_nbytes,
+                                    &narrow, &narrow_nbytes);
+       assert(err == 0);
+       assert(narrow_nbytes == insize);
+       assert(memcmp(narrow, in, insize) == 0);
+
+       free(narrow);
+       free(wide);
+}
+
+/*
+ * "UTF-16LE" (actually "arbitrary sequence of 16-bit wchars") to UTF-8
+ * (actually "WTF-8") and back again should be lossless, unless the initial
+ * length isn't a multiple of 2 bytes, in which case
+ * WIMLIB_ERR_INVALID_UTF16_STRING is expected.
+ *
+ * 'in' points to 'insize' bytes of fuzzer-provided data.  If the temporary
+ * copy of the input cannot be allocated, the test case is skipped.
+ */
+static void
+fuzz_utf16_roundtrip(const u8 *in, size_t insize)
+{
+       utf16lechar *in_aligned;
+       char *utf8;
+       size_t utf8_size;
+       int ret;
+       utf16lechar *result;
+       size_t result_size;
+
+       /*
+        * Work on a malloc()ed copy of the input (presumably so that it is
+        * suitably aligned for utf16lechar access, going by the name).
+        */
+       in_aligned = malloc(insize);
+       if (in_aligned == NULL) {
+               /*
+                * Allocation failed, or insize == 0 and this malloc() returns
+                * NULL for zero-size requests.  Passing NULL to memcpy() is
+                * undefined behavior even for a zero length, so skip.
+                */
+               return;
+       }
+       memcpy(in_aligned, in, insize);
+       ret = wimlib_utf16le_to_utf8(in_aligned, insize, &utf8, &utf8_size);
+       if (insize % 2) {
+               /* Odd byte count: the conversion is required to reject it. */
+               assert(ret == WIMLIB_ERR_INVALID_UTF16_STRING);
+               free(in_aligned);
+               return;
+       }
+       assert(ret == 0);
+       /* Convert back and require an exact match with the original bytes. */
+       ret = wimlib_utf8_to_utf16le(utf8, utf8_size, &result, &result_size);
+       assert(ret == 0);
+       assert(result_size == insize);
+       assert(memcmp(result, in, insize) == 0);
+       free(result);
+       free(utf8);
+       free(in_aligned);
+}
+
+/*
+ * libFuzzer entry point for the character encoding conversion fuzzer.  The
+ * first input byte selects which round-trip test runs (0: start from UTF-8,
+ * 1: start from UTF-16LE) and the remaining bytes are the string to convert.
+ * Empty inputs and any other selector value do nothing.  Always returns 0.
+ */
+int LLVMFuzzerTestOneInput(const u8 *in, size_t insize)
+{
+       const u8 *payload;
+       size_t payload_size;
+
+       if (insize == 0)
+               return 0;
+
+       /* Everything after the selector byte is the test string. */
+       payload = in + 1;
+       payload_size = insize - 1;
+
+       if (in[0] == 0)
+               fuzz_utf8_roundtrip(payload, payload_size);
+       else if (in[0] == 1)
+               fuzz_utf16_roundtrip(payload, payload_size);
+
+       return 0;
+}