From e0c9e124bb3603ce4677efc137850d17f51b53fc Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sun, 9 Apr 2023 17:02:21 -0700
Subject: [PATCH] libFuzzer: add encoding fuzzer

---
Reviewer notes (below the "---" marker, so not part of the commit message):

 * This patch adds a libFuzzer target "encoding" that round-trips fuzz
   input through wimlib_utf8_to_utf16le() and wimlib_utf16le_to_utf8(),
   two test-support wrappers exported from src/encoding.c, and adds the
   target to the CI matrix with --asan --ubsan.
 * Line breaks and indentation were reconstructed from a copy in which
   all whitespace had been collapsed.  Indentation of the context lines
   is assumed to match the target files; confirm with
   "git apply --check" before relying on it.
 * fuzz_utf16_roundtrip() now requests at least one byte from malloc()
   and returns early when the allocation fails.  Previously the unchecked
   pointer was handed straight to memcpy().  Because of this, fuzz.c has
   87 lines instead of 83 and no longer matches the blob id recorded in
   its "index" line.

 .github/workflows/ci.yml          |   2 +
 include/wimlib/test_support.h     |   8 +++
 src/encoding.c                    |  19 +++++++
 tools/libFuzzer/encoding/corpus/0 | Bin 0 -> 8 bytes
 tools/libFuzzer/encoding/corpus/1 | Bin 0 -> 9 bytes
 tools/libFuzzer/encoding/fuzz.c   |  87 ++++++++++++++++++++++++++++++
 6 files changed, 116 insertions(+)
 create mode 100644 tools/libFuzzer/encoding/corpus/0
 create mode 100644 tools/libFuzzer/encoding/corpus/1
 create mode 100644 tools/libFuzzer/encoding/fuzz.c

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d7919922..d1d33b1f 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -202,6 +202,8 @@ jobs:
           sanitizer:
         - target: wim
           sanitizer: --asan --ubsan
+        - target: encoding
+          sanitizer: --asan --ubsan
         - target: xmlproc
           sanitizer:
         - target: xmlproc
diff --git a/include/wimlib/test_support.h b/include/wimlib/test_support.h
index 909b643c..8104921d 100644
--- a/include/wimlib/test_support.h
+++ b/include/wimlib/test_support.h
@@ -25,6 +25,14 @@ wimlib_compare_images(WIMStruct *wim1, int image1,
 WIMLIBAPI int
 wimlib_parse_and_write_xml_doc(const tchar *in, tchar **out_ret);
 
+WIMLIBAPI int
+wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+		       utf16lechar **out_ret, size_t *out_nbytes_ret);
+
+WIMLIBAPI int
+wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+		       char **out_ret, size_t *out_nbytes_ret);
+
 #endif /* ENABLE_TEST_SUPPORT */
 
 #endif /* _WIMLIB_TEST_SUPPORT_H */
diff --git a/src/encoding.c b/src/encoding.c
index fde3adb2..41917c9d 100644
--- a/src/encoding.c
+++ b/src/encoding.c
@@ -462,3 +462,22 @@ utf16le_len_chars(const utf16lechar *s)
 {
 	return utf16le_len_bytes(s) / sizeof(utf16lechar);
 }
+
+#ifdef ENABLE_TEST_SUPPORT
+
+#include "wimlib/test_support.h"
+
+WIMLIBAPI int
+wimlib_utf8_to_utf16le(const char *in, size_t in_nbytes,
+		       utf16lechar **out_ret, size_t *out_nbytes_ret)
+{
+	return utf8_to_utf16le(in, in_nbytes, out_ret, out_nbytes_ret);
+}
+
+WIMLIBAPI int
+wimlib_utf16le_to_utf8(const utf16lechar *in, size_t in_nbytes,
+		       char **out_ret, size_t *out_nbytes_ret)
+{
+	return utf16le_to_utf8(in, in_nbytes, out_ret, out_nbytes_ret);
+}
+#endif /* ENABLE_TEST_SUPPORT */
diff --git a/tools/libFuzzer/encoding/corpus/0 b/tools/libFuzzer/encoding/corpus/0
new file mode 100644
index 0000000000000000000000000000000000000000..e0d84f457d2b5c802ecd286e7e576365d94af3b9
GIT binary patch
literal 8
PcmZQzWMXDvWn%{b09*ha

literal 0
HcmV?d00001

diff --git a/tools/libFuzzer/encoding/corpus/1 b/tools/libFuzzer/encoding/corpus/1
new file mode 100644
index 0000000000000000000000000000000000000000..eb05829c55908a177a9e90cfd2a657f43711f881
GIT binary patch
literal 9
QcmZQzWME=oW?*3e001!n3jhEB

literal 0
HcmV?d00001

diff --git a/tools/libFuzzer/encoding/fuzz.c b/tools/libFuzzer/encoding/fuzz.c
new file mode 100644
index 00000000..16c7d138
--- /dev/null
+++ b/tools/libFuzzer/encoding/fuzz.c
@@ -0,0 +1,87 @@
+#include "../fuzzer.h"
+
+/*
+ * "UTF-8" (actually "WTF-8") to UTF-16LE (actually "arbitrary sequence of
+ * 16-bit wchars") and back again should be lossless, unless the initial string
+ * isn't valid WTF-8, in which case WIMLIB_ERR_INVALID_UTF8_STRING is expected.
+ */
+static void
+fuzz_utf8_roundtrip(const u8 *in, size_t insize)
+{
+	utf16lechar *utf16;
+	size_t utf16_size;
+	int ret;
+	char *result;
+	size_t result_size;
+
+	ret = wimlib_utf8_to_utf16le((const char *)in, insize,
+				     &utf16, &utf16_size);
+	if (ret) {
+		assert(ret == WIMLIB_ERR_INVALID_UTF8_STRING);
+		return;
+	}
+	assert(ret == 0);
+	ret = wimlib_utf16le_to_utf8(utf16, utf16_size, &result, &result_size);
+	assert(ret == 0);
+	assert(result_size == insize);
+	assert(memcmp(result, in, insize) == 0);
+	free(result);
+	free(utf16);
+}
+
+/*
+ * "UTF-16LE" (actually "arbitrary sequence of 16-bit wchars") to UTF-8
+ * (actually "WTF-8") and back again should be lossless, unless the initial
+ * length isn't a multiple of 2 bytes, in which case
+ * WIMLIB_ERR_INVALID_UTF16_STRING is expected.
+ */
+static void
+fuzz_utf16_roundtrip(const u8 *in, size_t insize)
+{
+	/* At least 1 byte, since malloc(0) may legitimately return NULL. */
+	utf16lechar *in_aligned = malloc(insize ? insize : 1);
+	char *utf8;
+	size_t utf8_size;
+	int ret;
+	utf16lechar *result;
+	size_t result_size;
+
+	/* Out of memory is not a conversion bug; skip this input. */
+	if (in_aligned == NULL)
+		return;
+	memcpy(in_aligned, in, insize);
+	ret = wimlib_utf16le_to_utf8(in_aligned, insize, &utf8, &utf8_size);
+	if (insize % 2) {
+		assert(ret == WIMLIB_ERR_INVALID_UTF16_STRING);
+		free(in_aligned);
+		return;
+	}
+	assert(ret == 0);
+	ret = wimlib_utf8_to_utf16le(utf8, utf8_size, &result, &result_size);
+	assert(ret == 0);
+	assert(result_size == insize);
+	assert(memcmp(result, in, insize) == 0);
+	free(result);
+	free(utf8);
+	free(in_aligned);
+}
+
+/* Fuzz character encoding conversion. */
+int LLVMFuzzerTestOneInput(const u8 *in, size_t insize)
+{
+	int which;
+
+	if (insize < 1)
+		return 0;
+	which = *in++;
+	insize--;
+	switch (which) {
+	case 0:
+		fuzz_utf8_roundtrip(in, insize);
+		break;
+	case 1:
+		fuzz_utf16_roundtrip(in, insize);
+		break;
+	}
+	return 0;
+}
-- 
2.43.0