From 503c0ad976f5e4ada2d1b47ae05aa7133351b645 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sun, 24 Mar 2024 16:28:34 -0700 Subject: [PATCH] Fix convert_case(), introduced in 5c40364dd6. Check source length before checking for NUL terminator to avoid reading one byte past the string end. Also fix unreachable bug when caller does not expect NUL-terminated result. Add unit test coverage of convert_case() in case_test.c, which makes it easier to reproduce the valgrind failure. Discussion: https://postgr.es/m/7a9fd36d-7a38-4dc2-e676-fc939491a95a@gmail.com Reported-by: Alexander Lakhin --- src/common/unicode/case_test.c | 101 ++++++++++++++++++++++++++++++++- src/common/unicode_case.c | 6 +- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/common/unicode/case_test.c b/src/common/unicode/case_test.c index 7b82d5e0aad..85a43decb8f 100644 --- a/src/common/unicode/case_test.c +++ b/src/common/unicode/case_test.c @@ -48,6 +48,9 @@ icu_test_simple(pg_wchar code) } } +/* + * Exhaustively compare case mappings with the results from ICU. + */ static void test_icu(void) { @@ -82,9 +85,100 @@ test_icu(void) } #endif -/* - * Exhaustively compare case mappings with the results from libc and ICU. - */ +static void +test_strlower(const char *test_string, const char *expected) +{ + size_t src1len = strlen(test_string); + size_t src2len = -1; /* NUL-terminated */ + size_t dst1len = strlen(expected); + size_t dst2len = strlen(expected) + 1; /* NUL-terminated */ + char *src1 = malloc(src1len); + char *dst1 = malloc(dst1len); + char *src2 = strdup(test_string); + char *dst2 = malloc(dst2len); + size_t needed; + + memcpy(src1, test_string, src1len); /* not NUL-terminated */ + + /* neither source nor destination are NUL-terminated */ + memset(dst1, 0x7F, dst1len); + needed = unicode_strlower(dst1, dst1len, src1, src1len); + if (needed != strlen(expected)) + { + printf("case_test: convert_case test1 FAILURE: needed %zu\n", needed); + exit(1); + } + if (memcmp(dst1, expected, dst1len) != 0) + { + printf("case_test: convert_case test1 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n", + test_string, (int) dst1len, dst1, expected); + exit(1); + } + + /* destination is NUL-terminated and source is not */ + memset(dst2, 0x7F, dst2len); + needed = unicode_strlower(dst2, dst2len, src1, src1len); + if (needed != strlen(expected)) + { + printf("case_test: convert_case test2 FAILURE: needed %zu\n", needed); + exit(1); + } + if (strcmp(dst2, expected) != 0) + { + printf("case_test: convert_case test2 FAILURE: test: '%s' result: '%s' expected: '%s'\n", + test_string, dst2, expected); + exit(1); + } + + /* source is NUL-terminated and destination is not */ + memset(dst1, 0x7F, dst1len); + needed = unicode_strlower(dst1, dst1len, src2, src2len); + if (needed != strlen(expected)) + { + printf("case_test: convert_case test3 FAILURE: needed %zu\n", needed); + exit(1); + } + if (memcmp(dst1, expected, dst1len) != 0) + { + printf("case_test: convert_case test3 FAILURE: test: '%s' result: '%.*s' expected: '%s'\n", + test_string, (int) dst1len, dst1, expected); + exit(1); + } + + /* both source and destination are NUL-terminated */ + memset(dst2, 0x7F, dst2len); + needed = unicode_strlower(dst2, dst2len, src2, src2len); + if (needed != strlen(expected)) + { + printf("case_test: convert_case test4 FAILURE: needed %zu\n", needed); + exit(1); + } + if (strcmp(dst2, expected) != 0) + { + printf("case_test: convert_case test4 FAILURE: test: '%s' result: '%s' expected: '%s'\n", + test_string, dst2, expected); + exit(1); + } + + free(src1); + free(dst1); + free(src2); + free(dst2); +} + +static void +test_convert_case() +{ + /* test string with no case changes */ + test_strlower("√∞", "√∞"); + /* test string with case changes */ + test_strlower("ABC", "abc"); + /* test string with case changes and byte length changes */ + test_strlower("ȺȺȺ", "ⱥⱥⱥ"); + + printf("case_test: convert_case: success\n"); +} + int main(int argc, char **argv) { @@ -96,5 +190,6 @@ main(int argc, char **argv) printf("case_test: ICU not available; skipping\n"); #endif + test_convert_case(); exit(0); } diff --git a/src/common/unicode_case.c b/src/common/unicode_case.c index 8b77f39e4d1..5e77490006f 100644 --- a/src/common/unicode_case.c +++ b/src/common/unicode_case.c @@ -104,7 +104,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, size_t srcoff = 0; size_t result_len = 0; - while (src[srcoff] != '\0' && (srclen < 0 || srcoff < srclen)) + while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0') { pg_wchar u1 = utf8_to_unicode((unsigned char *) src + srcoff); int u1len = unicode_utf8len(u1); @@ -115,7 +115,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_wchar u2 = casemap->simplemap[casekind]; pg_wchar u2len = unicode_utf8len(u2); - if (result_len + u2len < dstsize) + if (result_len + u2len <= dstsize) unicode_to_utf8(u2, (unsigned char *) dst + result_len); result_len += u2len; @@ -123,7 +123,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen, else { /* no mapping; copy bytes from src */ - if (result_len + u1len < dstsize) + if (result_len + u1len <= dstsize) memcpy(dst + result_len, src + srcoff, u1len); result_len += u1len; -- 2.39.5