Avoid using %c printf format for potentially non-ASCII characters.
authorTom Lane <tgl@sss.pgh.pa.us>
Mon, 29 Jun 2020 15:41:19 +0000 (11:41 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Mon, 29 Jun 2020 15:41:19 +0000 (11:41 -0400)
Since %c only passes a C "char" to printf, it's incapable of dealing
with multibyte characters.  Passing just the first byte of such a
character leads to an output string that is visibly not correctly
encoded, resulting in undesirable behavior such as encoding conversion
failures while sending error messages to clients.

We've lived with this issue for a long time because it was inconvenient
to avoid in a portable fashion.  However, now that we always use our own
snprintf code, it's reasonable to use the %.*s format to print just one
possibly-multibyte character in a string.  (We previously avoided that
obvious-looking answer in order to work around glibc's bug #6530, cf
commits 54cd4f045 and ed437e2b2.)

Hence, run around and fix a bunch of places that used %c to report
a character found in a user-supplied string.  For simplicity, I did
not touch places that were emitting non-user-facing debug messages,
or reporting catalog data that should always be ASCII.  (It's also
unclear how useful this approach could be in frontend code, where
it's less certain that we know what encoding we're dealing with.)

In passing, improve a couple of poorly-written error messages in
pageinspect/heapfuncs.c.

This is a longstanding issue, but I'm hesitant to back-patch because
of the impact on translatable message strings.  In any case this fix
would not work reliably before v12.

Tom Lane and Quan Zongliang

Discussion: https://postgr.es/m/a120087c-4c88-d9d4-1ec5-808d7a7f133d@gmail.com

contrib/hstore/hstore_io.c
contrib/pageinspect/heapfuncs.c
src/backend/utils/adt/encode.c
src/backend/utils/adt/jsonpath_gram.y
src/backend/utils/adt/regexp.c
src/backend/utils/adt/varbit.c
src/backend/utils/adt/varlena.c

index 60bdbea46be18ca9ba6b9ba2dfb12bb12a947cd1..b3304ff84452c0205cbc1c86d1cf48f395106b10 100644 (file)
@@ -80,7 +80,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
            }
            else if (*(state->ptr) == '=' && !ignoreeq)
            {
-               elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
+               elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                    pg_mblen(state->ptr), state->ptr,
+                    (int32) (state->ptr - state->begin));
            }
            else if (*(state->ptr) == '\\')
            {
@@ -219,7 +221,9 @@ parse_hstore(HSParser *state)
            }
            else if (!isspace((unsigned char) *(state->ptr)))
            {
-               elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
+               elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                    pg_mblen(state->ptr), state->ptr,
+                    (int32) (state->ptr - state->begin));
            }
        }
        else if (st == WGT)
@@ -234,7 +238,9 @@ parse_hstore(HSParser *state)
            }
            else
            {
-               elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
+               elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                    pg_mblen(state->ptr), state->ptr,
+                    (int32) (state->ptr - state->begin));
            }
        }
        else if (st == WVAL)
@@ -267,7 +273,9 @@ parse_hstore(HSParser *state)
            }
            else if (!isspace((unsigned char) *(state->ptr)))
            {
-               elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
+               elog(ERROR, "Syntax error near \"%.*s\" at position %d",
+                    pg_mblen(state->ptr), state->ptr,
+                    (int32) (state->ptr - state->begin));
            }
        }
        else
index 11a910184bf3a7fbab5ef1b50773ce7573c8a604..f04455da127c0c253152028129eb25b3d746cfb3 100644 (file)
@@ -30,6 +30,7 @@
 #include "catalog/pg_am_d.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
+#include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "pageinspect.h"
 #include "port/pg_bitutils.h"
@@ -99,7 +100,8 @@ text_to_bits(char *str, int len)
        else
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
-                    errmsg("illegal character '%c' in t_bits string", str[off])));
+                    errmsg("invalid character \"%.*s\" in t_bits string",
+                           pg_mblen(str + off), str + off)));
 
        if (off % 8 == 7)
            bits[off / 8] = byte;
@@ -460,14 +462,13 @@ tuple_data_split(PG_FUNCTION_ARGS)
        if (!t_bits_str)
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
-                    errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
-                           bits_len)));
+                    errmsg("t_bits string must not be NULL")));
 
        bits_str_len = strlen(t_bits_str);
        if (bits_len != bits_str_len)
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
-                    errmsg("unexpected length of t_bits %u, expected %d",
+                    errmsg("unexpected length of t_bits string: %u, expected %u",
                            bits_str_len, bits_len)));
 
        /* do the conversion */
@@ -478,7 +479,7 @@ tuple_data_split(PG_FUNCTION_ARGS)
        if (t_bits_str)
            ereport(ERROR,
                    (errcode(ERRCODE_DATA_CORRUPTED),
-                    errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
+                    errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
                            strlen(t_bits_str))));
    }
 
index 61d318d93ca29345a3e483e1c83266e5a88ff72d..a609d49c12c2f9b2906de39e50ad80db8121267c 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <ctype.h>
 
+#include "mb/pg_wchar.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 
@@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst)
 }
 
 static inline char
-get_hex(char c)
+get_hex(const char *cp)
 {
+   unsigned char c = (unsigned char) *cp;
    int         res = -1;
 
-   if (c > 0 && c < 127)
-       res = hexlookup[(unsigned char) c];
+   if (c < 127)
+       res = hexlookup[c];
 
    if (res < 0)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                errmsg("invalid hexadecimal digit: \"%c\"", c)));
+                errmsg("invalid hexadecimal digit: \"%.*s\"",
+                       pg_mblen(cp), cp)));
 
    return (char) res;
 }
@@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst)
            s++;
            continue;
        }
-       v1 = get_hex(*s++) << 4;
+       v1 = get_hex(s) << 4;
+       s++;
        if (s >= srcend)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("invalid hexadecimal data: odd number of digits")));
 
-       v2 = get_hex(*s++);
+       v2 = get_hex(s);
+       s++;
        *p++ = v1 | v2;
    }
 
@@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst)
            if (b < 0)
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                        errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
+                        errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
+                               pg_mblen(s - 1), s - 1)));
        }
        /* add it to buffer */
        buf = (buf << 6) + b;
index f87db8ccf6b4b3081f29cfffc990b2a5227708f8..88ef9550e9db0eeae183863bf1666daa6ba85e39 100644 (file)
@@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
                ereport(ERROR,
                        (errcode(ERRCODE_SYNTAX_ERROR),
                         errmsg("invalid input syntax for type %s", "jsonpath"),
-                        errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate",
-                                  flags->val[i])));
+                        errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate",
+                                  pg_mblen(flags->val + i), flags->val + i)));
                break;
        }
    }
index 06f808652a4cb224f29b43f9b1b6f2c00bfadd69..c70c5eeeb37f9e124dd6dae2ddff02d47fee8be8 100644 (file)
@@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
                default:
                    ereport(ERROR,
                            (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                            errmsg("invalid regular expression option: \"%c\"",
-                                   opt_p[i])));
+                            errmsg("invalid regular expression option: \"%.*s\"",
+                                   pg_mblen(opt_p + i), opt_p + i)));
                    break;
            }
        }
index f0c6a44b842509f0ecccd8a8ced4b3635b999321..3c03459f5192d4c627c9a8cbff41c919fa557da4 100644 (file)
@@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS)
            else if (*sp != '0')
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("\"%c\" is not a valid binary digit",
-                               *sp)));
+                        errmsg("\"%.*s\" is not a valid binary digit",
+                               pg_mblen(sp), sp)));
 
            x >>= 1;
            if (x == 0)
@@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS)
            else
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("\"%c\" is not a valid hexadecimal digit",
-                               *sp)));
+                        errmsg("\"%.*s\" is not a valid hexadecimal digit",
+                               pg_mblen(sp), sp)));
 
            if (bc)
            {
@@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS)
            else if (*sp != '0')
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("\"%c\" is not a valid binary digit",
-                               *sp)));
+                        errmsg("\"%.*s\" is not a valid binary digit",
+                               pg_mblen(sp), sp)));
 
            x >>= 1;
            if (x == 0)
@@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS)
            else
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("\"%c\" is not a valid hexadecimal digit",
-                               *sp)));
+                        errmsg("\"%.*s\" is not a valid hexadecimal digit",
+                               pg_mblen(sp), sp)));
 
            if (bc)
            {
index 2eaabd6231d377017f67fcfddc9aba2adcaa3ed9..df10bfb906ed8c07ee326540c3ad572d153b2523 100644 (file)
@@ -5586,8 +5586,8 @@ text_format(PG_FUNCTION_ARGS)
        if (strchr("sIL", *cp) == NULL)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                    errmsg("unrecognized format() type specifier \"%c\"",
-                           *cp),
+                    errmsg("unrecognized format() type specifier \"%.*s\"",
+                           pg_mblen(cp), cp),
                     errhint("For a single \"%%\" use \"%%%%\".")));
 
        /* If indirect width was specified, get its value */
@@ -5707,8 +5707,8 @@ text_format(PG_FUNCTION_ARGS)
                /* should not get here, because of previous check */
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                        errmsg("unrecognized format() type specifier \"%c\"",
-                               *cp),
+                        errmsg("unrecognized format() type specifier \"%.*s\"",
+                               pg_mblen(cp), cp),
                         errhint("For a single \"%%\" use \"%%%%\".")));
                break;
        }