replication parser: pure parser and reentrant scanner
author: Peter Eisentraut <peter@eisentraut.org>
Mon, 2 Dec 2024 09:35:37 +0000 (10:35 +0100)
committer: Peter Eisentraut <peter@eisentraut.org>
Tue, 24 Dec 2024 15:40:09 +0000 (16:40 +0100)
Use the flex %option reentrant and the bison option %pure-parser to
make the generated scanner and parser pure, reentrant, and
thread-safe.

Make the generated scanner use palloc() etc. instead of malloc() etc.
Previously, we only used palloc() for the buffer, but flex would still
use malloc() for its internal structures.  As a result, there could be
some small memory leaks in case of uncaught errors.  Now, all the
memory is under palloc() control, so there are no more such issues.

Simplify flex scan buffer management: Instead of constructing the
buffer from pieces and then using yy_scan_buffer(), we can just use
yy_scan_string(), which does the same thing internally.

The previous code was necessary because we allocated the buffer with
palloc() and the rest of the state was handled by malloc().  But this
is no longer the case; everything is under palloc() now.

Use flex yyextra to handle context information, instead of global
variables.  This complements the other changes to make the scanner
reentrant.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Co-authored-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/eb6faeac-2a8a-4b69-9189-c33c520e5b7b@eisentraut.org

src/backend/nls.mk
src/backend/replication/repl_gram.y
src/backend/replication/repl_scanner.l
src/backend/replication/walsender.c
src/include/replication/walsender_private.h

index 6c2716149d99b98e27e0b0e94b971bbe24cb5666..3fa1833b76aa657caca1fa087151acd238f83794 100644 (file)
@@ -9,7 +9,7 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
                    yyerror \
                    jsonpath_yyerror:3 \
                    parser_yyerror \
-                   replication_yyerror \
+                   replication_yyerror:2 \
                    scanner_yyerror \
                    syncrep_yyerror \
                    report_invalid_record:2 \
index 06daa9548132d565d1799041fd44017971c069a8..4fa71377e20e87f08d0424572be3bd79dccc9fb1 100644 (file)
 
 #include "repl_gram.h"
 
-/* silence -Wmissing-variable-declarations */
-extern int replication_yychar;
-extern int replication_yynerrs;
-
 
 /* Result of the parsing is returned here */
 Node *replication_parse_result;
@@ -43,6 +39,9 @@ Node *replication_parse_result;
 
 %}
 
+%parse-param {yyscan_t yyscanner}
+%lex-param   {yyscan_t yyscanner}
+%pure-parser
 %expect 0
 %name-prefix="replication_yy"
 
@@ -106,6 +105,8 @@ Node *replication_parse_result;
 firstcmd: command opt_semicolon
                {
                    replication_parse_result = $1;
+
+                   (void) yynerrs; /* suppress compiler warning */
                }
            ;
 
index 5b10658a58b5a23e824f206570f50b5dc8c92bad..899114d901abac1f675bf2f3cb09c6529af94912 100644 (file)
@@ -38,30 +38,36 @@ fprintf_to_ereport(const char *fmt, const char *msg)
    ereport(ERROR, (errmsg_internal("%s", msg)));
 }
 
-/* Handle to the buffer that the lexer uses internally */
-static YY_BUFFER_STATE scanbufhandle;
-
-/* Pushed-back token (we only handle one) */
-static int repl_pushed_back_token;
+struct replication_yy_extra_type
+{
+   /* Pushed-back token (we only handle one) */
+   int         repl_pushed_back_token;
 
-/* Work area for collecting literals */
-static StringInfoData litbuf;
+   /* Work area for collecting literals */
+   StringInfoData litbuf;
+};
+#define YY_EXTRA_TYPE struct replication_yy_extra_type *
 
-static void startlit(void);
-static char *litbufdup(void);
-static void addlit(char *ytext, int yleng);
-static void addlitchar(unsigned char ychar);
+static void startlit(yyscan_t yyscanner);
+static char *litbufdup(yyscan_t yyscanner);
+static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
+static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
 
 /* LCOV_EXCL_START */
 
 %}
 
+%option reentrant
+%option bison-bridge
 %option 8bit
 %option never-interactive
 %option nodefault
 %option noinput
 %option nounput
 %option noyywrap
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
 %option warn
 %option prefix="replication_yy"
 
@@ -108,11 +114,11 @@ identifier        {ident_start}{ident_cont}*
    /* This code is inserted at the start of replication_yylex() */
 
    /* If we have a pushed-back token, return that. */
-   if (repl_pushed_back_token)
+   if (yyextra->repl_pushed_back_token)
    {
-       int         result = repl_pushed_back_token;
+       int         result = yyextra->repl_pushed_back_token;
 
-       repl_pushed_back_token = 0;
+       yyextra->repl_pushed_back_token = 0;
        return result;
    }
 %}
@@ -142,7 +148,7 @@ UPLOAD_MANIFEST     { return K_UPLOAD_MANIFEST; }
 {space}+       { /* do nothing */ }
 
 {digit}+       {
-                   replication_yylval.uintval = strtoul(yytext, NULL, 10);
+                   yylval->uintval = strtoul(yytext, NULL, 10);
                    return UCONST;
                }
 
@@ -150,34 +156,34 @@ UPLOAD_MANIFEST       { return K_UPLOAD_MANIFEST; }
                    uint32  hi,
                            lo;
                    if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
-                       replication_yyerror("invalid streaming start location");
-                   replication_yylval.recptr = ((uint64) hi) << 32 | lo;
+                       replication_yyerror(yyscanner, "invalid streaming start location");
+                   yylval->recptr = ((uint64) hi) << 32 | lo;
                    return RECPTR;
                }
 
 {xqstart}      {
                    BEGIN(xq);
-                   startlit();
+                   startlit(yyscanner);
                }
 
 <xq>{quotestop}    {
                    yyless(1);
                    BEGIN(INITIAL);
-                   replication_yylval.str = litbufdup();
+                   yylval->str = litbufdup(yyscanner);
                    return SCONST;
                }
 
 <xq>{xqdouble} {
-                   addlitchar('\'');
+                   addlitchar('\'', yyscanner);
                }
 
 <xq>{xqinside}  {
-                   addlit(yytext, yyleng);
+                   addlit(yytext, yyleng, yyscanner);
                }
 
 {xdstart}      {
                    BEGIN(xd);
-                   startlit();
+                   startlit(yyscanner);
                }
 
 <xd>{xdstop}   {
@@ -185,20 +191,20 @@ UPLOAD_MANIFEST       { return K_UPLOAD_MANIFEST; }
 
                    yyless(1);
                    BEGIN(INITIAL);
-                   replication_yylval.str = litbufdup();
-                   len = strlen(replication_yylval.str);
-                   truncate_identifier(replication_yylval.str, len, true);
+                   yylval->str = litbufdup(yyscanner);
+                   len = strlen(yylval->str);
+                   truncate_identifier(yylval->str, len, true);
                    return IDENT;
                }
 
 <xd>{xdinside}  {
-                   addlit(yytext, yyleng);
+                   addlit(yytext, yyleng, yyscanner);
                }
 
 {identifier}   {
                    int         len = strlen(yytext);
 
-                   replication_yylval.str = downcase_truncate_identifier(yytext, len, true);
+                   yylval->str = downcase_truncate_identifier(yytext, len, true);
                    return IDENT;
                }
 
@@ -207,7 +213,7 @@ UPLOAD_MANIFEST     { return K_UPLOAD_MANIFEST; }
                    return yytext[0];
                }
 
-<xq,xd><<EOF>> { replication_yyerror("unterminated quoted string"); }
+<xq,xd><<EOF>> { replication_yyerror(yyscanner, "unterminated quoted string"); }
 
 
 <<EOF>>            {
@@ -218,32 +224,36 @@ UPLOAD_MANIFEST       { return K_UPLOAD_MANIFEST; }
 
 /* LCOV_EXCL_STOP */
 
+/* see scan.l */
+#undef yyextra
+#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
+
 static void
-startlit(void)
+startlit(yyscan_t yyscanner)
 {
-   initStringInfo(&litbuf);
+   initStringInfo(&yyextra->litbuf);
 }
 
 static char *
-litbufdup(void)
+litbufdup(yyscan_t yyscanner)
 {
-   return litbuf.data;
+   return yyextra->litbuf.data;
 }
 
 static void
-addlit(char *ytext, int yleng)
+addlit(char *ytext, int yleng, yyscan_t yyscanner)
 {
-   appendBinaryStringInfo(&litbuf, ytext, yleng);
+   appendBinaryStringInfo(&yyextra->litbuf, ytext, yleng);
 }
 
 static void
-addlitchar(unsigned char ychar)
+addlitchar(unsigned char ychar, yyscan_t yyscanner)
 {
-   appendStringInfoChar(&litbuf, ychar);
+   appendStringInfoChar(&yyextra->litbuf, ychar);
 }
 
 void
-replication_yyerror(const char *message)
+replication_yyerror(yyscan_t yyscanner, const char *message)
 {
    ereport(ERROR,
            (errcode(ERRCODE_SYNTAX_ERROR),
@@ -251,35 +261,26 @@ replication_yyerror(const char *message)
 }
 
 void
-replication_scanner_init(const char *str)
+replication_scanner_init(const char *str, yyscan_t *yyscannerp)
 {
-   Size        slen = strlen(str);
-   char       *scanbuf;
-
-   /*
-    * Might be left over after ereport()
-    */
-   if (YY_CURRENT_BUFFER)
-       yy_delete_buffer(YY_CURRENT_BUFFER);
-
-   /*
-    * Make a scan buffer with special termination needed by flex.
-    */
-   scanbuf = (char *) palloc(slen + 2);
-   memcpy(scanbuf, str, slen);
-   scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
-   scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
-
-   /* Make sure we start in proper state */
-   BEGIN(INITIAL);
-   repl_pushed_back_token = 0;
+   yyscan_t    yyscanner;
+   struct replication_yy_extra_type *yyext = palloc0_object(struct replication_yy_extra_type);
+
+   if (yylex_init(yyscannerp) != 0)
+       elog(ERROR, "yylex_init() failed: %m");
+
+   yyscanner = *yyscannerp;
+
+   yyset_extra(yyext, yyscanner);
+
+   yy_scan_string(str, yyscanner);
 }
 
 void
-replication_scanner_finish(void)
+replication_scanner_finish(yyscan_t yyscanner)
 {
-   yy_delete_buffer(scanbufhandle);
-   scanbufhandle = NULL;
+   pfree(yyextra);
+   yylex_destroy(yyscanner);
 }
 
 /*
@@ -291,9 +292,10 @@ replication_scanner_finish(void)
  * IDENT token here, although some other cases are possible.
  */
 bool
-replication_scanner_is_replication_command(void)
+replication_scanner_is_replication_command(yyscan_t yyscanner)
 {
-   int         first_token = replication_yylex();
+   YYSTYPE     dummy;
+   int         first_token = replication_yylex(&dummy, yyscanner);
 
    switch (first_token)
    {
@@ -308,10 +310,37 @@ replication_scanner_is_replication_command(void)
        case K_UPLOAD_MANIFEST:
        case K_SHOW:
            /* Yes; push back the first token so we can parse later. */
-           repl_pushed_back_token = first_token;
+           yyextra->repl_pushed_back_token = first_token;
            return true;
        default:
            /* Nope; we don't bother to push back the token. */
            return false;
    }
 }
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+void *
+yyalloc(yy_size_t size, yyscan_t yyscanner)
+{
+   return palloc(size);
+}
+
+void *
+yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
+{
+   if (ptr)
+       return repalloc(ptr, size);
+   else
+       return palloc(size);
+}
+
+void
+yyfree(void *ptr, yyscan_t yyscanner)
+{
+   if (ptr)
+      pfree(ptr);
+}
index 371eef3dddc069e52b0d59db4f7c675bfa04d4b4..dc25dd6af9171d3bd3a0770ea512d6b0380f68cc 100644 (file)
@@ -1951,6 +1951,7 @@ WalSndWaitForWal(XLogRecPtr loc)
 bool
 exec_replication_command(const char *cmd_string)
 {
+   yyscan_t    scanner;
    int         parse_rc;
    Node       *cmd_node;
    const char *cmdtag;
@@ -1990,15 +1991,15 @@ exec_replication_command(const char *cmd_string)
                                        ALLOCSET_DEFAULT_SIZES);
    old_context = MemoryContextSwitchTo(cmd_context);
 
-   replication_scanner_init(cmd_string);
+   replication_scanner_init(cmd_string, &scanner);
 
    /*
     * Is it a WalSender command?
     */
-   if (!replication_scanner_is_replication_command())
+   if (!replication_scanner_is_replication_command(scanner))
    {
        /* Nope; clean up and get out. */
-       replication_scanner_finish();
+       replication_scanner_finish(scanner);
 
        MemoryContextSwitchTo(old_context);
        MemoryContextDelete(cmd_context);
@@ -2016,13 +2017,13 @@ exec_replication_command(const char *cmd_string)
    /*
     * Looks like a WalSender command, so parse it.
     */
-   parse_rc = replication_yyparse();
+   parse_rc = replication_yyparse(scanner);
    if (parse_rc != 0)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg_internal("replication command parser returned %d",
                                 parse_rc)));
-   replication_scanner_finish();
+   replication_scanner_finish(scanner);
 
    cmd_node = replication_parse_result;
 
index 41ac736b953af8b583ac57130b0cc9f9b5128265..9a9c40d6f34f71e13729c4e0d6aa7ac994fdec77 100644 (file)
@@ -125,12 +125,17 @@ extern void WalSndSetState(WalSndState state);
  * Internal functions for parsing the replication grammar, in repl_gram.y and
  * repl_scanner.l
  */
-extern int replication_yyparse(void);
-extern int replication_yylex(void);
-extern void replication_yyerror(const char *message) pg_attribute_noreturn();
-extern void replication_scanner_init(const char *str);
-extern void replication_scanner_finish(void);
-extern bool replication_scanner_is_replication_command(void);
+union YYSTYPE;
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+#endif
+extern int replication_yyparse(yyscan_t yyscanner);
+extern int replication_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner);
+extern void replication_yyerror(yyscan_t yyscanner, const char *message) pg_attribute_noreturn();
+extern void replication_scanner_init(const char *str, yyscan_t *yyscannerp);
+extern void replication_scanner_finish(yyscan_t yyscanner);
+extern bool replication_scanner_is_replication_command(yyscan_t yyscanner);
 
 extern PGDLLIMPORT Node *replication_parse_result;