Tweak the core scanner so that it can be used by plpgsql too.
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
Changes:

Pass in the keyword lookup array instead of having it be hardwired.
(This incidentally allows elimination of some duplicate coding in ecpg.)

Re-order the token declarations in gram.y so that non-keyword tokens have
numbers that won't change when keywords are added or removed.

Add ".." and ":=" to the set of tokens recognized by scan.l.  (Since these
combinations are nowhere legal in core SQL, this does not change anything
except the precise wording of the error you get when you write this.)

15 files changed:
src/backend/parser/gram.y
src/backend/parser/keywords.c
src/backend/parser/kwlookup.c
src/backend/parser/parser.c
src/backend/parser/scan.l
src/backend/utils/adt/misc.c
src/backend/utils/adt/ruleutils.c
src/bin/pg_dump/dumputils.c
src/bin/pg_dump/keywords.c
src/include/parser/gramparse.h
src/include/parser/keywords.h
src/interfaces/ecpg/preproc/c_keywords.c
src/interfaces/ecpg/preproc/ecpg_keywords.c
src/interfaces/ecpg/preproc/extern.h
src/interfaces/ecpg/preproc/keywords.c

index 3f4eca77cf1966d9659bb59a8217679e1915bf88..8faf593433a38bdbd5a2ae6289b3e4cb0c30dbd9 100644 (file)
@@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns);
 
 
 /*
- * If you make any token changes, update the keyword table in
- * src/include/parser/kwlist.h and add new keywords to the appropriate one of
- * the reserved-or-not-so-reserved keyword lists, below; search
- * this file for "Name classification hierarchy".
+ * Non-keyword token types.  These are hard-wired into the "flex" lexer.
+ * They must be listed first so that their numeric codes do not depend on
+ * the set of keywords.  PL/pgsql depends on this so that it can share the
+ * same lexer.  If you add/change tokens here, fix PL/pgsql to match!
+ *
+ * DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will
+ * always provoke parse errors.  They are needed by PL/pgsql.
+ */
+%token <str>   IDENT FCONST SCONST BCONST XCONST Op
+%token <ival>  ICONST PARAM
+%token                 TYPECAST DOT_DOT COLON_EQUALS
+
+/*
+ * If you want to make any keyword changes, update the keyword table in
+ * src/include/parser/kwlist.h and add new keywords to the appropriate one
+ * of the reserved-or-not-so-reserved keyword lists, below; search
+ * this file for "Keyword category lists".
  */
 
 /* ordinary key words in alphabetical order */
@@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
 
        ZONE
 
-/* The grammar thinks these are keywords, but they are not in the kwlist.h
+/*
+ * The grammar thinks these are keywords, but they are not in the kwlist.h
  * list and so can never be entered directly.  The filter in parser.c
  * creates these tokens when required.
  */
 %token                 NULLS_FIRST NULLS_LAST WITH_TIME
 
-/* Special token types, not actually keywords - see the "lex" file */
-%token <str>   IDENT FCONST SCONST BCONST XCONST Op
-%token <ival>  ICONST PARAM
 
-/* precedence: lowest to highest */
+/* Precedence: lowest to highest */
 %nonassoc      SET                             /* see relation_expr_opt_alias */
 %left          UNION EXCEPT
 %left          INTERSECT
index 5a56a1f17cda513d934d8bac670841fbc71ec3e0..732f3065ff975134d179517c31208a324a6187af 100644 (file)
@@ -16,7 +16,6 @@
 #include "postgres.h"
 
 #include "parser/gramparse.h"
-#include "parser/keywords.h"
 
 #define PG_KEYWORD(a,b,c) {a,b,c},
 
@@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c and elsewhere */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);
index f941837fb19355ea8c033755d116f9874794aaee..e6675e65125d2a6800c40cc3c47824621b1f99fd 100644 (file)
@@ -6,9 +6,6 @@
  * NB - this file is also used by ECPG and several frontend programs in
  * src/bin/ including pg_dump and psql
  *
- * Note that this file expects that the ScanKeywords array is defined
- * and that LastScanKeyword points to its element one past the last.
- *
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -39,7 +36,9 @@
  * receive a different case-normalization mapping.
  */
 const ScanKeyword *
-ScanKeywordLookup(const char *text)
+ScanKeywordLookup(const char *text,
+                                 const ScanKeyword *keywords,
+                                 int num_keywords)
 {
        int                     len,
                                i;
@@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text)
        /*
         * Now do a binary search using plain strcmp() comparison.
         */
-       low = &ScanKeywords[0];
-       high = LastScanKeyword - 1;
+       low = keywords;
+       high = keywords + (num_keywords - 1);
        while (low <= high)
        {
                const ScanKeyword *middle;
index 0e6c6c7c278cd47f34768e00c66725f9a52d959d..b5370a8b14b5426182a4f7c7e359b11599b75bd4 100644 (file)
@@ -39,7 +39,7 @@ raw_parser(const char *str)
        int                     yyresult;
 
        /* initialize the flex scanner */
-       yyscanner = scanner_init(str, &yyextra);
+       yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
 
        /* filtered_base_yylex() only needs this much initialization */
        yyextra.have_lookahead = false;
@@ -79,7 +79,7 @@ pg_parse_string_token(const char *token)
        YYSTYPE         yylval;
        YYLTYPE         yylloc;
 
-       yyscanner = scanner_init(token, &yyextra);
+       yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
 
        ctoken = base_yylex(&yylval, &yylloc, yyscanner);
 
index 0d423c8ef759f389e0f092524b22a89e0fb19aeb..f404f9dc8b65630ed323b7e5d1ee13df02249b5a 100644 (file)
@@ -304,6 +304,10 @@ identifier         {ident_start}{ident_cont}*
 
 typecast               "::"
 
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot                        \.\.
+colon_equals   ":="
+
 /*
  * "self" is the set of chars that should be returned as single-character
  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
@@ -450,11 +454,21 @@ other                     .
 
                                        SET_YYLLOC();
                                        yyless(1);                              /* eat only 'n' this time */
-                                       /* nchar had better be a keyword! */
-                                       keyword = ScanKeywordLookup("nchar");
-                                       Assert(keyword != NULL);
-                                       yylval->keyword = keyword->name;
-                                       return keyword->value;
+
+                                       keyword = ScanKeywordLookup("nchar",
+                                                                                               yyextra->keywords,
+                                                                                               yyextra->num_keywords);
+                                       if (keyword != NULL)
+                                       {
+                                               yylval->keyword = keyword->name;
+                                               return keyword->value;
+                                       }
+                                       else
+                                       {
+                                               /* If NCHAR isn't a keyword, just return "n" */
+                                               yylval->str = pstrdup("n");
+                                               return IDENT;
+                                       }
                                }
 
 {xqstart}              {
@@ -680,6 +694,16 @@ other                      .
                                        return TYPECAST;
                                }
 
+{dot_dot}              {
+                                       SET_YYLLOC();
+                                       return DOT_DOT;
+                               }
+
+{colon_equals} {
+                                       SET_YYLLOC();
+                                       return COLON_EQUALS;
+                               }
+
 {self}                 {
                                        SET_YYLLOC();
                                        return yytext[0];
@@ -830,7 +854,9 @@ other                       .
                                        SET_YYLLOC();
 
                                        /* Is it a keyword? */
-                                       keyword = ScanKeywordLookup(yytext);
+                                       keyword = ScanKeywordLookup(yytext,
+                                                                                               yyextra->keywords,
+                                                                                               yyextra->num_keywords);
                                        if (keyword != NULL)
                                        {
                                                yylval->keyword = keyword->name;
@@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
  * Called before any actual parsing is done
  */
 base_yyscan_t
-scanner_init(const char *str, base_yy_extra_type *yyext)
+scanner_init(const char *str,
+                        base_yy_extra_type *yyext,
+                        const ScanKeyword *keywords,
+                        int num_keywords)
 {
        Size            slen = strlen(str);
        yyscan_t        scanner;
@@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext)
 
        base_yyset_extra(yyext, scanner);
 
+       yyext->keywords = keywords;
+       yyext->num_keywords = num_keywords;
+
        /*
         * Make a scan buffer with special termination needed by flex.
         */
index 06c17a3dfd29b6525aabe1b4167828a46d1cffd5..93e0e17f375039eb59eb14c75435b486a30fb951 100644 (file)
@@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS)
 
        funcctx = SRF_PERCALL_SETUP();
 
-       if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword)
+       if (funcctx->call_cntr < NumScanKeywords)
        {
                char       *values[3];
                HeapTuple       tuple;
index 1562a5444c7395e303e85d30c143abd4c1cf7e36..ac063af0e6a677e99df47ee4c08099b107e15ab1 100644 (file)
@@ -6219,7 +6219,9 @@ quote_identifier(const char *ident)
                 * Note: ScanKeywordLookup() does case-insensitive comparison, but
                 * that's fine, since we already know we have all-lower-case.
                 */
-               const ScanKeyword *keyword = ScanKeywordLookup(ident);
+               const ScanKeyword *keyword = ScanKeywordLookup(ident,
+                                                                                                          ScanKeywords,
+                                                                                                          NumScanKeywords);
 
                if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
                        safe = false;
index 93bd4d42735d7daf8e377c1755731ff9a61f30a7..178eb7c8d57095eb94baed792df18b062db7b623 100644 (file)
@@ -130,7 +130,9 @@ fmtId(const char *rawid)
                 * Note: ScanKeywordLookup() does case-insensitive comparison, but
                 * that's fine, since we already know we have all-lower-case.
                 */
-               const ScanKeyword *keyword = ScanKeywordLookup(rawid);
+               const ScanKeyword *keyword = ScanKeywordLookup(rawid,
+                                                                                                          ScanKeywords,
+                                                                                                          NumScanKeywords);
 
                if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
                        need_quotes = true;
index 99cdf6e116867a7f4702f998e1b25c032cb5e30b..29c64b26133990c1753befa4949a96c07a14081a 100644 (file)
@@ -27,5 +27,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);
index 40382fd699beb86913d016c66ddeaf07825e43a2..a54a1b1bb4f961f5fc2e1b95b4beee8e574492fa 100644 (file)
@@ -20,6 +20,7 @@
 #define GRAMPARSE_H
 
 #include "nodes/parsenodes.h"
+#include "parser/keywords.h"
 
 /*
  * We track token locations in terms of byte offsets from the start of the
@@ -49,6 +50,12 @@ typedef struct base_yy_extra_type
        char       *scanbuf;
        Size            scanbuflen;
 
+       /*
+        * The keyword list to use.
+        */
+       const ScanKeyword *keywords;
+       int                     num_keywords;
+
        /*
         * literalbuf is used to accumulate literal values when multiple rules
         * are needed to parse a single literal.  Call startlit() to reset buffer
@@ -106,7 +113,10 @@ extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
                                                                base_yyscan_t yyscanner);
 
 /* from scan.l */
-extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
+extern base_yyscan_t scanner_init(const char *str,
+                                                                 base_yy_extra_type *yyext,
+                                                                 const ScanKeyword *keywords,
+                                                                 int num_keywords);
 extern void scanner_finish(base_yyscan_t yyscanner);
 extern int     base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
                                           base_yyscan_t yyscanner);
index 4c56c14ea3a908a77d8a4e5f77fc7423fe7a62f2..51f9c94b895fe366457705331cc79175f3566873 100644 (file)
@@ -29,8 +29,10 @@ typedef struct ScanKeyword
 } ScanKeyword;
 
 extern const ScanKeyword ScanKeywords[];
-extern const ScanKeyword *LastScanKeyword;
+extern const int       NumScanKeywords;
 
-extern const ScanKeyword *ScanKeywordLookup(const char *text);
+extern const ScanKeyword *ScanKeywordLookup(const char *text,
+                                                                                       const ScanKeyword *keywords,
+                                                                                       int num_keywords);
 
 #endif   /* KEYWORDS_H */
index 62e729db5e0758b9ed107a9a8f4ea7fcacd3a586..9bf504a10634806d3f4cf466d74a57ef7fbf7c46 100644 (file)
@@ -1,10 +1,10 @@
 /*-------------------------------------------------------------------------
  *
- * keywords.c
+ * c_keywords.c
  *       lexical token lookup for reserved words in postgres embedded SQL
  *
  * $PostgreSQL$
- * §
+ *
  *-------------------------------------------------------------------------
  */
 #include "postgres_fe.h"
@@ -55,8 +55,31 @@ static const ScanKeyword ScanCKeywords[] = {
        {"year", YEAR_P, 0},
 };
 
+
+/*
+ * Do a binary search using plain strcmp() comparison.  This is much like
+ * ScanKeywordLookup(), except we want case-sensitive matching.
+ */
 const ScanKeyword *
 ScanCKeywordLookup(const char *text)
 {
-       return DoLookup(text, &ScanCKeywords[0], endof(ScanCKeywords) - 1);
+       const ScanKeyword *low = &ScanCKeywords[0];
+       const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
+
+       while (low <= high)
+       {
+               const ScanKeyword *middle;
+               int                     difference;
+
+               middle = low + (high - low) / 2;
+               difference = strcmp(middle->name, text);
+               if (difference == 0)
+                       return middle;
+               else if (difference < 0)
+                       low = middle + 1;
+               else
+                       high = middle - 1;
+       }
+
+       return NULL;
 }
index 9a7fde718160ae58ff0c822a8710a8e4065405f4..833e4e3bc90c33f7beae820c2ce7567b00e67b29 100644 (file)
@@ -75,79 +75,26 @@ static const ScanKeyword ScanECPGKeywords[] = {
        {"whenever", SQL_WHENEVER, 0},
 };
 
-/* This is all taken from src/backend/parser/keyword.c and adjusted for our needs. */
-/*
- * Do a binary search using plain strcmp() comparison.
- */
-const ScanKeyword *
-DoLookup(const char *word, const ScanKeyword *low, const ScanKeyword *high)
-{
-       while (low <= high)
-       {
-               const ScanKeyword *middle;
-               int                     difference;
-
-               middle = low + (high - low) / 2;
-               difference = strcmp(middle->name, word);
-               if (difference == 0)
-                       return middle;
-               else if (difference < 0)
-                       low = middle + 1;
-               else
-                       high = middle - 1;
-       }
-
-       return NULL;
-}
-
 /*
  * ScanECPGKeywordLookup - see if a given word is a keyword
  *
  * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
- *
- * The match is done case-insensitively.  Note that we deliberately use a
- * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
- * even if we are in a locale where tolower() would produce more or different
- * translations.  This is to conform to the SQL99 spec, which says that
- * keywords are to be matched in this way even though non-keyword identifiers
- * receive a different case-normalization mapping.
+ * Keywords are matched using the same case-folding rules as in the backend.
  */
 const ScanKeyword *
 ScanECPGKeywordLookup(const char *text)
 {
-       int                     len,
-                               i;
-       char            word[NAMEDATALEN];
        const ScanKeyword *res;
 
        /* First check SQL symbols defined by the backend. */
-
-       res = ScanKeywordLookup(text);
+       res = ScanKeywordLookup(text, ScanKeywords, NumScanKeywords);
        if (res)
                return res;
 
-       len = strlen(text);
-       /* We assume all keywords are shorter than NAMEDATALEN. */
-       if (len >= NAMEDATALEN)
-               return NULL;
-
-       /*
-        * Apply an ASCII-only downcasing. We must not use tolower() since it may
-        * produce the wrong translation in some locales (eg, Turkish).
-        */
-       for (i = 0; i < len; i++)
-       {
-               char            ch = text[i];
-
-               if (ch >= 'A' && ch <= 'Z')
-                       ch += 'a' - 'A';
-               word[i] = ch;
-       }
-       word[len] = '\0';
-
-       /*
-        * Now do a binary search using plain strcmp() comparison.
-        */
+       /* Try ECPG-specific keywords. */
+       res = ScanKeywordLookup(text, ScanECPGKeywords, lengthof(ScanECPGKeywords));
+       if (res)
+               return res;
 
-       return DoLookup(word, &ScanECPGKeywords[0], endof(ScanECPGKeywords) - 1);
+       return NULL;
 }
index 013359aab450aa194f8f682fb644a4e1ea1d49f7..d760e23443e5307bf9d2bb217c313b6458bdc127 100644 (file)
@@ -101,7 +101,6 @@ extern void remove_variables(int);
 extern struct variable *new_variable(const char *, struct ECPGtype *, int);
 extern const ScanKeyword *ScanCKeywordLookup(const char *);
 extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
-extern const ScanKeyword *DoLookup(const char *, const ScanKeyword *, const ScanKeyword *);
 extern void scanner_init(const char *);
 extern void parser_init(void);
 extern void scanner_finish(void);
index fa6db2ed0426a7bbcfc0bb51695f39c9edbba128..57eecef889efb0abd6c5bf25dd3764a6095d562f 100644 (file)
@@ -26,5 +26,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);