Tweak the core scanner so that it can be used by plpgsql too.

author Tom Lane <tgl@sss.pgh.pa.us>

Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y

index 3f4eca77cf1966d9659bb59a8217679e1915bf88..8faf593433a38bdbd5a2ae6289b3e4cb0c30dbd9 100644 (file)
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns);
  
  
  /*
- * If you make any token changes, update the keyword table in
- * src/include/parser/kwlist.h and add new keywords to the appropriate one of
- * the reserved-or-not-so-reserved keyword lists, below; search
- * this file for "Name classification hierarchy".
+ * Non-keyword token types.  These are hard-wired into the "flex" lexer.
+ * They must be listed first so that their numeric codes do not depend on
+ * the set of keywords.  PL/pgsql depends on this so that it can share the
+ * same lexer.  If you add/change tokens here, fix PL/pgsql to match!
+ *
+ * DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will
+ * always provoke parse errors.  They are needed by PL/pgsql.
+ */
+%token <str>   IDENT FCONST SCONST BCONST XCONST Op
+%token <ival>  ICONST PARAM
+%token                 TYPECAST DOT_DOT COLON_EQUALS
+
+/*
+ * If you want to make any keyword changes, update the keyword table in
+ * src/include/parser/kwlist.h and add new keywords to the appropriate one
+ * of the reserved-or-not-so-reserved keyword lists, below; search
+ * this file for "Keyword category lists".
   */
  
  /* ordinary key words in alphabetical order */
@@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
  
         ZONE
  
-/* The grammar thinks these are keywords, but they are not in the kwlist.h
+/*
+ * The grammar thinks these are keywords, but they are not in the kwlist.h
   * list and so can never be entered directly.  The filter in parser.c
   * creates these tokens when required.
   */
  %token                 NULLS_FIRST NULLS_LAST WITH_TIME
  
-/* Special token types, not actually keywords - see the "lex" file */
-%token <str>   IDENT FCONST SCONST BCONST XCONST Op
-%token <ival>  ICONST PARAM
  
-/* precedence: lowest to highest */
+/* Precedence: lowest to highest */
  %nonassoc      SET                             /* see relation_expr_opt_alias */
  %left          UNION EXCEPT
  %left          INTERSECT
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c

index 5a56a1f17cda513d934d8bac670841fbc71ec3e0..732f3065ff975134d179517c31208a324a6187af 100644 (file)
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -16,7 +16,6 @@
  #include "postgres.h"
  
  #include "parser/gramparse.h"
-#include "parser/keywords.h"
  
  #define PG_KEYWORD(a,b,c) {a,b,c},
  
@@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = {
  #include "parser/kwlist.h"
  };
  
-/* End of ScanKeywords, for use in kwlookup.c and elsewhere */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);
diff --git a/src/backend/parser/kwlookup.c b/src/backend/parser/kwlookup.c

index f941837fb19355ea8c033755d116f9874794aaee..e6675e65125d2a6800c40cc3c47824621b1f99fd 100644 (file)
--- a/src/backend/parser/kwlookup.c
+++ b/src/backend/parser/kwlookup.c
@@ -6,9 +6,6 @@
   * NB - this file is also used by ECPG and several frontend programs in
   * src/bin/ including pg_dump and psql
   *
- * Note that this file expects that the ScanKeywords array is defined
- * and that LastScanKeyword points to its element one past the last.
- *
   * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
@@ -39,7 +36,9 @@
   * receive a different case-normalization mapping.
   */
  const ScanKeyword *
-ScanKeywordLookup(const char *text)
+ScanKeywordLookup(const char *text,
+                                 const ScanKeyword *keywords,
+                                 int num_keywords)
  {
         int                     len,
                                 i;
@@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text)
         /*
          * Now do a binary search using plain strcmp() comparison.
          */
-       low = &ScanKeywords[0];
-       high = LastScanKeyword - 1;
+       low = keywords;
+       high = keywords + (num_keywords - 1);
         while (low <= high)
         {
                 const ScanKeyword *middle;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c

index 0e6c6c7c278cd47f34768e00c66725f9a52d959d..b5370a8b14b5426182a4f7c7e359b11599b75bd4 100644 (file)
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -39,7 +39,7 @@ raw_parser(const char *str)
         int                     yyresult;
  
         /* initialize the flex scanner */
-       yyscanner = scanner_init(str, &yyextra);
+       yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
  
         /* filtered_base_yylex() only needs this much initialization */
         yyextra.have_lookahead = false;
@@ -79,7 +79,7 @@ pg_parse_string_token(const char *token)
         YYSTYPE         yylval;
         YYLTYPE         yylloc;
  
-       yyscanner = scanner_init(token, &yyextra);
+       yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
  
         ctoken = base_yylex(&yylval, &yylloc, yyscanner);
  
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l

index 0d423c8ef759f389e0f092524b22a89e0fb19aeb..f404f9dc8b65630ed323b7e5d1ee13df02249b5a 100644 (file)
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -304,6 +304,10 @@ identifier         {ident_start}{ident_cont}*
  
  typecast               "::"
  
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot                        \.\.
+colon_equals   ":="
+
  /*
   * "self" is the set of chars that should be returned as single-character
   * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
@@ -450,11 +454,21 @@ other                     .
  
                                         SET_YYLLOC();
                                         yyless(1);                              /* eat only 'n' this time */
-                                       /* nchar had better be a keyword! */
-                                       keyword = ScanKeywordLookup("nchar");
-                                       Assert(keyword != NULL);
-                                       yylval->keyword = keyword->name;
-                                       return keyword->value;
+
+                                       keyword = ScanKeywordLookup("nchar",
+                                                                                               yyextra->keywords,
+                                                                                               yyextra->num_keywords);
+                                       if (keyword != NULL)
+                                       {
+                                               yylval->keyword = keyword->name;
+                                               return keyword->value;
+                                       }
+                                       else
+                                       {
+                                               /* If NCHAR isn't a keyword, just return "n" */
+                                               yylval->str = pstrdup("n");
+                                               return IDENT;
+                                       }
                                 }
  
  {xqstart}              {
@@ -680,6 +694,16 @@ other                      .
                                         return TYPECAST;
                                 }
  
+{dot_dot}              {
+                                       SET_YYLLOC();
+                                       return DOT_DOT;
+                               }
+
+{colon_equals} {
+                                       SET_YYLLOC();
+                                       return COLON_EQUALS;
+                               }
+
  {self}                 {
                                         SET_YYLLOC();
                                         return yytext[0];
@@ -830,7 +854,9 @@ other                       .
                                         SET_YYLLOC();
  
                                         /* Is it a keyword? */
-                                       keyword = ScanKeywordLookup(yytext);
+                                       keyword = ScanKeywordLookup(yytext,
+                                                                                               yyextra->keywords,
+                                                                                               yyextra->num_keywords);
                                         if (keyword != NULL)
                                         {
                                                 yylval->keyword = keyword->name;
@@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
   * Called before any actual parsing is done
   */
  base_yyscan_t
-scanner_init(const char *str, base_yy_extra_type *yyext)
+scanner_init(const char *str,
+                        base_yy_extra_type *yyext,
+                        const ScanKeyword *keywords,
+                        int num_keywords)
  {
         Size            slen = strlen(str);
         yyscan_t        scanner;
@@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext)
  
         base_yyset_extra(yyext, scanner);
  
+       yyext->keywords = keywords;
+       yyext->num_keywords = num_keywords;
+
         /*
          * Make a scan buffer with special termination needed by flex.
          */
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c

index 06c17a3dfd29b6525aabe1b4167828a46d1cffd5..93e0e17f375039eb59eb14c75435b486a30fb951 100644 (file)
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS)
  
         funcctx = SRF_PERCALL_SETUP();
  
-       if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword)
+       if (funcctx->call_cntr < NumScanKeywords)
         {
                 char       *values[3];
                 HeapTuple       tuple;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c

index 1562a5444c7395e303e85d30c143abd4c1cf7e36..ac063af0e6a677e99df47ee4c08099b107e15ab1 100644 (file)
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -6219,7 +6219,9 @@ quote_identifier(const char *ident)
                  * Note: ScanKeywordLookup() does case-insensitive comparison, but
                  * that's fine, since we already know we have all-lower-case.
                  */
-               const ScanKeyword *keyword = ScanKeywordLookup(ident);
+               const ScanKeyword *keyword = ScanKeywordLookup(ident,
+                                                                                                          ScanKeywords,
+                                                                                                          NumScanKeywords);
  
                 if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
                         safe = false;
diff --git a/src/bin/pg_dump/dumputils.c b/src/bin/pg_dump/dumputils.c

index 93bd4d42735d7daf8e377c1755731ff9a61f30a7..178eb7c8d57095eb94baed792df18b062db7b623 100644 (file)
--- a/src/bin/pg_dump/dumputils.c
+++ b/src/bin/pg_dump/dumputils.c
@@ -130,7 +130,9 @@ fmtId(const char *rawid)
                  * Note: ScanKeywordLookup() does case-insensitive comparison, but
                  * that's fine, since we already know we have all-lower-case.
                  */
-               const ScanKeyword *keyword = ScanKeywordLookup(rawid);
+               const ScanKeyword *keyword = ScanKeywordLookup(rawid,
+                                                                                                          ScanKeywords,
+                                                                                                          NumScanKeywords);
  
                 if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
                         need_quotes = true;
diff --git a/src/bin/pg_dump/keywords.c b/src/bin/pg_dump/keywords.c

index 99cdf6e116867a7f4702f998e1b25c032cb5e30b..29c64b26133990c1753befa4949a96c07a14081a 100644 (file)
--- a/src/bin/pg_dump/keywords.c
+++ b/src/bin/pg_dump/keywords.c
@@ -27,5 +27,4 @@ const ScanKeyword ScanKeywords[] = {
  #include "parser/kwlist.h"
  };
  
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h

index 40382fd699beb86913d016c66ddeaf07825e43a2..a54a1b1bb4f961f5fc2e1b95b4beee8e574492fa 100644 (file)
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -20,6 +20,7 @@
  #define GRAMPARSE_H
  
  #include "nodes/parsenodes.h"
+#include "parser/keywords.h"
  
  /*
   * We track token locations in terms of byte offsets from the start of the
@@ -49,6 +50,12 @@ typedef struct base_yy_extra_type
         char       *scanbuf;
         Size            scanbuflen;
  
+       /*
+        * The keyword list to use.
+        */
+       const ScanKeyword *keywords;
+       int                     num_keywords;
+
         /*
          * literalbuf is used to accumulate literal values when multiple rules
          * are needed to parse a single literal.  Call startlit() to reset buffer
@@ -106,7 +113,10 @@ extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
                                                                 base_yyscan_t yyscanner);
  
  /* from scan.l */
-extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
+extern base_yyscan_t scanner_init(const char *str,
+                                                                 base_yy_extra_type *yyext,
+                                                                 const ScanKeyword *keywords,
+                                                                 int num_keywords);
  extern void scanner_finish(base_yyscan_t yyscanner);
  extern int     base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
                                            base_yyscan_t yyscanner);
diff --git a/src/include/parser/keywords.h b/src/include/parser/keywords.h

index 4c56c14ea3a908a77d8a4e5f77fc7423fe7a62f2..51f9c94b895fe366457705331cc79175f3566873 100644 (file)
--- a/src/include/parser/keywords.h
+++ b/src/include/parser/keywords.h
@@ -29,8 +29,10 @@ typedef struct ScanKeyword
  } ScanKeyword;
  
  extern const ScanKeyword ScanKeywords[];
-extern const ScanKeyword *LastScanKeyword;
+extern const int       NumScanKeywords;
  
-extern const ScanKeyword *ScanKeywordLookup(const char *text);
+extern const ScanKeyword *ScanKeywordLookup(const char *text,
+                                                                                       const ScanKeyword *keywords,
+                                                                                       int num_keywords);
  
  #endif   /* KEYWORDS_H */
diff --git a/src/interfaces/ecpg/preproc/c_keywords.c b/src/interfaces/ecpg/preproc/c_keywords.c

index 62e729db5e0758b9ed107a9a8f4ea7fcacd3a586..9bf504a10634806d3f4cf466d74a57ef7fbf7c46 100644 (file)
--- a/src/interfaces/ecpg/preproc/c_keywords.c
+++ b/src/interfaces/ecpg/preproc/c_keywords.c
@@ -1,10 +1,10 @@
  /*-------------------------------------------------------------------------
   *
- * keywords.c
+ * c_keywords.c
   *       lexical token lookup for reserved words in postgres embedded SQL
   *
   * $PostgreSQL$
- * §
+ *
   *-------------------------------------------------------------------------
   */
  #include "postgres_fe.h"
@@ -55,8 +55,31 @@ static const ScanKeyword ScanCKeywords[] = {
         {"year", YEAR_P, 0},
  };
  
+
+/*
+ * Do a binary search using plain strcmp() comparison.  This is much like
+ * ScanKeywordLookup(), except we want case-sensitive matching.
+ */
  const ScanKeyword *
  ScanCKeywordLookup(const char *text)
  {
-       return DoLookup(text, &ScanCKeywords[0], endof(ScanCKeywords) - 1);
+       const ScanKeyword *low = &ScanCKeywords[0];
+       const ScanKeyword *high = &ScanCKeywords[lengthof(ScanCKeywords) - 1];
+
+       while (low <= high)
+       {
+               const ScanKeyword *middle;
+               int                     difference;
+
+               middle = low + (high - low) / 2;
+               difference = strcmp(middle->name, text);
+               if (difference == 0)
+                       return middle;
+               else if (difference < 0)
+                       low = middle + 1;
+               else
+                       high = middle - 1;
+       }
+
+       return NULL;
  }
diff --git a/src/interfaces/ecpg/preproc/ecpg_keywords.c b/src/interfaces/ecpg/preproc/ecpg_keywords.c

index 9a7fde718160ae58ff0c822a8710a8e4065405f4..833e4e3bc90c33f7beae820c2ce7567b00e67b29 100644 (file)
--- a/src/interfaces/ecpg/preproc/ecpg_keywords.c
+++ b/src/interfaces/ecpg/preproc/ecpg_keywords.c
@@ -75,79 +75,26 @@ static const ScanKeyword ScanECPGKeywords[] = {
         {"whenever", SQL_WHENEVER, 0},
  };
  
-/* This is all taken from src/backend/parser/keyword.c and adjusted for our needs. */
-/*
- * Do a binary search using plain strcmp() comparison.
- */
-const ScanKeyword *
-DoLookup(const char *word, const ScanKeyword *low, const ScanKeyword *high)
-{
-       while (low <= high)
-       {
-               const ScanKeyword *middle;
-               int                     difference;
-
-               middle = low + (high - low) / 2;
-               difference = strcmp(middle->name, word);
-               if (difference == 0)
-                       return middle;
-               else if (difference < 0)
-                       low = middle + 1;
-               else
-                       high = middle - 1;
-       }
-
-       return NULL;
-}
-
  /*
   * ScanECPGKeywordLookup - see if a given word is a keyword
   *
   * Returns a pointer to the ScanKeyword table entry, or NULL if no match.
- *
- * The match is done case-insensitively.  Note that we deliberately use a
- * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
- * even if we are in a locale where tolower() would produce more or different
- * translations.  This is to conform to the SQL99 spec, which says that
- * keywords are to be matched in this way even though non-keyword identifiers
- * receive a different case-normalization mapping.
+ * Keywords are matched using the same case-folding rules as in the backend.
   */
  const ScanKeyword *
  ScanECPGKeywordLookup(const char *text)
  {
-       int                     len,
-                               i;
-       char            word[NAMEDATALEN];
         const ScanKeyword *res;
  
         /* First check SQL symbols defined by the backend. */
-
-       res = ScanKeywordLookup(text);
+       res = ScanKeywordLookup(text, ScanKeywords, NumScanKeywords);
         if (res)
                 return res;
  
-       len = strlen(text);
-       /* We assume all keywords are shorter than NAMEDATALEN. */
-       if (len >= NAMEDATALEN)
-               return NULL;
-
-       /*
-        * Apply an ASCII-only downcasing. We must not use tolower() since it may
-        * produce the wrong translation in some locales (eg, Turkish).
-        */
-       for (i = 0; i < len; i++)
-       {
-               char            ch = text[i];
-
-               if (ch >= 'A' && ch <= 'Z')
-                       ch += 'a' - 'A';
-               word[i] = ch;
-       }
-       word[len] = '\0';
-
-       /*
-        * Now do a binary search using plain strcmp() comparison.
-        */
+       /* Try ECPG-specific keywords. */
+       res = ScanKeywordLookup(text, ScanECPGKeywords, lengthof(ScanECPGKeywords));
+       if (res)
+               return res;
  
-       return DoLookup(word, &ScanECPGKeywords[0], endof(ScanECPGKeywords) - 1);
+       return NULL;
  }
diff --git a/src/interfaces/ecpg/preproc/extern.h b/src/interfaces/ecpg/preproc/extern.h

index 013359aab450aa194f8f682fb644a4e1ea1d49f7..d760e23443e5307bf9d2bb217c313b6458bdc127 100644 (file)
--- a/src/interfaces/ecpg/preproc/extern.h
+++ b/src/interfaces/ecpg/preproc/extern.h
@@ -101,7 +101,6 @@ extern void remove_variables(int);
  extern struct variable *new_variable(const char *, struct ECPGtype *, int);
  extern const ScanKeyword *ScanCKeywordLookup(const char *);
  extern const ScanKeyword *ScanECPGKeywordLookup(const char *text);
-extern const ScanKeyword *DoLookup(const char *, const ScanKeyword *, const ScanKeyword *);
  extern void scanner_init(const char *);
  extern void parser_init(void);
  extern void scanner_finish(void);
diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c

index fa6db2ed0426a7bbcfc0bb51695f39c9edbba128..57eecef889efb0abd6c5bf25dd3764a6095d562f 100644 (file)
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -26,5 +26,4 @@ const ScanKeyword ScanKeywords[] = {
  #include "parser/kwlist.h"
  };
  
-/* End of ScanKeywords, for use in kwlookup.c */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int      NumScanKeywords = lengthof(ScanKeywords);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Tue, 14 Jul 2009 20:24:10 +0000 (20:24 +0000)
src/backend/parser/gram.y		patch | blob | blame | history
src/backend/parser/keywords.c		patch | blob | blame | history
src/backend/parser/kwlookup.c		patch | blob | blame | history
src/backend/parser/parser.c		patch | blob | blame | history
src/backend/parser/scan.l		patch | blob | blame | history
src/backend/utils/adt/misc.c		patch | blob | blame | history
src/backend/utils/adt/ruleutils.c		patch | blob | blame | history
src/bin/pg_dump/dumputils.c		patch | blob | blame | history
src/bin/pg_dump/keywords.c		patch | blob | blame | history
src/include/parser/gramparse.h		patch | blob | blame | history
src/include/parser/keywords.h		patch | blob | blame | history
src/interfaces/ecpg/preproc/c_keywords.c		patch | blob | blame | history
src/interfaces/ecpg/preproc/ecpg_keywords.c		patch | blob | blame | history
src/interfaces/ecpg/preproc/extern.h		patch | blob | blame | history
src/interfaces/ecpg/preproc/keywords.c		patch | blob | blame | history