Improve behavior of to_timestamp()/to_date() functions

author Alexander Korotkov <akorotkov@postgresql.org>

Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)

committer Alexander Korotkov <akorotkov@postgresql.org>

Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)
author Alexander Korotkov <akorotkov@postgresql.org>
Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)
committer Alexander Korotkov <akorotkov@postgresql.org>
Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml

index 335900a86ef10d2a77dba8a8829b741a97681187..4331bebc96b87440bee066d48dd11550f41c3e64 100644 (file)
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -6262,16 +6262,57 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
       <listitem>
        <para>
         <function>to_timestamp</function> and <function>to_date</function>
-       skip multiple blank spaces in the input string unless the
-       <literal>FX</literal> option is used. For example,
-       <literal>to_timestamp('2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'YYYY MON')</literal> works, but
+       skip multiple blank spaces at the beginning of the input string and
+       around date and time values unless the <literal>FX</literal> option is used.  For example,
+       <literal>to_timestamp('&nbsp;2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'YYYY MON')</literal> and
+       <literal>to_timestamp('2000 - JUN', 'YYYY-MON')</literal> work, but
         <literal>to_timestamp('2000&nbsp;&nbsp;&nbsp;&nbsp;JUN', 'FXYYYY MON')</literal> returns an error
-       because <function>to_timestamp</function> expects one space only.
+       because <function>to_timestamp</function> expects a single space only.
         <literal>FX</literal> must be specified as the first item in
         the template.
        </para>
       </listitem>
  
+     <listitem>
+      <para>
+       A separator (a space or a non-letter/non-digit character) in the template string of
+       <function>to_timestamp</function> and <function>to_date</function>
+       matches any single separator in the input string or is skipped,
+       unless the <literal>FX</literal> option is used.
+       For example, <literal>to_timestamp('2000JUN', 'YYYY///MON')</literal> and
+       <literal>to_timestamp('2000/JUN', 'YYYY MON')</literal> work, but
+       <literal>to_timestamp('2000//JUN', 'YYYY/MON')</literal>
+       returns an error because the number of separators in the input string
+       exceeds the number of separators in the template.
+      </para>
+      <para>
+       If <literal>FX</literal> is specified, separator in template string
+       matches to exactly one character in input string.  Notice we don't insist
+       input string character to be the same as template string separator.
+       For example, <literal>to_timestamp('2000/JUN', 'FXYYYY MON')</literal>
+       works, but <literal>to_timestamp('2000/JUN', 'FXYYYY&nbsp;&nbsp;MON')</literal>
+       returns an error because a space second template string space consumed
+       letter <literal>J</literal> from the input string.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       <literal>TZH</literal> template pattern can match a signed number.
+       Without the <literal>FX</literal> option, it may lead to ambiguity in
+       interpretation of the minus sign, which can also be interpreted as a separator.
+       This ambiguity is resolved as follows.  If the number of separators before
+       <literal>TZH</literal> in the template string is less than the number of
+       separators before the minus sign in the input string, the minus sign
+       is interpreted as part of <literal>TZH</literal>.
+       Otherwise, the minus sign is considered to be a separator between values.
+       For example, <literal>to_timestamp('2000 -10', 'YYYY TZH')</literal> matches
+       <literal>-10</literal> to <literal>TZH</literal>, but
+       <literal>to_timestamp('2000 -10', 'YYYY&nbsp;&nbsp;TZH')</literal>
+       matches <literal>10</literal> to <literal>TZH</literal>.
+      </para>
+     </listitem>
+
       <listitem>
        <para>
         Ordinary text is allowed in <function>to_char</function>
@@ -6287,6 +6328,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
         string; for example <literal>"XX"</literal> skips two input characters
         (whether or not they are <literal>XX</literal>).
        </para>
+      <tip>
+        <para>
+          Prior to <productname>PostgreSQL</productname> 12, it was possible to
+          skip arbitrary text in the input string using non-letter or non-digit
+          characters. For example,
+          <literal>to_timestamp('2000y6m1d', 'yyyy-MM-DD')</literal> used to
+          work.  Now you can only use letter characters for this purpose.  For example,
+          <literal>to_timestamp('2000y6m1d', 'yyyytMMtDDt')</literal> and
+          <literal>to_timestamp('2000y6m1d', 'yyyy"y"MM"m"DD"d"')</literal>
+          skip <literal>y</literal>, <literal>m</literal>, and
+          <literal>d</literal>.
+        </para>
+      </tip>
       </listitem>
  
       <listitem>
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c

index 30696e3575dc778f1ee0fe3c01be61f44bbb7179..2ed8ca675bde9bf15049f1c2482d99e70447ef2c 100644 (file)
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -165,6 +165,8 @@ typedef struct
  #define NODE_TYPE_END      1
  #define NODE_TYPE_ACTION   2
  #define NODE_TYPE_CHAR     3
+#define NODE_TYPE_SEPARATOR    4
+#define NODE_TYPE_SPACE        5
  
  #define SUFFTYPE_PREFIX        1
  #define SUFFTYPE_POSTFIX   2
@@ -955,6 +957,7 @@ typedef struct NUMProc
  static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
                  const int *index);
  static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
+static bool is_separator_char(const char *str);
  static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
  static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
              const KeySuffix *suf, const int *index, int ver, NUMDesc *Num);
@@ -1044,6 +1047,16 @@ suff_search(const char *str, const KeySuffix *suf, int type)
     return NULL;
  }
  
+static bool
+is_separator_char(const char *str)
+{
+   /* ASCII printable character, but not letter or digit */
+   return (*str > 0x20 && *str < 0x7F &&
+           !(*str >= 'A' && *str <= 'Z') &&
+           !(*str >= 'a' && *str <= 'z') &&
+           !(*str >= '0' && *str <= '9'));
+}
+
  /* ----------
   * Prepare NUMDesc (number description struct) via FormatNode struct
   * ----------
@@ -1319,7 +1332,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw,
                 if (*str == '\\' && *(str + 1) == '"')
                     str++;
                 chlen = pg_mblen(str);
-               n->type = NODE_TYPE_CHAR;
+
+               if (ver == DCH_TYPE && is_separator_char(str))
+                   n->type = NODE_TYPE_SEPARATOR;
+               else if (isspace((unsigned char) *str))
+                   n->type = NODE_TYPE_SPACE;
+               else
+                   n->type = NODE_TYPE_CHAR;
+
                 memcpy(n->character, str, chlen);
                 n->character[chlen] = '\0';
                 n->key = NULL;
@@ -2987,27 +3007,66 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
     int         len,
                 value;
     bool        fx_mode = false;
+   /* number of extra skipped characters (more than given in format string) */
+   int         extra_skip = 0;
  
     for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
     {
-       if (n->type != NODE_TYPE_ACTION)
+       /*
+        * Ignore spaces at the beginning of the string and before fields when
+        * not in FX (fixed width) mode.
+        */
+       if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
+           (n->type == NODE_TYPE_ACTION || n == node))
+       {
+           while (*s != '\0' && isspace((unsigned char) *s))
+           {
+               s++;
+               extra_skip++;
+           }
+       }
+
+       if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
+       {
+           if (!fx_mode)
+           {
+               /*
+                * In non FX (fixed format) mode one format string space or
+                * separator match to one space or separator in input string.
+                * Or match nothing if there is no space or separator in
+                * the current position of input string.
+                */
+               extra_skip--;
+               if (isspace((unsigned char) *s) || is_separator_char(s))
+               {
+                   s++;
+                   extra_skip++;
+               }
+           }
+           else
+           {
+               /*
+                * In FX mode, on format string space or separator we consume
+                * exactly one character from input string.  Notice we don't
+                * insist that the consumed character match the format's
+                * character.
+                */
+               s += pg_mblen(s);
+           }
+           continue;
+       }
+       else if (n->type != NODE_TYPE_ACTION)
         {
             /*
-            * Separator, so consume one character from input string.  Notice
-            * we don't insist that the consumed character match the format's
-            * character.
+            * Text character, so consume one character from input string.
+            * Notice we don't insist that the consumed character match the
+            * format's character.
+            * Text field ignores FX mode.
              */
             s += pg_mblen(s);
             continue;
         }
  
-       /* Ignore spaces before fields when not in FX (fixed width) mode */
-       if (!fx_mode && n->key->id != DCH_FX)
-       {
-           while (*s != '\0' && isspace((unsigned char) *s))
-               s++;
-       }
-
         from_char_set_mode(out, n->key->date_mode);
  
         switch (n->key->id)
@@ -3086,10 +3145,24 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
                                 n->key->name)));
                 break;
             case DCH_TZH:
-               out->tzsign = *s == '-' ? -1 : +1;
-
+               /*
+                * Value of TZH might be negative.  And the issue is that we
+                * might swallow minus sign as the separator.  So, if we have
+                * skipped more characters than specified in the format string,
+                * then we consider prepending last skipped minus to TZH.
+                */
                 if (*s == '+' || *s == '-' || *s == ' ')
+               {
+                   out->tzsign = *s == '-' ? -1 : +1;
                     s++;
+               }
+               else
+               {
+                   if (extra_skip > 0 && *(s - 1) == '-')
+                       out->tzsign = -1;
+                   else
+                       out->tzsign = +1;
+               }
  
                 from_char_parse_int_len(&out->tzh, &s, 2, n);
                 break;
@@ -3261,6 +3334,17 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out)
                 SKIP_THth(s, n->suffix);
                 break;
         }
+
+       /* Ignore all spaces after fields */
+       if (!fx_mode)
+       {
+           extra_skip = 0;
+           while (*s != '\0' && isspace((unsigned char) *s))
+           {
+               s++;
+               extra_skip++;
+           }
+       }
     }
  }
  
diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out

index 63e39198e686be04555b33292aac5dbeaccd6554..7d11f25158491ab3ba779fce526ad7ba69952619 100644 (file)
--- a/src/test/regress/expected/horology.out
+++ b/src/test/regress/expected/horology.out
@@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS');
   Sat Feb 16 08:14:30 0097 PST
  (1 row)
  
+SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS');
+         to_timestamp         
+------------------------------
+ Fri Mar 18 23:38:15 2011 PDT
+(1 row)
+
  SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD');
           to_timestamp         
  ------------------------------
   Sat Jan 12 00:00:00 1985 PST
  (1 row)
  
+SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD');
+         to_timestamp         
+------------------------------
+ Sat Jan 12 00:00:00 1985 PST
+(1 row)
+
+SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD');
+         to_timestamp         
+------------------------------
+ Sat Jan 12 00:00:00 1985 PST
+(1 row)
+
  SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',
-                    '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD');
+                    '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD');
           to_timestamp         
  ------------------------------
   Sun May 16 00:00:00 1976 PDT
@@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD');
  (1 row)
  
  SELECT to_timestamp('15 "text between quote marks" 98 54 45',
-                    E'HH24 "\\text between quote marks\\"" YY MI SS');
+                    E'HH24 "\\"text between quote marks\\"" YY MI SS');
           to_timestamp         
  ------------------------------
   Thu Jan 01 15:54:45 1998 PST
@@ -2810,6 +2828,24 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
  SELECT to_timestamp('97/Feb/16', 'YYMonDD');
  ERROR:  invalid value "/Fe" for "Mon"
  DETAIL:  The given value did not match any of the allowed values for this field.
+SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
+         to_timestamp         
+------------------------------
+ Sun Feb 16 00:00:00 1997 PST
+(1 row)
+
+SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD');
+         to_timestamp         
+------------------------------
+ Sun Feb 16 00:00:00 1997 PST
+(1 row)
+
+SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD');
+         to_timestamp         
+------------------------------
+ Sun Feb 16 00:00:00 1997 PST
+(1 row)
+
  SELECT to_timestamp('19971116', 'YYYYMMDD');
           to_timestamp         
  ------------------------------
@@ -2966,7 +3002,7 @@ SELECT to_timestamp('2011-12-18 11:38 20',     'YYYY-MM-DD HH12:MI TZM');
  SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
           to_timestamp         
  ------------------------------
- Sun Dec 18 03:38:15 2011 PST
+ Sun Dec 18 23:38:15 2011 PST
  (1 row)
  
  SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
@@ -2996,7 +3032,64 @@ SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
  SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD   HH24:MI:SS');
           to_timestamp         
  ------------------------------
- Sun Dec 18 03:38:15 2011 PST
+ Sun Dec 18 23:38:15 2011 PST
+(1 row)
+
+SELECT to_timestamp('2000+   JUN', 'YYYY/MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('  2000 +JUN', 'YYYY/MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('2000  +JUN', 'YYYY//MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('2000 + JUN', 'YYYY MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('2000 ++ JUN', 'YYYY  MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('2000 + + JUN', 'YYYY  MON');
+ERROR:  invalid value "+ J" for "MON"
+DETAIL:  The given value did not match any of the allowed values for this field.
+SELECT to_timestamp('2000 + + JUN', 'YYYY   MON');
+         to_timestamp         
+------------------------------
+ Thu Jun 01 00:00:00 2000 PDT
+(1 row)
+
+SELECT to_timestamp('2000 -10', 'YYYY TZH');
+         to_timestamp         
+------------------------------
+ Sat Jan 01 02:00:00 2000 PST
+(1 row)
+
+SELECT to_timestamp('2000 -10', 'YYYY  TZH');
+         to_timestamp         
+------------------------------
+ Fri Dec 31 06:00:00 1999 PST
  (1 row)
  
  SELECT to_date('2011 12  18', 'YYYY MM DD');
@@ -3014,13 +3107,13 @@ SELECT to_date('2011 12  18', 'YYYY MM  DD');
  SELECT to_date('2011 12  18', 'YYYY MM   DD');
    to_date   
  ------------
- 12-08-2011
+ 12-18-2011
  (1 row)
  
  SELECT to_date('2011 12 18', 'YYYY  MM DD');
    to_date   
  ------------
- 02-18-2011
+ 12-18-2011
  (1 row)
  
  SELECT to_date('2011  12 18', 'YYYY  MM DD');
diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql

index ebb196a1cfc36bc133a3b73861360cf97d409e91..807037be76ef6f326a23882037ef2cb522d50bda 100644 (file)
--- a/src/test/regress/sql/horology.sql
+++ b/src/test/regress/sql/horology.sql
@@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 'YYYY/Mon/DD --> HH:MI:SS');
  
  SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS');
  
+SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS');
+
  SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD');
  
+SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD');
+
+SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD');
+
  SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16',
-                    '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD');
+                    '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD');
  
  SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD');
  
  SELECT to_timestamp('15 "text between quote marks" 98 54 45',
-                    E'HH24 "\\text between quote marks\\"" YY MI SS');
+                    E'HH24 "\\"text between quote marks\\"" YY MI SS');
  
  SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY');
  
@@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay');
  
  SELECT to_timestamp('97/Feb/16', 'YYMonDD');
  
+SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD');
+
+SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD');
+
+SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD');
+
  SELECT to_timestamp('19971116', 'YYYYMMDD');
  
  SELECT to_timestamp('20000-1116', 'YYYY-MMDD');
@@ -464,6 +476,17 @@ SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD HH24:MI:SS');
  SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD  HH24:MI:SS');
  SELECT to_timestamp('2011-12-18  23:38:15', 'YYYY-MM-DD   HH24:MI:SS');
  
+SELECT to_timestamp('2000+   JUN', 'YYYY/MON');
+SELECT to_timestamp('  2000 +JUN', 'YYYY/MON');
+SELECT to_timestamp(' 2000 +JUN', 'YYYY//MON');
+SELECT to_timestamp('2000  +JUN', 'YYYY//MON');
+SELECT to_timestamp('2000 + JUN', 'YYYY MON');
+SELECT to_timestamp('2000 ++ JUN', 'YYYY  MON');
+SELECT to_timestamp('2000 + + JUN', 'YYYY  MON');
+SELECT to_timestamp('2000 + + JUN', 'YYYY   MON');
+SELECT to_timestamp('2000 -10', 'YYYY TZH');
+SELECT to_timestamp('2000 -10', 'YYYY  TZH');
+
  SELECT to_date('2011 12  18', 'YYYY MM DD');
  SELECT to_date('2011 12  18', 'YYYY MM  DD');
  SELECT to_date('2011 12  18', 'YYYY MM   DD');
author	Alexander Korotkov <akorotkov@postgresql.org>
	Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)
committer	Alexander Korotkov <akorotkov@postgresql.org>
	Sun, 9 Sep 2018 18:19:51 +0000 (21:19 +0300)
doc/src/sgml/func.sgml		patch \| blob \| blame \| history
src/backend/utils/adt/formatting.c		patch \| blob \| blame \| history
src/test/regress/expected/horology.out		patch \| blob \| blame \| history
src/test/regress/sql/horology.sql		patch \| blob \| blame \| history