I've attached new version of the patch. It is a little bit simpler now than the previous one. The patch doesn't handle backslashes now, since there was a commit which fixes quoted-substring handling recently. Anyway I'm not sure that this handling was necessary.
I've checked queries from this thread. It seems that they give right timestamps and work like in Oracle (except different messages). The patch contains documentation and regression test fixes. On Sun, Nov 19, 2017 at 12:26:39PM -0500, Tom Lane wrote: > I don't much like the error message that you've got: > > +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); > +ERROR: unexpected character "/", expected ":" > +DETAIL: The given value did not match any of the allowed values for this > field. > > The DETAIL message seems to have been copied-and-pasted into a context > where it's not terribly accurate. I'd consider replacing it with > something along the lines of "HINT: In FX mode, punctuation in the input > string must exactly match the format string." This way the report will > contain a pretty clear statement of the new rule that was broken. (BTW, > it's a hint not a detail because it might be guessing wrong as to what > the real problem is.) > > regards, tom lane Messages have the following format now: SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); ERROR: unexpected character "/", expected ":" HINT: In FX mode, punctuation in the input string must exactly match the format string. -- Arthur Zakirov Postgres Professional: http://www.postgrespro.com Russian Postgres Company
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 698daf69ea..1c8541d552 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6166,7 +6166,8 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); <function>to_timestamp</function> and <function>to_date</function> skip multiple blank spaces in the input string unless the <literal>FX</literal> option is used. For example, - <literal>to_timestamp('2000 JUN', 'YYYY MON')</literal> works, but + <literal>to_timestamp('2000 JUN', 'YYYY MON')</literal> and + <literal>to_timestamp('2000 JUN', 'YYYY MON')</literal> work, but <literal>to_timestamp('2000 JUN', 'FXYYYY MON')</literal> returns an error because <function>to_timestamp</function> expects one space only. <literal>FX</literal> must be specified as the first item in @@ -6174,6 +6175,19 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); </para> </listitem> + <listitem> + <para> + <function>to_timestamp</function> and <function>to_date</function> don't + skip multiple printable non letter and non digit characters in the input + string, but skip them in the formatting string. For example, + <literal>to_timestamp('2000-JUN', 'YYYY/MON')</literal> and + <literal>to_timestamp('2000/JUN', 'YYYY//MON')</literal> work, but + <literal>to_timestamp('2000//JUN', 'YYYY/MON')</literal> + returns an error because count of the "/" character in the input string + doesn't match count of it in the formatting string. + </para> + </listitem> + <listitem> <para> Ordinary text is allowed in <function>to_char</function> diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index ec97de0ad2..81565aab96 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -171,6 +171,8 @@ typedef struct #define NODE_TYPE_END 1 #define NODE_TYPE_ACTION 2 #define NODE_TYPE_CHAR 3 +#define NODE_TYPE_SEPARATOR 4 +#define NODE_TYPE_SPACE 5 #define SUFFTYPE_PREFIX 1 #define SUFFTYPE_POSTFIX 2 @@ -953,6 +955,7 @@ typedef struct NUMProc static const KeyWord *index_seq_search(const char *str, const KeyWord *kw, const int *index); static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type); +static bool is_separator_char(const char *str); static void NUMDesc_prepare(NUMDesc *num, FormatNode *n); static void parse_format(FormatNode *node, const char *str, const KeyWord *kw, const KeySuffix *suf, const int *index, int ver, NUMDesc *Num); @@ -969,7 +972,6 @@ static void dump_node(FormatNode *node, int max); static const char *get_th(char *num, int type); static char *str_numth(char *dest, char *num, int type); static int adjust_partial_year_to_2020(int year); -static int strspace_len(char *str); static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode); static void from_char_set_int(int *dest, const int value, const FormatNode *node); static int from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node); @@ -1042,6 +1044,16 @@ suff_search(const char *str, const KeySuffix *suf, int type) return NULL; } +static bool +is_separator_char(const char *str) +{ + /* ASCII printable character, but not letter or digit */ + return (*str > 0x20 && *str < 0x7F && + !(*str >= 'A' && *str <= 'Z') && + !(*str >= 'a' && *str <= 'z') && + !(*str >= '0' && *str <= '9')); +} + /* ---------- * Prepare NUMDesc (number description struct) via FormatNode struct * ---------- @@ -1317,7 +1329,14 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, if (*str == '\\' && *(str + 1) == '"') str++; chlen = pg_mblen(str); - n->type = NODE_TYPE_CHAR; + + if (ver == DCH_TYPE && is_separator_char(str)) + n->type = NODE_TYPE_SEPARATOR; + else if (isspace((unsigned char) *str)) + n->type = NODE_TYPE_SPACE; + else + n->type = NODE_TYPE_CHAR; + memcpy(n->character, str, chlen); n->character[chlen] = '\0'; n->key = NULL; @@ -2115,19 +2134,6 @@ adjust_partial_year_to_2020(int year) } -static int -strspace_len(char *str) -{ - int len = 0; - - while (*str && isspace((unsigned char) *str)) - { - str++; - len++; - } - return len; -} - /* * Set the date mode of a from-char conversion. * @@ -2197,11 +2203,6 @@ from_char_parse_int_len(int *dest, char **src, const int len, FormatNode *node) char *init = *src; int used; - /* - * Skip any whitespace before parsing the integer. - */ - *src += strspace_len(*src); - Assert(len <= DCH_MAX_ITEM_SIZ); used = (int) strlcpy(copy, *src, len + 1); @@ -2975,17 +2976,60 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++) { - if (n->type != NODE_TYPE_ACTION) + if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR) + { + /* + * In non FX (fixed format) mode we don't insist that the consumed + * character matches the format's character. + */ + if (!fx_mode) + { + if (isspace((unsigned char) *s) || is_separator_char(s)) + s++; + + continue; + } + + /* + * In FX mode we insist that whitespace from the format string + * matches whitespace from the input string. + */ + if (n->type == NODE_TYPE_SPACE && !isspace((unsigned char) *s)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%.*s\", expected \"%s\"", + pg_mblen(s), s, n->character), + errhint("In FX mode, punctuation in the input string " + "must exactly match the format string."))); + /* + * In FX mode we insist that separator character from the format + * string matches separator character from the input string. + */ + else if (n->type == NODE_TYPE_SEPARATOR && *n->character != *s) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%.*s\", expected \"%s\"", + pg_mblen(s), s, n->character), + errhint("In FX mode, punctuation in the input string " + "must exactly match the format string."))); + + s++; + continue; + } + else if (n->type == NODE_TYPE_CHAR) { /* - * Separator, so consume one character from input string. Notice - * we don't insist that the consumed character match the format's - * character. + * Text character, so consume one character from input string. + * Notice we don't insist that the consumed character match the + * format's character. + * Text field ignores FX mode. */ s += pg_mblen(s); continue; } + /* n->type == NODE_TYPE_ACTION */ + /* Ignore spaces before fields when not in FX (fixed width) mode */ if (!fx_mode && n->key->id != DCH_FX) { @@ -2993,6 +3037,31 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) s++; } + if (!fx_mode) + { + /* + * Lost separator character from the input string. Separator + * character should match at least one space or separator character + * from the format string. + */ + if (is_separator_char(s)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%c\"", *s), + errhint("Separator character from the input string " + "must match at least one space or separator " + "character from the format string."))); + } + /* + * In FX mode we don't expect a whitespace or a character. + */ + else if (isspace((unsigned char) *s) || is_separator_char(s)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_DATETIME_FORMAT), + errmsg("unexpected character \"%c\"", *s), + errhint("In FX mode, punctuation in the input string " + "must exactly match the format string."))); + from_char_set_mode(out, n->key->date_mode); switch (n->key->id) @@ -3232,6 +3301,13 @@ DCH_from_char(FormatNode *node, char *in, TmFromChar *out) SKIP_THth(s, n->suffix); break; } + + /* Ignore all spaces after fields */ + if (!fx_mode) + { + while (*s != '\0' && isspace((unsigned char) *s)) + s++; + } } } diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index 7b3d058425..54a42ee42e 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -2769,14 +2769,32 @@ SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); Sat Feb 16 08:14:30 0097 PST (1 row) +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + to_timestamp +------------------------------ + Fri Mar 18 23:38:15 2011 PDT +(1 row) + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); to_timestamp ------------------------------ Sat Jan 12 00:00:00 1985 PST (1 row) +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + to_timestamp +------------------------------ + Sat Jan 12 00:00:00 1985 PST +(1 row) + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); to_timestamp ------------------------------ Sun May 16 00:00:00 1976 PDT @@ -2789,7 +2807,7 @@ SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); (1 row) SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); to_timestamp ------------------------------ Thu Jan 01 15:54:45 1998 PST @@ -2808,8 +2826,23 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); (1 row) SELECT to_timestamp('97/Feb/16', 'YYMonDD'); -ERROR: invalid value "/Fe" for "Mon" -DETAIL: The given value did not match any of the allowed values for this field. +ERROR: unexpected character "/" +HINT: Separator character from the input string must match at least one space or separator character from the format string. +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); +ERROR: unexpected character "/", expected ":" +HINT: In FX mode, punctuation in the input string must exactly match the format string. +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + to_timestamp +------------------------------ + Sun Feb 16 00:00:00 1997 PST +(1 row) + SELECT to_timestamp('19971116', 'YYYYMMDD'); to_timestamp ------------------------------ @@ -2936,7 +2969,7 @@ SELECT to_timestamp('2011-12-18 11:38 PM', 'YYYY-MM-DD HH12:MI PM'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); @@ -2966,9 +2999,33 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); to_timestamp ------------------------------ - Sun Dec 18 03:38:15 2011 PST + Sun Dec 18 23:38:15 2011 PST (1 row) +SELECT to_timestamp('2000----JUN', 'YYYY MON'); +ERROR: unexpected character "-" +HINT: Separator character from the input string must match at least one space or separator character from the format string. +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +ERROR: unexpected character "+" +HINT: Separator character from the input string must match at least one space or separator character from the format string. +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); + to_timestamp +------------------------------ + Thu Jun 01 00:00:00 2000 PDT +(1 row) + +SELECT to_timestamp('2000--JUN', 'YYYY/MON'); +ERROR: unexpected character "-" +HINT: Separator character from the input string must match at least one space or separator character from the format string. +SELECT to_timestamp('2000 JUN', 'FXYYYY MON'); +ERROR: unexpected character " " +HINT: In FX mode, punctuation in the input string must exactly match the format string. SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ @@ -2984,13 +3041,13 @@ SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 12-08-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); to_date ------------ - 02-18-2011 + 12-18-2011 (1 row) SELECT to_date('2011 12 18', 'YYYY MM DD'); diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index a7bc9dcfc4..1dbaf8ee46 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -392,15 +392,21 @@ SELECT to_timestamp('0097/Feb/16 --> 08:14:30', 'YYYY/Mon/DD --> HH:MI:SS'); SELECT to_timestamp('97/2/16 8:14:30', 'FMYYYY/FMMM/FMDD FMHH:FMMI:FMSS'); +SELECT to_timestamp('2011$03!18 23_38_15', 'YYYY-MM-DD HH24:MI:SS'); + SELECT to_timestamp('1985 January 12', 'YYYY FMMonth DD'); +SELECT to_timestamp('1985 FMMonth 12', 'YYYY "FMMonth" DD'); + +SELECT to_timestamp('1985 \ 12', 'YYYY \\ DD'); + SELECT to_timestamp('My birthday-> Year: 1976, Month: May, Day: 16', - '"My birthday-> Year" YYYY, "Month:" FMMonth, "Day:" DD'); + '"My birthday-> Year:" YYYY, "Month:" FMMonth, "Day:" DD'); SELECT to_timestamp('1,582nd VIII 21', 'Y,YYYth FMRM DD'); SELECT to_timestamp('15 "text between quote marks" 98 54 45', - E'HH24 "\\text between quote marks\\"" YY MI SS'); + E'HH24 "\\"text between quote marks\\"" YY MI SS'); SELECT to_timestamp('05121445482000', 'MMDDHH24MISSYYYY'); @@ -408,6 +414,12 @@ SELECT to_timestamp('2000January09Sunday', 'YYYYFMMonthDDFMDay'); SELECT to_timestamp('97/Feb/16', 'YYMonDD'); +SELECT to_timestamp('97/Feb/16', 'YY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY:Mon:DD'); + +SELECT to_timestamp('97/Feb/16', 'FXYY/Mon/DD'); + SELECT to_timestamp('19971116', 'YYYYMMDD'); SELECT to_timestamp('20000-1116', 'YYYY-MMDD'); @@ -458,6 +470,13 @@ SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); SELECT to_timestamp('2011-12-18 23:38:15', 'YYYY-MM-DD HH24:MI:SS'); +SELECT to_timestamp('2000----JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000 + + JUN', 'YYYY MON'); +SELECT to_timestamp('2000--JUN', 'YYYY/MON'); +SELECT to_timestamp('2000 JUN', 'FXYYYY MON'); + SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD'); SELECT to_date('2011 12 18', 'YYYY MM DD');