2016-02-10 19:26 GMT+01:00 Jim Nasby <[email protected]>:
> The following review has been posted through the commitfest application:
> make installcheck-world: tested, passed
> Implements feature: tested, passed
> Spec compliant: not tested
> Documentation: not tested
>
> NEEDS CATVERSION BUMP
>
> I editorialized the docs and some of the comments. In particular, I
> documented behavior of not truncating, and recommended casting to name[] if
> user cares about that. Added a unit test to verify that works. BTW, I saw
> mention in the thread about not truncated spaces, but the function *does*
> truncate them, unless they're inside quotes, where they're legitimate.
>
ok
>
> Also added test for invalid characters.
>
> I think "strict" would be more in line with other uses in code. There are
> currently no other occurrences of 'strictmode' in the code. There are loads
> of references to 'strict', but I didn't go through all of them to see if
> any were used as externally visible function parameter names.
>
I am sorry, I don't understand to this point. You unlike the name of
parameter "strictmode" ? Have you any proposal? Maybe "restrictive" ?
>
> qualname_str is used in exactly 1 place. Either it should be gotten rid
> of, or all the uses of text_to_cstring(qualname) should be changed to
> qualname_str.
>
fixed, qualname_str is used everywhere
>
> I think the code would have been clearer if instead of the big if (*nextp
> == '\"') it did the same "inquote" looping that is done elsewhere, but I
> don't have a strong opinion on it.
>
The almost all code +/- is related to human readable error messages. We can
move some code to separate static functions - read_quoted_ident,
read_unquoted_ident, but there will be some magic about parameters, and the
code will not be much better, than it is now.
>
> The new status of this patch is: Waiting on Author
>
Thank you for your work on documentation.
I am sending updated version of this patch.
Regards
Pavel
>
> --
> Sent via pgsql-hackers mailing list ([email protected])
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers
>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
new file mode 100644
index f9eea76..9eed19a
*** a/doc/src/sgml/func.sgml
--- b/doc/src/sgml/func.sgml
***************
*** 1821,1826 ****
--- 1821,1847 ----
<row>
<entry>
<indexterm>
+ <primary>parse_ident</primary>
+ </indexterm>
+ <literal><function>parse_ident(<parameter>str</parameter> <type>text</type>,
+ [ <parameter>strictmode</parameter> <type>boolean</type> DEFAULT true ] )</function></literal>
+ </entry>
+ <entry><type>text[]</type></entry>
+ <entry>Split <parameter>qualified identifier</parameter> into array <parameter>parts</parameter>.
+ When <parameter>strictmode</parameter> is false, extra characters after the identifier are ignored.
+ This is useful for parsing identifiers for objects like functions and arrays that may have trailing
+ characters. By default, extra characters after the last identifier are considered an error.
+ second parameter is false, then chars after last identifier are ignored. Note that this function
+ does not truncate quoted identifiers. If you care about that you should cast the result of this
+ function to name[].
+ </entry>
+ <entry><literal>parse_ident('"SomeSchema".someTable')</literal></entry>
+ <entry><literal>"SomeSchema,sometable"</literal></entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
<primary>pg_client_encoding</primary>
</indexterm>
<literal><function>pg_client_encoding()</function></literal>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
new file mode 100644
index 923fe58..61d5b80
*** a/src/backend/catalog/system_views.sql
--- b/src/backend/catalog/system_views.sql
*************** RETURNS jsonb
*** 965,967 ****
--- 965,974 ----
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'jsonb_set';
+
+ CREATE OR REPLACE FUNCTION
+ parse_ident(str text, strictmode boolean DEFAULT true)
+ RETURNS text[]
+ LANGUAGE INTERNAL
+ STRICT IMMUTABLE
+ AS 'parse_ident';
diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c
new file mode 100644
index 2b4ab20..7aa5b76
*** a/src/backend/parser/scansup.c
--- b/src/backend/parser/scansup.c
*************** scanstr(const char *s)
*** 130,135 ****
--- 130,144 ----
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
+ return downcase_identifier(ident, len, warn, true);
+ }
+
+ /*
+ * a workhorse for downcase_truncate_identifier
+ */
+ char *
+ downcase_identifier(const char *ident, int len, bool warn, bool truncate)
+ {
char *result;
int i;
bool enc_is_single_byte;
*************** downcase_truncate_identifier(const char
*** 158,169 ****
}
result[i] = '\0';
! if (i >= NAMEDATALEN)
truncate_identifier(result, i, warn);
return result;
}
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*
--- 167,179 ----
}
result[i] = '\0';
! if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn);
return result;
}
+
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
new file mode 100644
index 43f36db..778b71a
*** a/src/backend/utils/adt/misc.c
--- b/src/backend/utils/adt/misc.c
***************
*** 21,32 ****
--- 21,35 ----
#include <unistd.h>
#include "access/sysattr.h"
+ #include "access/htup_details.h"
#include "catalog/catalog.h"
+ #include "catalog/namespace.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
+ #include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
***************
*** 38,43 ****
--- 41,47 ----
#include "utils/ruleutils.h"
#include "tcop/tcopprot.h"
#include "utils/acl.h"
+ #include "utils/array.h"
#include "utils/builtins.h"
#include "utils/timestamp.h"
*************** pg_column_is_updatable(PG_FUNCTION_ARGS)
*** 719,721 ****
--- 723,895 ----
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
+
+
+ /*
+ * This simple parser utility are compatible with lexer implementation,
+ * used only in parse_ident function
+ */
+ static bool
+ is_ident_start(unsigned char c)
+ {
+ if (c == '_')
+ return true;
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ return true;
+
+ if (c >= 0200 && c <= 0377)
+ return true;
+
+ return false;
+ }
+
+ static bool
+ is_ident_cont(unsigned char c)
+ {
+ if (c >= '0' && c <= '9')
+ return true;
+
+ return is_ident_start(c);
+ }
+
+ /*
+ * parse_ident - parse SQL composed identifier to separate identifiers.
+ * When strict mode is active (second parameter), then any chars after
+ * last identifiers are disallowed.
+ */
+ Datum
+ parse_ident(PG_FUNCTION_ARGS)
+ {
+ text *qualname;
+ char *qualname_str;
+ bool strict_mode;
+ ArrayBuildState *astate = NULL;
+ char *nextp;
+ bool after_dot = false;
+
+ qualname = PG_GETARG_TEXT_PP(0);
+ qualname_str = text_to_cstring(qualname);
+ strict_mode = PG_GETARG_BOOL(1);
+
+ nextp = qualname_str;
+
+ /* skip leading whitespace */
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ for (;;)
+ {
+ char *curname;
+ char *endp;
+ bool missing_ident;
+
+ missing_ident = true;
+
+ if (*nextp == '\"')
+ {
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '\"');
+ if (endp == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unclosed double quotes"),
+ errdetail("string \"%s\" is not valid identifier",
+ qualname_str)));
+ if (endp[1] != '\"')
+ break;
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ nextp = endp + 1;
+ *endp = '\0';
+
+ if (endp - curname == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier should not be empty: \"%s\"",
+ qualname_str)));
+
+ astate = accumArrayResult(astate,
+ CStringGetTextDatum(curname), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+ else
+ {
+ if (is_ident_start((unsigned char) *nextp))
+ {
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * We don't would to truncate identifier implicitli in this
+ * moment, so we use downcase_identifier instead
+ * dowcase_truncate_identifier. The user can do truncating
+ * later if it is necessary for his purpouse.
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate,
+ PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+ }
+
+ if (missing_ident)
+ {
+ /* choose text of adequate error message */
+ if (*nextp == '.')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier before \".\" symbol: \"%s\"",
+ qualname_str)));
+ else if (after_dot)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier after \".\" symbol: \"%s\"",
+ qualname_str)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("missing identifier: \"%s\"",
+ qualname_str)));
+ }
+
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+
+ if (*nextp == '.')
+ {
+ after_dot = true;
+ nextp++;
+ while (isspace((unsigned char) *nextp))
+ nextp++;
+ continue;
+ }
+ else if (*nextp == '\0')
+ {
+ break;
+ }
+ else
+ {
+ if (strict_mode)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("identifier contains disallowed characters: \"%s\"",
+ qualname_str)));
+ break;
+ }
+ }
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+ }
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
new file mode 100644
index 1c0ef9a..94b1a22
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
*************** DESCR("I/O");
*** 3457,3462 ****
--- 3457,3465 ----
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
DESCR("convert namespace name to regnamespace");
+ DATA(insert OID = 3318 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strictmode}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
+ DESCR("parse qualified identifier to array of identifiers");
+
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));
diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h
new file mode 100644
index 4f4164b..4f95c81
*** a/src/include/parser/scansup.h
--- b/src/include/parser/scansup.h
*************** extern char *scanstr(const char *s);
*** 20,25 ****
--- 20,28 ----
extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn);
+ extern char *downcase_identifier(const char *ident, int len,
+ bool warn, bool truncate);
+
extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
new file mode 100644
index affcc01..b22a6b8
*** a/src/include/utils/builtins.h
--- b/src/include/utils/builtins.h
*************** extern Datum pg_typeof(PG_FUNCTION_ARGS)
*** 508,513 ****
--- 508,514 ----
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
+ extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out
new file mode 100644
index b359d52..66569f4
*** a/src/test/regress/expected/name.out
--- b/src/test/regress/expected/name.out
*************** SELECT '' AS two, c.f1 FROM NAME_TBL c W
*** 124,126 ****
--- 124,178 ----
(2 rows)
DROP TABLE NAME_TBL;
+ DO $$
+ DECLARE r text[];
+ BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ END;
+ $$;
+ NOTICE: schemax.tabley
+ NOTICE: "SchemaX"."TableY"
+ SELECT parse_ident('foo.boo');
+ parse_ident
+ -------------
+ {foo,boo}
+ (1 row)
+
+ SELECT parse_ident('foo.boo[]'); -- should fail
+ ERROR: identifier contains disallowed characters: "foo.boo[]"
+ SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+ parse_ident
+ -------------
+ {foo,boo}
+ (1 row)
+
+ -- should fail
+ SELECT parse_ident(' ');
+ ERROR: missing identifier: " "
+ SELECT parse_ident(' .aaa');
+ ERROR: missing identifier before "." symbol: " .aaa"
+ SELECT parse_ident(' aaa . ');
+ ERROR: missing identifier after "." symbol: " aaa . "
+ SELECT parse_ident('aaa.a%b');
+ ERROR: identifier contains disallowed characters: "aaa.a%b"
+ SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+ length | length
+ --------+--------
+ 414 | 289
+ (1 row)
+
+ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
+ parse_ident
+ -----------------------------------------------------------------------------------------------------------
+ {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+ (1 row)
+
+ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
+ parse_ident
+ ------------------------------------------------------------------------------------------------------
+ {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
+ (1 row)
+
diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql
new file mode 100644
index 1c7a671..2e70e4e
*** a/src/test/regress/sql/name.sql
--- b/src/test/regress/sql/name.sql
*************** SELECT '' AS three, c.f1 FROM NAME_TBL c
*** 52,54 ****
--- 52,79 ----
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL;
+
+ DO $$
+ DECLARE r text[];
+ BEGIN
+ r := parse_ident('Schemax.Tabley');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ r := parse_ident('"SchemaX"."TableY"');
+ RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
+ END;
+ $$;
+
+ SELECT parse_ident('foo.boo');
+ SELECT parse_ident('foo.boo[]'); -- should fail
+ SELECT parse_ident('foo.boo[]', strictmode => false); -- ok
+
+ -- should fail
+ SELECT parse_ident(' ');
+ SELECT parse_ident(' .aaa');
+ SELECT parse_ident(' aaa . ');
+ SELECT parse_ident('aaa.a%b');
+
+ SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
+
+ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
+ SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers