On Sat, 2004-11-27 at 12:55 -0500, Tom Lane wrote:
> This seems like the most appropriate answer to me; I was thinking of
> doing that earlier when Fabien and I were fooling with plpgsql error
> reporting, but didn't get around to it.
Attached is a patch that implements a rough draft of this (it also
includes an improved version of the "unreachable code" patch that
includes your suggested fixes). Two questions about the patch:
(1) It doesn't report syntax errors in unreachable code. I suppose this
ought to be done, right?
(2) The syntax error message is wrong (we print a character offset and
query context that is relative to the CREATE FUNCTION statement, not the
individual SQL statement we're executing). I fooled around a bit with
defining a custom ereport() callback to print the right line number and
query context, but I couldn't get it right. Do you have any guidance on
the proper way to do this.
> As long as you only do this in check_function_bodies mode it seems
> safe enough. One possible problem (if it's done towards the end of
> parsing as you've suggested for dead-code checking) is that a syntax
> error in a SQL statement might confuse the plpgsql parser into
> misparsing statement boundaries, which could lead to a plpgsql parse
> error much further down, such as a "missing END" at the end of the
> function. The error would be more useful if reported immediately
> after the putative SQL statement is parsed. Not sure if that's
> hard or not. (The same remark applies to dead code checking, now
> that I think about it.)
In the case of dead code checking, I don't think it matters. Doing the
syntax check in gram.y might be a better idea, I'll take a look doing
that...
-Neil
#
# patch "src/pl/plpgsql/src/pl_comp.c"
# from [94dec38ffccf5fceed00c2b1007cf2b0c238af4b]
# to [3223e07f0046277dbea155774132975d6e636fa7]
#
# patch "src/test/regress/expected/plpgsql.out"
# from [1b0591090144d66281066d54d81ef9a238809b0d]
# to [320d79127d4f067eea8c09f0e17174721e96a9df]
#
# patch "src/test/regress/sql/plpgsql.sql"
# from [4ca1c085eda5ca4fa0397354d887a28d10236594]
# to [261d4fa85aef9eb40c2922e433af025a5b19f38a]
#
--- src/pl/plpgsql/src/pl_comp.c
+++ src/pl/plpgsql/src/pl_comp.c
@@ -52,6 +52,7 @@
#include "nodes/makefuncs.h"
#include "parser/gramparse.h"
#include "parser/parse_type.h"
+#include "parser/parser.h"
#include "tcop/tcopprot.h"
#include "utils/array.h"
#include "utils/builtins.h"
@@ -130,6 +131,7 @@
static void plpgsql_HashTableInsert(PLpgSQL_function *function,
PLpgSQL_func_hashkey *func_key);
static void plpgsql_HashTableDelete(PLpgSQL_function *function);
+static void check_function(PLpgSQL_function *function);
/*
* This routine is a crock, and so is everyplace that calls it. The problem
@@ -152,8 +154,10 @@
/* ----------
* plpgsql_compile Make an execution tree for a PL/pgSQL function.
*
- * If forValidator is true, we're only compiling for validation purposes,
- * and so some checks are skipped.
+ * If forValidator is true, we're only compiling for validation
+ * purposes; this means we skip some checks, as well as making some
+ * additional compile-time checks that we only want to do once (at
+ * function definition time), not very time the function is compiled.
*
* Note: it's important for this to fall through quickly if the function
* has already been compiled.
@@ -293,7 +297,7 @@
* Setup error traceback support for ereport()
*/
plerrcontext.callback = plpgsql_compile_error_callback;
- plerrcontext.arg = forValidator ? proc_source : (char *) NULL;
+ plerrcontext.arg = forValidator ? proc_source : NULL;
plerrcontext.previous = error_context_stack;
error_context_stack = &plerrcontext;
@@ -595,17 +599,16 @@
plpgsql_add_initdatums(NULL);
/*
- * Now parse the functions text
+ * Now parse the function's text
*/
parse_rc = plpgsql_yyparse();
if (parse_rc != 0)
elog(ERROR, "plpgsql parser returned %d", parse_rc);
plpgsql_scanner_finish();
- pfree(proc_source);
/*
- * If that was successful, complete the functions info.
+ * If that was successful, complete the function's info.
*/
function->fn_nargs = procStruct->pronargs;
for (i = 0; i < function->fn_nargs; i++)
@@ -616,12 +619,22 @@
function->datums[i] = plpgsql_Datums[i];
function->action = plpgsql_yylval.program;
+ /*
+ * Perform whatever additional compile-time checks we can. Note
+ * that we only do this when validating the function; this is so
+ * that (a) we don't bother the user with warnings when the
+ * function is invoked (b) we don't take the performance hit of
+ * doing the analysis more than once per function definition.
+ */
+ if (forValidator)
+ check_function(function);
+
/* Debug dump for completed functions */
if (plpgsql_DumpExecTree)
plpgsql_dumptree(function);
/*
- * add it to the hash table
+ * Add it to the hash table
*/
plpgsql_HashTableInsert(function, hashkey);
@@ -631,6 +644,7 @@
error_context_stack = plerrcontext.previous;
plpgsql_error_funcname = NULL;
plpgsql_error_lineno = 0;
+ pfree(proc_source);
return function;
}
@@ -664,8 +678,198 @@
plpgsql_error_funcname, plpgsql_error_lineno);
}
+/*
+ * The PL/PgSQL parser will often assume that an unrecognized keyword
+ * indicates the beginning of a SQL statement. That avoids the need to
+ * duplicate parts of the SQL grammar in the PL/PgSQL parser, but it
+ * also means we are very liberal in the input we accept. To try and
+ * catch some of the more obviously invalid input, we run strings that
+ * we've guessed to be SQL statements through the backend's parser.
+ *
+ * We only invoke the "raw" parser (not the analyzer); this doesn't do
+ * any database access, it just checks that the query is (mostly)
+ * syntactically correct.
+ *
+ * XXX: do we want to check that certain SQL statements cannot be
+ * used?
+ *
+ * XXX: fix line number in syntax error message
+ *
+ * XXX: fix the context that is printed on error
+ */
+static void
+check_sql_statement(char *stmt_string)
+{
+ MemoryContext tmpCxt;
+ MemoryContext oldCxt;
+ /* XXX: reset a passed-in temporary context, don't create/destroy */
+ tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "PL/PgSQL syntax check",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldCxt = MemoryContextSwitchTo(tmpCxt);
+
+ raw_parser(stmt_string);
+
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextDelete(tmpCxt);
+}
+
/*
+ * Emit a warning that the statement 'unreach' is unreachable, due to
+ * the effect of the preceding statement 'cause'.
+ */
+static void
+report_unreachable_stmt(PLpgSQL_stmt *unreach, PLpgSQL_stmt *cause)
+{
+ /*
+ * XXX: adjust the line number that is emitted along with the
+ * warning message. This is a kludge.
+ */
+ int old_lineno = plpgsql_error_lineno;
+ plpgsql_error_lineno = unreach->lineno;
+
+ /*
+ * XXX: error code?
+ */
+ ereport(WARNING,
+ (errmsg("%s is unreachable, due to %s near line %d",
+ plpgsql_stmt_typename(unreach),
+ plpgsql_stmt_typename(cause),
+ cause->lineno)));
+
+ plpgsql_error_lineno = old_lineno;
+}
+
+/*
+ * Given a list of PL/PgSQL statements, perform some compile-time
+ * checks on them.
+ *
+ * Note when checking for unreachable code, we return as soon as we
+ * have emitted "unreachable" warnings for a given sequence of
+ * statements. So that given:
+ *
+ * EXIT; EXIT; EXIT
+ *
+ * we will see the first "EXIT", issue warnings for the second and
+ * third unreachable EXIT statements, and then return, so that we
+ * don't issue bogus "unreachable" warnings when we see the second
+ * EXIT.
+ *
+ * XXX: currently we walk the PL/PgSQL execution tree by hand. It
+ * would probably be worth refactoring this to use something akin to
+ * the tree walker infrastructure in the backend.
+ */
+static void
+check_stmts(PLpgSQL_stmts *stmts)
+{
+ int i;
+
+ for (i = 0; i < stmts->stmts_used; i++)
+ {
+ PLpgSQL_stmt *stmt = stmts->stmts[i];
+
+ switch (stmt->cmd_type)
+ {
+ case PLPGSQL_STMT_RETURN:
+ if (i + 1 < stmts->stmts_used)
+ {
+ report_unreachable_stmt(stmts->stmts[i+1], stmt);
+ return;
+ }
+ case PLPGSQL_STMT_EXIT:
+ {
+ /*
+ * If the EXIT statement has a conditional, it is
+ * not guaranteed to exit the loop, so don't issue
+ * a warning.
+ */
+ PLpgSQL_stmt_exit *exit_stmt = (PLpgSQL_stmt_exit *) stmt;
+ if (exit_stmt->cond == NULL && i + 1 < stmts->stmts_used)
+ {
+ report_unreachable_stmt(stmts->stmts[i+1], stmt);
+ return;
+ }
+ }
+ break;
+ case PLPGSQL_STMT_RAISE:
+ {
+ PLpgSQL_stmt_raise *raise_stmt = (PLpgSQL_stmt_raise *) stmt;
+ /*
+ * Only RAISE EXCEPTION (converted to elog_level =
+ * ERROR by the parser) will exit the current
+ * block.
+ */
+ if (raise_stmt->elog_level == ERROR && i + 1 < stmts->stmts_used)
+ {
+ report_unreachable_stmt(stmts->stmts[i+1], stmt);
+ return;
+ }
+ }
+ break;
+ case PLPGSQL_STMT_BLOCK:
+ {
+ PLpgSQL_stmt_block *block_stmt = (PLpgSQL_stmt_block *) stmt;
+ check_stmts(block_stmt->body);
+
+ if (block_stmt->exceptions)
+ {
+ PLpgSQL_exceptions *exceptions;
+ int j;
+
+ exceptions = block_stmt->exceptions;
+ for (j = 0; j < exceptions->exceptions_used; j++)
+ check_stmts(exceptions->exceptions[j]->action);
+ }
+ }
+ break;
+ case PLPGSQL_STMT_IF:
+ check_stmts(((PLpgSQL_stmt_if *) stmt)->true_body);
+ check_stmts(((PLpgSQL_stmt_if *) stmt)->false_body);
+ break;
+ case PLPGSQL_STMT_LOOP:
+ check_stmts(((PLpgSQL_stmt_loop *) stmt)->body);
+ break;
+ case PLPGSQL_STMT_WHILE:
+ check_stmts(((PLpgSQL_stmt_while *) stmt)->body);
+ break;
+ case PLPGSQL_STMT_FORI:
+ check_stmts(((PLpgSQL_stmt_fori *) stmt)->body);
+ break;
+ case PLPGSQL_STMT_FORS:
+ {
+ PLpgSQL_stmt_fors *fors_stmt = (PLpgSQL_stmt_fors *) stmt;
+ check_sql_statement(fors_stmt->query->query);
+ check_stmts(fors_stmt->body);
+ }
+ break;
+ case PLPGSQL_STMT_DYNFORS:
+ check_stmts(((PLpgSQL_stmt_dynfors *) stmt)->body);
+ break;
+ case PLPGSQL_STMT_EXECSQL:
+ check_sql_statement(((PLpgSQL_stmt_execsql *) stmt)->sqlstmt->query);
+ break;
+ default:
+ /* do nothing */
+ break;
+ }
+ }
+}
+
+/*
+ * Issue warnings about various ill-advised constructs in the function
+ * body. We don't do very many compile-time checks at the moment, but
+ * a few is better than none...
+ */
+static void
+check_function(PLpgSQL_function *function)
+{
+ check_stmts(function->action->body);
+}
+
+/*
* Fetch the argument names, if any, from the proargnames field of the
* pg_proc tuple. Results are palloc'd.
*/
--- src/test/regress/expected/plpgsql.out
+++ src/test/regress/expected/plpgsql.out
@@ -2032,3 +2032,153 @@
DETAIL: Key (f1)=(2) is not present in table "master".
drop function trap_foreign_key(int);
drop function trap_foreign_key_2();
+--
+-- issue warnings about unreachable code
+--
+create function exit_warn() returns int as $$
+declare x int;
+begin
+ x := 5;
+ loop
+ x := x + 1;
+ exit;
+ x := x + 2;
+ end loop;
+ x := x + 3;
+ return x;
+end;$$ language 'plpgsql';
+WARNING: assignment is unreachable, due to exit near line 6
+CONTEXT: compile of PL/pgSQL function "exit_warn" near line 7
+create function exit_no_warn() returns int as $$
+declare x int;
+begin
+ x := 5;
+ loop
+ x := x + 5;
+ exit when x > 20;
+ x := x - 1;
+ end loop;
+ x := x + 3;
+ return x;
+end;$$ language 'plpgsql';
+create function return_warn() returns int as $$
+declare
+ a int;
+ b int;
+begin
+ a := 3;
+ b := 2;
+ if a > b then
+ return 5;
+ begin
+ return 10;
+ end;
+ end if;
+ return 15;
+end;$$ language 'plpgsql';
+WARNING: block variables initialization is unreachable, due to return near line 8
+CONTEXT: compile of PL/pgSQL function "return_warn" near line 9
+create function return_warn2() returns int as $$
+begin
+ if 5 > 5 then
+ return 5;
+ return 10;
+ end if;
+ return 10;
+ return 15;
+ return 20;
+end;$$ language 'plpgsql';
+WARNING: return is unreachable, due to return near line 3
+CONTEXT: compile of PL/pgSQL function "return_warn2" near line 4
+WARNING: return is unreachable, due to return near line 6
+CONTEXT: compile of PL/pgSQL function "return_warn2" near line 7
+create function raise_warn() returns int as $$
+declare x int;
+begin
+ x := 10;
+ begin
+ raise exception 'some random exception';
+ return x;
+ exception
+ when RAISE_EXCEPTION then NULL;
+ end;
+
+ begin
+ raise exception 'foo';
+ exception
+ when RAISE_EXCEPTION then
+ return x;
+ x := x + 1;
+ end;
+
+ begin
+ raise notice 'not an exception';
+ return x;
+ end;
+end;$$ language 'plpgsql';
+WARNING: return is unreachable, due to raise near line 5
+CONTEXT: compile of PL/pgSQL function "raise_warn" near line 6
+WARNING: assignment is unreachable, due to return near line 15
+CONTEXT: compile of PL/pgSQL function "raise_warn" near line 16
+-- note that we only want to emit warnings at CREATE FUNCTION time, not
+-- when the function is invoked.
+SELECT exit_warn();
+ exit_warn
+-----------
+ 9
+(1 row)
+
+SELECT return_warn();
+ return_warn
+-------------
+ 5
+(1 row)
+
+SELECT return_warn2();
+ return_warn2
+--------------
+ 10
+(1 row)
+
+SELECT raise_warn();
+ raise_warn
+------------
+ 10
+(1 row)
+
+--
+-- reject function definitions that contain malformed SQL queries at
+-- compile-time, where possible
+--
+create function bad_sql1() returns int as $$
+declare a int;
+begin
+ a := 5;
+ Johnny Yuma;
+ a := 10;
+ return a;
+end$$ language 'plpgsql';
+ERROR: syntax error at or near "Johnny" at character 45
+LINE 1: create function bad_sql1() returns int as $$
+ ^
+create function bad_sql2() returns int as $$
+declare r record;
+begin
+ for r in I fought the law, the law won LOOP
+ raise notice 'in loop';
+ end loop;
+ return 5;
+end;$$ language 'plpgsql';
+ERROR: syntax error at or near "I" at character 46
+LINE 2: declare r record;
+ ^
+create function bad_sql3() returns int as $$
+declare x int;
+begin
+ return 10;
+ -- report syntax errors even in unreachable code
+ ekgekrgkjge djngrg;
+ return 15;
+end;$$ language 'plpgsql';
+WARNING: SQL statement is unreachable, due to return near line 3
+CONTEXT: compile of PL/pgSQL function "bad_sql3" near line 5
--- src/test/regress/sql/plpgsql.sql
+++ src/test/regress/sql/plpgsql.sql
@@ -1764,3 +1764,123 @@
drop function trap_foreign_key(int);
drop function trap_foreign_key_2();
+
+--
+-- issue warnings about unreachable code
+--
+create function exit_warn() returns int as $$
+declare x int;
+begin
+ x := 5;
+ loop
+ x := x + 1;
+ exit;
+ x := x + 2;
+ end loop;
+ x := x + 3;
+ return x;
+end;$$ language 'plpgsql';
+
+create function exit_no_warn() returns int as $$
+declare x int;
+begin
+ x := 5;
+ loop
+ x := x + 5;
+ exit when x > 20;
+ x := x - 1;
+ end loop;
+ x := x + 3;
+ return x;
+end;$$ language 'plpgsql';
+
+create function return_warn() returns int as $$
+declare
+ a int;
+ b int;
+begin
+ a := 3;
+ b := 2;
+ if a > b then
+ return 5;
+ begin
+ return 10;
+ end;
+ end if;
+ return 15;
+end;$$ language 'plpgsql';
+
+create function return_warn2() returns int as $$
+begin
+ if 5 > 5 then
+ return 5;
+ return 10;
+ end if;
+ return 10;
+ return 15;
+ return 20;
+end;$$ language 'plpgsql';
+
+create function raise_warn() returns int as $$
+declare x int;
+begin
+ x := 10;
+ begin
+ raise exception 'some random exception';
+ return x;
+ exception
+ when RAISE_EXCEPTION then NULL;
+ end;
+
+ begin
+ raise exception 'foo';
+ exception
+ when RAISE_EXCEPTION then
+ return x;
+ x := x + 1;
+ end;
+
+ begin
+ raise notice 'not an exception';
+ return x;
+ end;
+end;$$ language 'plpgsql';
+
+-- note that we only want to emit warnings at CREATE FUNCTION time, not
+-- when the function is invoked.
+SELECT exit_warn();
+SELECT return_warn();
+SELECT return_warn2();
+SELECT raise_warn();
+
+--
+-- reject function definitions that contain malformed SQL queries at
+-- compile-time, where possible
+--
+
+create function bad_sql1() returns int as $$
+declare a int;
+begin
+ a := 5;
+ Johnny Yuma;
+ a := 10;
+ return a;
+end$$ language 'plpgsql';
+
+create function bad_sql2() returns int as $$
+declare r record;
+begin
+ for r in I fought the law, the law won LOOP
+ raise notice 'in loop';
+ end loop;
+ return 5;
+end;$$ language 'plpgsql';
+
+create function bad_sql3() returns int as $$
+declare x int;
+begin
+ return 10;
+ -- report syntax errors even in unreachable code
+ ekgekrgkjge djngrg;
+ return 15;
+end;$$ language 'plpgsql';
---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?
http://www.postgresql.org/docs/faqs/FAQ.html