Re: [HACKERS] merge command - GSoC progress

Simon Riggs Wed, 04 Aug 2010 09:13:57 -0700

On Wed, 2010-08-04 at 15:36 +0100, Simon Riggs wrote:
> On Wed, 2010-08-04 at 17:23 +0800, Boxuan Zhai wrote:
> > Dear Robert,
> >  
> > I am just considering that there may be some logical mistakes for my
> > rule rewriting strategy of MERGE actions. 
> >  
> > In my current design, if we find that an action type, say UPDATE, is
> > replaced by INSTEAD rules, we will remove all the actions of this type
> > from the MERGE command, as if they are not be specified by user from
> > the beginning. See the test example in my pages for this situation.
> > https://wiki.postgresql.org/wiki/MergeTestExamples#With_INSTEAD_rules
> 
> It seems sensible to use the test files that I wrote for MERGE in 2008,
> published to -hackers at that time.


Even more sensible for me to include it as a patch, with the files in
the right places and the schedules updated.

-- 
 Simon Riggs           www.2ndQuadrant.com
 PostgreSQL Development, 24x7 Support, Training and Services

diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out
new file mode 100644
index 0000000..18e3891
--- /dev/null
+++ b/src/test/regress/expected/merge.out
@@ -0,0 +1,279 @@
+--
+-- MERGE
+--
+CREATE TABLE target (id integer, balance integer);
+CREATE TABLE source (id integer, balance integer);
+INSERT INTO target VALUES (1, 10);
+INSERT INTO target VALUES (2, 20);
+INSERT INTO target VALUES (3, 30);
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      20
+  3 |      30
+(3 rows)
+
+--
+-- initial tests
+--
+-- empty source means 0 rows touched
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+;
+-- insert some source rows to work from
+INSERT INTO source VALUES (2, 5);
+INSERT INTO source VALUES (3, 20);
+INSERT INTO source VALUES (4, 40);
+SELECT * FROM source;
+ id | balance 
+----+---------
+  2 |       5
+  3 |      20
+  4 |      40
+(3 rows)
+
+-- do a simple equivalent of an UPDATE join
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      25
+  3 |      50
+(3 rows)
+
+ROLLBACK;
+-- do a simple equivalent of an INSERT SELECT
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      20
+  3 |      30
+  4 |      40
+(4 rows)
+
+ROLLBACK;
+-- now the classic UPSERT
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      25
+  3 |      50
+  4 |      40
+(4 rows)
+
+ROLLBACK;
+--
+-- Non-standard functionality
+-- 
+-- do a simple equivalent of a DELETE join
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	DELETE
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+(1 row)
+
+ROLLBACK;
+-- now the classic UPSERT, with a DELETE
+-- the Standard doesn't allow the DELETE clause for some reason,
+-- though other implementations do
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  3 |      50
+  4 |      40
+(3 rows)
+
+ROLLBACK;
+-- Prepare the test data to generate multiple matching rows for a single target
+INSERT INTO source VALUES (3, 5);
+SELECT * FROM source ORDER BY id, balance;
+ id | balance 
+----+---------
+  2 |       5
+  3 |       5
+  3 |      20
+  4 |      40
+(4 rows)
+
+-- we now have a duplicate key in source, so when we join to
+-- target we will generate 2 matching rows, not one
+-- In the following statement row id=3 will be both updated
+-- and deleted by this statement and so will cause a run-time error
+-- when the second change to that row is detected
+-- This next SQL statement
+--  fails according to standard
+--  fails in PostgreSQL implementation
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+ERROR:  multiple actions on single target row
+ 
+ROLLBACK;
+
+-- This next SQL statement
+--  fails according to standard
+--  suceeds in PostgreSQL implementation by simply ignoring the second
+--  matching row since it activates no WHEN clause
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+ROLLBACK;
+-- Now lets prepare the test data to generate 2 non-matching rows
+DELETE FROM source WHERE id = 3 AND balance = 5;
+INSERT INTO source VALUES (4, 5);
+SELECT * FROM source;
+ id | balance 
+----+---------
+  2 |       5
+  3 |      20
+  4 |       5
+  4 |      40
+(4 rows)
+
+-- This next SQL statement
+--  suceeds according to standard (yes, it is inconsistent)
+--  suceeds in PostgreSQL implementation, though could easily fail if
+--  there was an appropriate unique constraint
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      20
+  3 |      30
+  4 |       5
+  4 |      40
+(5 rows)
+
+ROLLBACK;
+-- This next SQL statement works, but since there is no WHEN clause that
+-- applies to non-matching rows, SQL standard requires us to generate
+-- rows with DEFAULT VALUES for all columns, which is why we support the
+-- syntax DO NOTHING (similar to the way Rules work) in addition
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED AND s.balance > 100 THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      20
+  3 |      30
+    |
+    |
+(5 rows)
+
+ROLLBACK;
+-- This next SQL statement suceeds, but does nothing since there are
+-- only non-matching rows that do not activate a WHEN clause, so we
+-- provide syntax to just ignore them, rather than allowing data quality
+-- problems
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED AND s.balance > 100 THEN
+	INSERT VALUES (s.id, s.balance)
+WHEN NOT MATCHED
+	DO NOTHING
+;
+SELECT * FROM target;
+ id | balance 
+----+---------
+  1 |      10
+  2 |      20
+  3 |      30
+(3 rows)
+
+ROLLBACK;
+--
+-- Weirdness
+--
+-- MERGE statement containing WHEN clauses that are never executable
+-- NOT an error under the standard
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 0 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+WHEN NOT MATCHED THEN /* never executed because of order of WHEN clauses */
+	INSERT VALUES (s.id, s.balance + 10)
+WHEN MATCHED THEN /* never executed because of order of WHEN clauses */
+	UPDATE SET balance = t.balance + s.balance
+;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 7529777..61c7da6 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -91,7 +91,7 @@ test: select_views portals_p2 foreign_key cluster dependency guc bitmapops combo
 # NB: temp.sql does a reconnect which transiently uses 2 connections,
 # so keep this parallel group to at most 19 tests
 # ----------
-test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml
+test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid conversion truncate alter_table sequence polymorphism rowtypes returning largeobject with xml merge
 
 # run stats by itself because its delay may be insufficient under heavy load
 test: stats
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 5f185f9..0fe7006 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -122,4 +122,5 @@ test: returning
 test: largeobject
 test: with
 test: xml
+test: merge
 test: stats
diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql
new file mode 100644
index 0000000..7ecd02e
--- /dev/null
+++ b/src/test/regress/sql/merge.sql
@@ -0,0 +1,200 @@
+--
+-- MERGE
+--
+CREATE TABLE target (id integer, balance integer);
+CREATE TABLE source (id integer, balance integer);
+INSERT INTO target VALUES (1, 10);
+INSERT INTO target VALUES (2, 20);
+INSERT INTO target VALUES (3, 30);
+SELECT * FROM target;
+
+--
+-- initial tests
+--
+-- empty source means 0 rows touched
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+;
+-- insert some source rows to work from
+INSERT INTO source VALUES (2, 5);
+INSERT INTO source VALUES (3, 20);
+INSERT INTO source VALUES (4, 40);
+SELECT * FROM source;
+
+-- do a simple equivalent of an UPDATE join
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+;
+
+ROLLBACK;
+-- do a simple equivalent of an INSERT SELECT
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+
+ROLLBACK;
+-- now the classic UPSERT
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+
+ROLLBACK;
+--
+-- Non-standard functionality
+-- 
+-- do a simple equivalent of a DELETE join
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED THEN
+	DELETE
+;
+SELECT * FROM target;
+
+ROLLBACK;
+-- now the classic UPSERT, with a DELETE
+-- the Standard doesn't allow the DELETE clause for some reason,
+-- though other implementations do
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+
+ROLLBACK;
+-- Prepare the test data to generate multiple matching rows for a single target
+INSERT INTO source VALUES (3, 5);
+SELECT * FROM source ORDER BY id, balance;
+
+-- we now have a duplicate key in source, so when we join to
+-- target we will generate 2 matching rows, not one
+-- In the following statement row id=3 will be both updated
+-- and deleted by this statement and so will cause a run-time error
+-- when the second change to that row is detected
+-- This next SQL statement
+--  fails according to standard
+--  fails in PostgreSQL implementation
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+ERROR:  multiple actions on single target row
+ 
+ROLLBACK;
+
+-- This next SQL statement
+--  fails according to standard
+--  suceeds in PostgreSQL implementation by simply ignoring the second
+--  matching row since it activates no WHEN clause
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 10 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+ROLLBACK;
+-- Now lets prepare the test data to generate 2 non-matching rows
+DELETE FROM source WHERE id = 3 AND balance = 5;
+INSERT INTO source VALUES (4, 5);
+SELECT * FROM source;
+
+-- This next SQL statement
+--  suceeds according to standard (yes, it is inconsistent)
+--  suceeds in PostgreSQL implementation, though could easily fail if
+--  there was an appropriate unique constraint
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+
+ROLLBACK;
+-- This next SQL statement works, but since there is no WHEN clause that
+-- applies to non-matching rows, SQL standard requires us to generate
+-- rows with DEFAULT VALUES for all columns, which is why we support the
+-- syntax DO NOTHING (similar to the way Rules work) in addition
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED AND s.balance > 100 THEN
+	INSERT VALUES (s.id, s.balance)
+;
+SELECT * FROM target;
+
+ROLLBACK;
+-- This next SQL statement suceeds, but does nothing since there are
+-- only non-matching rows that do not activate a WHEN clause, so we
+-- provide syntax to just ignore them, rather than allowing data quality
+-- problems
+BEGIN;
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN NOT MATCHED AND s.balance > 100 THEN
+	INSERT VALUES (s.id, s.balance)
+WHEN NOT MATCHED
+	DO NOTHING
+;
+SELECT * FROM target;
+
+ROLLBACK;
+--
+-- Weirdness
+--
+-- MERGE statement containing WHEN clauses that are never executable
+-- NOT an error under the standard
+MERGE into target t
+USING (select * from source) AS s
+ON t.id = s.id
+WHEN MATCHED AND s.balance > 0 THEN
+	UPDATE SET balance = t.balance + s.balance
+WHEN MATCHED THEN
+	DELETE
+WHEN NOT MATCHED THEN
+	INSERT VALUES (s.id, s.balance)
+WHEN NOT MATCHED THEN /* never executed because of order of WHEN clauses */
+	INSERT VALUES (s.id, s.balance + 10)
+WHEN MATCHED THEN /* never executed because of order of WHEN clauses */
+	UPDATE SET balance = t.balance + s.balance
+;

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] merge command - GSoC progress

Reply via email to