This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 007a778c70a0208f66cd34b2a44cfefea8cbc5e9
Author: Thomas Munro <[email protected]>
AuthorDate: Fri Apr 14 10:52:58 2023 +1200

    Fix PHJ match bit initialization.
    
    Hash join tuples reuse the HOT status bit to indicate match status
    during hash join execution. Correct reuse requires clearing the bit in
    all tuples. Serial hash join and parallel multi-batch hash join do so
    upon inserting the tuple into the hashtable. Single batch parallel hash
    join and batch 0 of unexpected multi-batch hash joins forgot to do this.
    
    It hadn't come up before because hashtable tuple match bits are only
    used for right and full outer joins and parallel ROJ and FOJ were
    unsupported. 11c2d6fdf5 introduced support for parallel ROJ/FOJ but
    neglected to ensure the match bits were reset.
    
    Author: Melanie Plageman <[email protected]>
    Reported-by: Richard Guo <[email protected]>
    Discussion: https://postgr.es/m/flat/CAMbWs48Nde1Mv%3DBJv6_vXmRKHMuHZm2Q_g4F6Z3_pn%2B3EV6BGQ%40mail.gmail.com
---
 src/backend/executor/nodeHash.c         |  1 +
 src/test/regress/expected/join_hash.out | 37 +++++++++++++++++++++++++++++++++
 src/test/regress/sql/join_hash.sql      | 27 ++++++++++++++++++++++++
 3 files changed, 65 insertions(+)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 39db08164e0..1a305de21e6 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -2004,6 +2004,7 @@ retry:
                /* Store the hash value in the HashJoinTuple header. */
                hashTuple->hashvalue = hashvalue;
                memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+               HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
 
                /* Push it onto the front of the bucket's list */
                ExecParallelHashPushTuple(&hashtable->buckets.shared[bucketno],
diff --git a/src/test/regress/expected/join_hash.out b/src/test/regress/expected/join_hash.out
index 250704efbd7..b1f780ff7b8 100644
--- a/src/test/regress/expected/join_hash.out
+++ b/src/test/regress/expected/join_hash.out
@@ -1031,6 +1031,43 @@ explain (costs off) select * from join_hash_t_small, join_hash_t_big where a = b
 (7 rows)
 
 rollback to settings;
+-- Hash join reuses the HOT status bit to indicate match status. This can only
+-- be guaranteed to produce correct results if all the hash join tuple match
+-- bits are reset before reuse. This is done upon loading them into the
+-- hashtable.
+SAVEPOINT settings;
+SET enable_parallel_hash = on;
+SET min_parallel_table_scan_size = 0;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+CREATE TABLE hjtest_matchbits_t1(id int);
+CREATE TABLE hjtest_matchbits_t2(id int);
+INSERT INTO hjtest_matchbits_t1 VALUES (1);
+INSERT INTO hjtest_matchbits_t2 VALUES (2);
+-- Update should create a HOT tuple. If this status bit isn't cleared, we won't
+-- correctly emit the NULL-extended unmatching tuple in full hash join.
+UPDATE hjtest_matchbits_t2 set id = 2;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
+ id | id 
+----+----
+  1 |   
+    |  2
+(2 rows)
+
+-- Test serial full hash join.
+-- Resetting parallel_setup_cost should force a serial plan.
+-- Just to be safe, however, set enable_parallel_hash to off, as parallel full
+-- hash joins are only supported with shared hashtables.
+RESET parallel_setup_cost;
+SET enable_parallel_hash = off;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
+ id | id 
+----+----
+  1 |   
+    |  2
+(2 rows)
+
+ROLLBACK TO settings;
 rollback;
 -- Verify that hash key expressions reference the correct
 -- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's
diff --git a/src/test/regress/sql/join_hash.sql b/src/test/regress/sql/join_hash.sql
index 01961d1ce6e..0858e14040f 100644
--- a/src/test/regress/sql/join_hash.sql
+++ b/src/test/regress/sql/join_hash.sql
@@ -539,6 +539,33 @@ rollback to settings;
 
 rollback;
 
+-- Hash join reuses the HOT status bit to indicate match status. This can only
+-- be guaranteed to produce correct results if all the hash join tuple match
+-- bits are reset before reuse. This is done upon loading them into the
+-- hashtable.
+SAVEPOINT settings;
+SET enable_parallel_hash = on;
+SET min_parallel_table_scan_size = 0;
+SET parallel_setup_cost = 0;
+SET parallel_tuple_cost = 0;
+CREATE TABLE hjtest_matchbits_t1(id int);
+CREATE TABLE hjtest_matchbits_t2(id int);
+INSERT INTO hjtest_matchbits_t1 VALUES (1);
+INSERT INTO hjtest_matchbits_t2 VALUES (2);
+-- Update should create a HOT tuple. If this status bit isn't cleared, we won't
+-- correctly emit the NULL-extended unmatching tuple in full hash join.
+UPDATE hjtest_matchbits_t2 set id = 2;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
+-- Test serial full hash join.
+-- Resetting parallel_setup_cost should force a serial plan.
+-- Just to be safe, however, set enable_parallel_hash to off, as parallel full
+-- hash joins are only supported with shared hashtables.
+RESET parallel_setup_cost;
+SET enable_parallel_hash = off;
+SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id;
+ROLLBACK TO settings;
+
+rollback;
 
 -- Verify that hash key expressions reference the correct
 -- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to