This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 09f70e8589afd7c5a5a33ea9a0d6482934ea252c
Author: Chris Hajas <[email protected]>
AuthorDate: Thu Jan 18 14:33:22 2024 -0800

    Fix "unrecognized join type" error with LASJ Not-In and network types
    
    During the GP7 merge, separate join selectivity functions were added for 
network
    types. However, GPDB has a separate join, JOIN_LASJ_NOTIN, that does not
    exist in Postgres and thus was not added as a possible case.
    
    I followed the same pattern in selfuncs.c, treating it as a semi/anti
    join for calculating the selectivity.
---
 src/backend/utils/adt/network_selfuncs.c        |  1 +
 src/test/regress/expected/join_gp.out           | 23 ++++++++++++++++++
 src/test/regress/expected/join_gp_optimizer.out | 31 +++++++++++++++++++++++++
 src/test/regress/sql/join_gp.sql                |  6 +++++
 4 files changed, 61 insertions(+)

diff --git a/src/backend/utils/adt/network_selfuncs.c 
b/src/backend/utils/adt/network_selfuncs.c
index dca2c63212..db1df35d86 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -224,6 +224,7 @@ networkjoinsel(PG_FUNCTION_ARGS)
                        break;
                case JOIN_SEMI:
                case JOIN_ANTI:
+               case JOIN_LASJ_NOTIN:
                        /* Here, it's important that we pass the outer var on 
the left. */
                        if (!join_is_reversed)
                                selec = networkjoinsel_semi(operator, 
&vardata1, &vardata2);
diff --git a/src/test/regress/expected/join_gp.out 
b/src/test/regress/expected/join_gp.out
index e1d8aa6831..af13a59e0b 100644
--- a/src/test/regress/expected/join_gp.out
+++ b/src/test/regress/expected/join_gp.out
@@ -2579,3 +2579,26 @@ drop table if exists bar_PT2;
 drop table if exists bar_PT3;
 drop table if exists bar_List_PT1;
 drop table if exists bar_List_PT2;
+-- Test that left-anti-semi-join not-in works with netowrk types
+CREATE TABLE inverse (cidr inet);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 
'cidr' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO inverse values ('192.168.100.199');
+explain SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
+                                               QUERY PLAN                      
                         
+--------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)  
(cost=10000000000.00..10027702610.21 rows=16 width=4)
+   ->  Nested Loop Left Anti Semi (Not-In) Join  
(cost=10000000000.00..10027702610.00 rows=5 width=4)
+         Join Filter: (inverse.cidr <<= inverse_1.cidr)
+         ->  Seq Scan on inverse  (cost=0.00..210.00 rows=17600 width=32)
+         ->  Materialize  (cost=0.00..1178.00 rows=52800 width=32)
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)  
(cost=0.00..914.00 rows=52800 width=32)
+                     ->  Seq Scan on inverse inverse_1  (cost=0.00..210.00 
rows=17600 width=32)
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
+ ?column? 
+----------
+(0 rows)
+
diff --git a/src/test/regress/expected/join_gp_optimizer.out 
b/src/test/regress/expected/join_gp_optimizer.out
index e1e35cb45b..9ebbaefc89 100644
--- a/src/test/regress/expected/join_gp_optimizer.out
+++ b/src/test/regress/expected/join_gp_optimizer.out
@@ -2473,3 +2473,34 @@ drop table if exists bar_PT2;
 drop table if exists bar_PT3;
 drop table if exists bar_List_PT1;
 drop table if exists bar_List_PT2;
+-- Test that left-anti-semi-join not-in works with netowrk types
+CREATE TABLE inverse (cidr inet);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 
'cidr' as the Cloudberry Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO inverse values ('192.168.100.199');
+explain SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
+                                                                               
                                 QUERY PLAN                                     
                                                                            
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Result  (cost=0.00..1324032.93 rows=1 width=4)
+   ->  Gather Motion 3:1  (slice1; segments: 3)  (cost=0.00..1324032.93 rows=1 
width=1)
+         ->  Result  (cost=0.00..1324032.93 rows=1 width=1)
+               Filter: (NOT CASE WHEN ((count((true))) > '0'::bigint) THEN 
CASE WHEN ((sum((CASE WHEN ((inverse_1.cidr <<= inverse.cidr) IS NULL) THEN 1 
ELSE 0 END))) = (count((true)))) THEN NULL::boolean ELSE true END ELSE false 
END)
+               ->  GroupAggregate  (cost=0.00..1324032.93 rows=1 width=16)
+                     Group Key: inverse_1.cidr, inverse_1.ctid, 
inverse_1.gp_segment_id
+                     ->  Sort  (cost=0.00..1324032.93 rows=1 width=23)
+                           Sort Key: inverse_1.cidr, inverse_1.ctid, 
inverse_1.gp_segment_id
+                           ->  Nested Loop Left Join  (cost=0.00..1324032.93 
rows=1 width=27)
+                                 Join Filter: ((inverse_1.cidr <<= 
inverse.cidr) IS NOT FALSE)
+                                 ->  Seq Scan on inverse inverse_1  
(cost=0.00..431.00 rows=1 width=18)
+                                 ->  Materialize  (cost=0.00..431.00 rows=1 
width=9)
+                                       ->  Result  (cost=0.00..431.00 rows=1 
width=9)
+                                             ->  Broadcast Motion 3:3  
(slice2; segments: 3)  (cost=0.00..431.00 rows=1 width=8)
+                                                   ->  Seq Scan on inverse  
(cost=0.00..431.00 rows=1 width=8)
+ Optimizer: Pivotal Optimizer (GPORCA)
+(16 rows)
+
+SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
+ ?column? 
+----------
+(0 rows)
+
diff --git a/src/test/regress/sql/join_gp.sql b/src/test/regress/sql/join_gp.sql
index f7584a2b4a..643c829a65 100644
--- a/src/test/regress/sql/join_gp.sql
+++ b/src/test/regress/sql/join_gp.sql
@@ -966,3 +966,9 @@ drop table if exists bar_PT2;
 drop table if exists bar_PT3;
 drop table if exists bar_List_PT1;
 drop table if exists bar_List_PT2;
+
+-- Test that left-anti-semi-join not-in works with netowrk types
+CREATE TABLE inverse (cidr inet);
+INSERT INTO inverse values ('192.168.100.199');
+explain SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));
+SELECT 1 FROM inverse WHERE NOT (cidr <<= ANY(SELECT * FROM inverse));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to