(cloudberry) branch main updated: Revert "Ban enums as distribution and partition keys"

avamingli Wed, 13 Aug 2025 07:23:38 -0700

This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git



The following commit(s) were added to refs/heads/main by this push:
     new 39ef2bdd10b Revert "Ban enums as distribution and partition keys"
39ef2bdd10b is described below

commit 39ef2bdd10be535e2d588bfd9ff2e2e52b936169
Author: Brent Doil <[email protected]>
AuthorDate: Fri Jan 26 10:32:27 2024 -0500

    Revert "Ban enums as distribution and partition keys"
    
    This reverts commit a863997e6459e907979478f013b588447385ca07.
    
    The issues with restoring enums in distribution and partition
    keys has been resolved in pg_dump by the cherry-pick of
    7e7c5b683985c85fb990c4d49ab960cbc83434b4.
    
    A gpbackup fix disables gp_enable_segment_copy_checking before
    loading a table/matview with an enum hash distribution/partition,
    then reorganizing the table after it's loaded and resetting
    the GUC.
    
    With these two changes it's safe to unban enums as distribution
    and hash keys.
---
 contrib/btree_gin/expected/enum.out                |  29 ++--
 contrib/btree_gin/sql/enum.sql                     |  25 ++-
 src/backend/commands/tablecmds.c                   |   5 -
 src/backend/parser/parse_utilcmd.c                 |   7 -
 src/test/regress/expected/enum.out                 | 170 ++++++++++-----------
 src/test/regress/expected/enum_dist_part_ban.out   |  24 ---
 .../regress/expected/gpdist_legacy_opclasses.out   |  27 ++--
 .../expected/gpdist_legacy_opclasses_optimizer.out |  28 ++--
 src/test/regress/expected/partition_prune.out      |   4 +-
 .../regress/expected/partition_prune_optimizer.out |   4 +-
 src/test/regress/greenplum_schedule                |   2 -
 src/test/regress/sql/enum.sql                      |  22 +--
 src/test/regress/sql/enum_dist_part_ban.sql        |  22 ---
 src/test/regress/sql/gpdist_legacy_opclasses.sql   |   8 +-
 src/test/regress/sql/partition_prune.sql           |   2 +-
 15 files changed, 145 insertions(+), 234 deletions(-)

diff --git a/contrib/btree_gin/expected/enum.out 
b/contrib/btree_gin/expected/enum.out
index 2b0ddf11eb3..c4ac1174ea2 100644
--- a/contrib/btree_gin/expected/enum.out
+++ b/contrib/btree_gin/expected/enum.out
@@ -1,14 +1,11 @@
 set enable_seqscan=off;
 CREATE TYPE rainbow AS ENUM ('r','o','y','g','b','i','v');
 CREATE TABLE test_enum (
-   h int,
    i rainbow
 );
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'h' 
as the Greenplum Database data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-INSERT INTO test_enum VALUES (1, 'v'),(2, 'y'),(3, 'r'),(4, 'g'),(5, 'o'),(6, 
'i'),(7, 'b');
+INSERT INTO test_enum VALUES ('v'),('y'),('r'),('g'),('o'),('i'),('b');
 CREATE INDEX idx_enum ON test_enum USING gin (i);
-SELECT i FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
  i 
 ---
  r
@@ -16,7 +13,7 @@ SELECT i FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
  y
 (3 rows)
 
-SELECT i FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
  i 
 ---
  r
@@ -25,13 +22,13 @@ SELECT i FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
  g
 (4 rows)
 
-SELECT i FROM test_enum WHERE i='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i='g'::rainbow ORDER BY i;
  i 
 ---
  g
 (1 row)
 
-SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
  i 
 ---
  g
@@ -40,7 +37,7 @@ SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
  v
 (4 rows)
 
-SELECT i FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
  i 
 ---
  b
@@ -48,7 +45,7 @@ SELECT i FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
  v
 (3 rows)
 
-explain (costs off) SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+explain (costs off) SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
                      QUERY PLAN                      
 -----------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
@@ -59,17 +56,11 @@ explain (costs off) SELECT i FROM test_enum WHERE 
i>='g'::rainbow ORDER BY i;
                Recheck Cond: (i >= 'g'::rainbow)
                ->  Bitmap Index Scan on idx_enum
                      Index Cond: (i >= 'g'::rainbow)
- Optimizer: Pivotal Optimizer (GPORCA)
+ Optimizer: Postgres query optimizer
 (9 rows)
 
 -- make sure we handle the non-evenly-numbered oid case for enums
 create type e as enum ('0', '2', '3');
 alter type e add value '1' after '0';
-CREATE TABLE t (
-   h int,
-   i e
-);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'h' 
as the Greenplum Database data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-insert into t select j, (j % 4)::text::e from generate_series(0, 100000) as j;
-create index on t using gin (i);
+create table t as select (i % 4)::text::e from generate_series(0, 100000) as i;
+create index on t using gin (e);
diff --git a/contrib/btree_gin/sql/enum.sql b/contrib/btree_gin/sql/enum.sql
index 9876be88c85..f35162f8f58 100644
--- a/contrib/btree_gin/sql/enum.sql
+++ b/contrib/btree_gin/sql/enum.sql
@@ -1,30 +1,25 @@
 set enable_seqscan=off;
+
 CREATE TYPE rainbow AS ENUM ('r','o','y','g','b','i','v');
 
 CREATE TABLE test_enum (
-   h int,
    i rainbow
 );
 
-INSERT INTO test_enum VALUES (1, 'v'),(2, 'y'),(3, 'r'),(4, 'g'),(5, 'o'),(6, 
'i'),(7, 'b');
+INSERT INTO test_enum VALUES ('v'),('y'),('r'),('g'),('o'),('i'),('b');
 
 CREATE INDEX idx_enum ON test_enum USING gin (i);
 
-SELECT i FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
-SELECT i FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
-SELECT i FROM test_enum WHERE i='g'::rainbow ORDER BY i;
-SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
-SELECT i FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i<'g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i<='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+SELECT * FROM test_enum WHERE i>'g'::rainbow ORDER BY i;
 
-explain (costs off) SELECT i FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
+explain (costs off) SELECT * FROM test_enum WHERE i>='g'::rainbow ORDER BY i;
 
 -- make sure we handle the non-evenly-numbered oid case for enums
 create type e as enum ('0', '2', '3');
 alter type e add value '1' after '0';
-
-CREATE TABLE t (
-   h int,
-   i e
-);
-insert into t select j, (j % 4)::text::e from generate_series(0, 100000) as j;
-create index on t using gin (i);
+create table t as select (i % 4)::text::e from generate_series(0, 100000) as i;
+create index on t using gin (e);
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 573c57af8de..e731e37142c 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -21176,11 +21176,6 @@ ComputePartitionAttrs(ParseState *pstate, Relation 
rel, List *partParams, AttrNu
                        }
                }
 
-               if (strategy == PARTITION_STRATEGY_HASH && 
type_is_enum(atttype))
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                                errmsg("cannot use ENUM column 
\"%s\" in PARTITION BY statement for hash partitions", pelem->name)));
-
                /*
                 * Apply collation override if any
                 */
diff --git a/src/backend/parser/parse_utilcmd.c 
b/src/backend/parser/parse_utilcmd.c
index 49063b5f7fd..223c6fcb48b 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -3137,13 +3137,6 @@ getPolicyForDistributedBy(DistributedBy *distributedBy, 
TupleDesc tupdesc)
                                        if (strcmp(colname, 
NameStr(attr->attname)) == 0)
                                        {
                                                Oid                     opclass;
-                                               Oid                     typid;
-
-                                               typid = 
getBaseType(attr->atttypid);
-                                               if (type_is_enum(typid))
-                                                       ereport(ERROR,
-                                                                       
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                                                        
errmsg("cannot use ENUM column \"%s\" in DISTRIBUTED BY statement", colname)));
 
                                                opclass = 
cdb_get_opclass_for_column_def(dkelem->opclass, attr->atttypid);
 
diff --git a/src/test/regress/expected/enum.out 
b/src/test/regress/expected/enum.out
index 3b8af8008e1..686ab49742d 100644
--- a/src/test/regress/expected/enum.out
+++ b/src/test/regress/expected/enum.out
@@ -198,71 +198,71 @@ ORDER BY enumsortorder;
 --
 -- Basic table creation, row selection
 --
-CREATE TABLE enumtest (i int, col rainbow);
-INSERT INTO enumtest values (1, 'red'), (2, 'orange'), (3, 'yellow'), (4, 
'green');
-COPY enumtest (i, col) FROM stdin;
+CREATE TABLE enumtest (col rainbow);
+INSERT INTO enumtest values ('red'), ('orange'), ('yellow'), ('green');
+COPY enumtest FROM stdin;
 SELECT * FROM enumtest;
- i |  col   
----+--------
- 5 | blue
- 6 | purple
- 2 | orange
- 3 | yellow
- 4 | green
- 1 | red
+  col   
+--------
+ red
+ orange
+ yellow
+ green
+ blue
+ purple
 (6 rows)
 
 --
 -- Operators, no index
 --
 SELECT * FROM enumtest WHERE col = 'orange';
- i |  col   
----+--------
- 2 | orange
+  col   
+--------
+ orange
 (1 row)
 
 SELECT * FROM enumtest WHERE col <> 'orange' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 3 | yellow
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ red
+ yellow
+ green
+ blue
+ purple
 (5 rows)
 
 SELECT * FROM enumtest WHERE col > 'yellow' ORDER BY col;
- i |  col   
----+--------
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ green
+ blue
+ purple
 (3 rows)
 
 SELECT * FROM enumtest WHERE col >= 'yellow' ORDER BY col;
- i |  col   
----+--------
- 3 | yellow
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ yellow
+ green
+ blue
+ purple
 (4 rows)
 
 SELECT * FROM enumtest WHERE col < 'green' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 2 | orange
- 3 | yellow
+  col   
+--------
+ red
+ orange
+ yellow
 (3 rows)
 
 SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 2 | orange
- 3 | yellow
- 4 | green
+  col   
+--------
+ red
+ orange
+ yellow
+ green
 (4 rows)
 
 --
@@ -311,53 +311,53 @@ SET enable_bitmapscan = off;
 --
 CREATE INDEX enumtest_btree ON enumtest USING btree (col);
 SELECT * FROM enumtest WHERE col = 'orange';
- i |  col   
----+--------
- 2 | orange
+  col   
+--------
+ orange
 (1 row)
 
 SELECT * FROM enumtest WHERE col <> 'orange' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 3 | yellow
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ red
+ yellow
+ green
+ blue
+ purple
 (5 rows)
 
 SELECT * FROM enumtest WHERE col > 'yellow' ORDER BY col;
- i |  col   
----+--------
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ green
+ blue
+ purple
 (3 rows)
 
 SELECT * FROM enumtest WHERE col >= 'yellow' ORDER BY col;
- i |  col   
----+--------
- 3 | yellow
- 4 | green
- 5 | blue
- 6 | purple
+  col   
+--------
+ yellow
+ green
+ blue
+ purple
 (4 rows)
 
 SELECT * FROM enumtest WHERE col < 'green' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 2 | orange
- 3 | yellow
+  col   
+--------
+ red
+ orange
+ yellow
 (3 rows)
 
 SELECT * FROM enumtest WHERE col <= 'green' ORDER BY col;
- i |  col   
----+--------
- 1 | red
- 2 | orange
- 3 | yellow
- 4 | green
+  col   
+--------
+ red
+ orange
+ yellow
+ green
 (4 rows)
 
 SELECT min(col) FROM enumtest;
@@ -384,9 +384,9 @@ DROP INDEX enumtest_btree;
 --
 CREATE INDEX enumtest_hash ON enumtest USING hash (col);
 SELECT * FROM enumtest WHERE col = 'orange';
- i |  col   
----+--------
- 2 | orange
+  col   
+--------
+ orange
 (1 row)
 
 DROP INDEX enumtest_hash;
@@ -537,11 +537,11 @@ DROP FUNCTION echo_me(rainbow);
 --
 -- RI triggers on enum types
 --
-CREATE TABLE enumtest_parent (i int PRIMARY KEY, id rainbow);
-CREATE TABLE enumtest_child (i int REFERENCES enumtest_parent, parent rainbow);
-INSERT INTO enumtest_parent VALUES (1, 'red');
-INSERT INTO enumtest_child VALUES (1, 'red');
-INSERT INTO enumtest_child VALUES (2, 'blue');  -- fail
+CREATE TABLE enumtest_parent (id rainbow PRIMARY KEY);
+CREATE TABLE enumtest_child (parent rainbow REFERENCES enumtest_parent);
+INSERT INTO enumtest_parent VALUES ('red');
+INSERT INTO enumtest_child VALUES ('red');
+INSERT INTO enumtest_child VALUES ('blue');  -- fail
 -- start_ignore
 -- foreign keys are not checked in GPDB, hence these pass.
 -- end_ignore
@@ -550,9 +550,9 @@ DELETE FROM enumtest_parent;  -- fail
 -- cross-type RI should fail
 --
 CREATE TYPE bogus AS ENUM('good', 'bad', 'ugly');
-CREATE TABLE enumtest_bogus_child(i int, parent bogus REFERENCES 
enumtest_parent);
+CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
 ERROR:  foreign key constraint "enumtest_bogus_child_parent_fkey" cannot be 
implemented
-DETAIL:  Key columns "parent" and "i" are of incompatible types: bogus and 
integer.
+DETAIL:  Key columns "parent" and "id" are of incompatible types: bogus and 
rainbow.
 DROP TYPE bogus;
 -- check renaming a value
 ALTER TYPE rainbow RENAME VALUE 'red' TO 'crimson';
diff --git a/src/test/regress/expected/enum_dist_part_ban.out 
b/src/test/regress/expected/enum_dist_part_ban.out
deleted file mode 100644
index addc5b89a5e..00000000000
--- a/src/test/regress/expected/enum_dist_part_ban.out
+++ /dev/null
@@ -1,24 +0,0 @@
--- test that distributing or hash partitioning by an enum field or expression 
is blocked
-CREATE DATABASE ban_enum;
-\c ban_enum
--- create a test enum
-create type colorEnum as enum ('r', 'g', 'b');
--- hash partition by enum column name
-create table part (a int, b colorEnum) partition by hash(b);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-ERROR:  cannot use ENUM column "b" in PARTITION BY statement for hash 
partitions
--- hash partition by enum column expression
-create table part (a int, b colorEnum) partition by hash((b));
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-ERROR:  cannot use ENUM column "(null)" in PARTITION BY statement for hash 
partitions
--- distribute by enum column
-create table distr (a colorEnum, b int);
-NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
-HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
-ERROR:  cannot use ENUM column "a" in DISTRIBUTED BY statement
--- clean up database
-drop type colorEnum;
-\c regression
-DROP DATABASE ban_enum;
diff --git a/src/test/regress/expected/gpdist_legacy_opclasses.out 
b/src/test/regress/expected/gpdist_legacy_opclasses.out
index 920bcb7f9ad..58b353c86d7 100644
--- a/src/test/regress/expected/gpdist_legacy_opclasses.out
+++ b/src/test/regress/expected/gpdist_legacy_opclasses.out
@@ -296,34 +296,27 @@ explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int
  Optimizer: Postgres query optimizer
 (9 rows)
 
--- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
--- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
--- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(col1 int, color colors) distributed by(col1);
-insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
+create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
+insert into legacy_enum values ('red'), ('green'), ('blue');
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
                  QUERY PLAN                  
 ---------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: (a.color = b.color)
-         ->  Redistribute Motion 3:3  (slice2; segments: 3)
-               Hash Key: a.color
-               ->  Seq Scan on legacy_enum a
+         ->  Seq Scan on legacy_enum a
          ->  Hash
-               ->  Redistribute Motion 3:3  (slice3; segments: 3)
-                     Hash Key: b.color
-                     ->  Seq Scan on legacy_enum b
+               ->  Seq Scan on legacy_enum b
  Optimizer: Postgres query optimizer
-(11 rows)
+(7 rows)
 
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
- col1 | color | col1 | color 
-------+-------+------+-------
-    3 | blue  |    3 | blue
-    2 | green |    2 | green
-    1 | red   |    1 | red
+ color | color 
+-------+-------
+ blue  | blue
+ red   | red
+ green | green
 (3 rows)
 
 --
diff --git a/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out 
b/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
index e02b9a3f00e..17ac0786a8f 100644
--- a/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
+++ b/src/test/regress/expected/gpdist_legacy_opclasses_optimizer.out
@@ -295,31 +295,27 @@ explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int
  Optimizer: Postgres query optimizer
 (9 rows)
 
--- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
--- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
--- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(col1 int, color colors) distributed by(col1);
-insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
+create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
+insert into legacy_enum values ('red'), ('green'), ('blue');
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
-                 QUERY PLAN                  
----------------------------------------------
+                          QUERY PLAN                          
+--------------------------------------------------------------
  Gather Motion 3:1  (slice1; segments: 3)
    ->  Hash Join
          Hash Cond: ((a.color)::anyenum = (b.color)::anyenum)
          ->  Seq Scan on legacy_enum a
          ->  Hash
-               ->  Broadcast Motion 3:3  (slice2; segments: 3)
-                     ->  Seq Scan on legacy_enum b
- Optimizer: Pivotal Optimizer (GPORCA) version 3.41.0
-(8 rows)
+               ->  Seq Scan on legacy_enum b
+ Optimizer: GPORCA
+(7 rows)
 
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
- col1 | color | col1 | color 
-------+-------+------+-------
-    2 | green |    2 | green
-    3 | blue  |    3 | blue
-    1 | red   |    1 | red
+ color | color 
+-------+-------
+ blue  | blue
+ red   | red
+ green | green
 (3 rows)
 
 --
diff --git a/src/test/regress/expected/partition_prune.out 
b/src/test/regress/expected/partition_prune.out
index b3673b2ce48..75e646374be 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -3770,13 +3770,13 @@ explain (costs off) select * from pph_arrpart where a 
in ('{4, 5}', '{1}');
 drop table pph_arrpart;
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
+create table pp_enumpart (a pp_colors) partition by list (a);
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
 create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';
                 QUERY PLAN                
 ------------------------------------------
- Gather Motion 3:1  (slice1; segments: 3)
+ Gather Motion 1:1  (slice1; segments: 1)
    ->  Seq Scan on pp_enumpart_blue pp_enumpart
          Filter: (a = 'blue'::pp_colors)
  Optimizer: Postgres query optimizer
diff --git a/src/test/regress/expected/partition_prune_optimizer.out 
b/src/test/regress/expected/partition_prune_optimizer.out
index f3640ade989..72e87807c8a 100644
--- a/src/test/regress/expected/partition_prune_optimizer.out
+++ b/src/test/regress/expected/partition_prune_optimizer.out
@@ -3597,7 +3597,7 @@ explain (costs off) select * from pph_arrpart where a in 
('{4, 5}', '{1}');
 drop table pph_arrpart;
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
+create table pp_enumpart (a pp_colors) partition by list (a);
 NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' 
as the Apache Cloudberry data distribution key for this table.
 HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
@@ -3605,7 +3605,7 @@ create table pp_enumpart_blue partition of pp_enumpart 
for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';
                 QUERY PLAN                
 ------------------------------------------
- Gather Motion 3:1  (slice1; segments: 3)
+ Gather Motion 1:1  (slice1; segments: 1)
    ->  Dynamic Seq Scan on pp_enumpart
          Number of partitions to scan: 1 (out of 2)
          Filter: (a = 'blue'::pp_colors)
diff --git a/src/test/regress/greenplum_schedule 
b/src/test/regress/greenplum_schedule
index f2f66e6cd6e..0bf96b38a50 100755
--- a/src/test/regress/greenplum_schedule
+++ b/src/test/regress/greenplum_schedule
@@ -377,8 +377,6 @@ test: dynamic_table
 # DML tests for AO/CO unique indexes.
 test: uao_dml/uao_dml_unique_index_delete_row 
uao_dml/uao_dml_unique_index_delete_column 
uao_dml/uao_dml_unique_index_update_row 
uao_dml/uao_dml_unique_index_update_column
 
-# test that distributing or hash partitioning by an enum field or expression 
is blocked
-test: enum_dist_part_ban
 
 # run pg_hba raleted testing
 test: hba_conf
diff --git a/src/test/regress/sql/enum.sql b/src/test/regress/sql/enum.sql
index 495e3a8034e..fef8f994f40 100644
--- a/src/test/regress/sql/enum.sql
+++ b/src/test/regress/sql/enum.sql
@@ -120,11 +120,11 @@ ORDER BY enumsortorder;
 --
 -- Basic table creation, row selection
 --
-CREATE TABLE enumtest (i int, col rainbow);
-INSERT INTO enumtest values (1, 'red'), (2, 'orange'), (3, 'yellow'), (4, 
'green');
-COPY enumtest (i, col) FROM stdin;
-5      blue
-6      purple
+CREATE TABLE enumtest (col rainbow);
+INSERT INTO enumtest values ('red'), ('orange'), ('yellow'), ('green');
+COPY enumtest FROM stdin;
+blue
+purple
 \.
 SELECT * FROM enumtest;
 
@@ -244,17 +244,17 @@ DROP FUNCTION echo_me(rainbow);
 --
 -- RI triggers on enum types
 --
-CREATE TABLE enumtest_parent (i int PRIMARY KEY, id rainbow);
-CREATE TABLE enumtest_child (i int REFERENCES enumtest_parent, parent rainbow);
-INSERT INTO enumtest_parent VALUES (1, 'red');
-INSERT INTO enumtest_child VALUES (1, 'red');
-INSERT INTO enumtest_child VALUES (2, 'blue');  -- fail
+CREATE TABLE enumtest_parent (id rainbow PRIMARY KEY);
+CREATE TABLE enumtest_child (parent rainbow REFERENCES enumtest_parent);
+INSERT INTO enumtest_parent VALUES ('red');
+INSERT INTO enumtest_child VALUES ('red');
+INSERT INTO enumtest_child VALUES ('blue');  -- fail
 DELETE FROM enumtest_parent;  -- fail
 --
 -- cross-type RI should fail
 --
 CREATE TYPE bogus AS ENUM('good', 'bad', 'ugly');
-CREATE TABLE enumtest_bogus_child(i int, parent bogus REFERENCES 
enumtest_parent);
+CREATE TABLE enumtest_bogus_child(parent bogus REFERENCES enumtest_parent);
 DROP TYPE bogus;
 
 -- check renaming a value
diff --git a/src/test/regress/sql/enum_dist_part_ban.sql 
b/src/test/regress/sql/enum_dist_part_ban.sql
deleted file mode 100644
index 23ff33fc589..00000000000
--- a/src/test/regress/sql/enum_dist_part_ban.sql
+++ /dev/null
@@ -1,22 +0,0 @@
--- test that distributing or hash partitioning by an enum field or expression 
is blocked
-
-CREATE DATABASE ban_enum;
-\c ban_enum
-
--- create a test enum
-create type colorEnum as enum ('r', 'g', 'b');
-
--- hash partition by enum column name
-create table part (a int, b colorEnum) partition by hash(b);
-
--- hash partition by enum column expression
-create table part (a int, b colorEnum) partition by hash((b));
-
--- distribute by enum column
-create table distr (a colorEnum, b int);
-
-
--- clean up database
-drop type colorEnum;
-\c regression
-DROP DATABASE ban_enum;
diff --git a/src/test/regress/sql/gpdist_legacy_opclasses.sql 
b/src/test/regress/sql/gpdist_legacy_opclasses.sql
index 394cca51d47..2170e93c64f 100644
--- a/src/test/regress/sql/gpdist_legacy_opclasses.sql
+++ b/src/test/regress/sql/gpdist_legacy_opclasses.sql
@@ -168,13 +168,9 @@ explain (costs off) select * from legacy_domain_over_int a 
inner join legacy_dom
 explain (costs off) select * from legacy_int a inner join 
legacy_domain_over_int b on a.id = b.id;
 explain (costs off) select * from modern_int a inner join 
legacy_domain_over_int b on a.id = b.id;
 
-
--- Distributing by enum has been banned, so this test is updated to instead 
distribute by a dummy int column
--- Banned because in backup/restore scenarios the data will be in the "wrong" 
segment as oids for each enum
--- entry are re-generated and hashing them will result in arbitrary segment 
assignment.
 create type colors as enum ('red', 'green', 'blue');
-create table legacy_enum(col1 int, color colors) distributed by(col1);
-insert into legacy_enum values (1, 'red'), (2, 'green'), (3, 'blue');
+create table legacy_enum(color colors) distributed by(color cdbhash_enum_ops);
+insert into legacy_enum values ('red'), ('green'), ('blue');
 
 explain (costs off) select * from legacy_enum a inner join legacy_enum b on 
a.color = b.color;
 select * from legacy_enum a inner join legacy_enum b on a.color = b.color;
diff --git a/src/test/regress/sql/partition_prune.sql 
b/src/test/regress/sql/partition_prune.sql
index a496f1a7985..f9e3b1f2013 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -937,7 +937,7 @@ drop table pph_arrpart;
 
 -- enum type list partition key
 create type pp_colors as enum ('green', 'blue', 'black');
-create table pp_enumpart (col1 int, a pp_colors) partition by list (a);
+create table pp_enumpart (a pp_colors) partition by list (a);
 create table pp_enumpart_green partition of pp_enumpart for values in 
('green');
 create table pp_enumpart_blue partition of pp_enumpart for values in ('blue');
 explain (costs off) select * from pp_enumpart where a = 'blue';


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) branch main updated: Revert "Ban enums as distribution and partition keys"

Reply via email to