This is an automated email from the ASF dual-hosted git repository.
yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new 85e70a0ffa5 ORCA: Validate hash function existence in IsOpHashJoinable
85e70a0ffa5 is described below
commit 85e70a0ffa59f4753c7e3523ce97092cf3e14fb7
Author: Jianghua Yang <[email protected]>
AuthorDate: Thu Jan 1 00:36:06 2026 +0800
ORCA: Validate hash function existence in IsOpHashJoinable
Fix execution-time error when operators have oprcanhash=true but lack actual
hash functions. Previously, ORCA would generate HashAgg plans that failed at
runtime with "could not find hash function for hash operator" errors.
This occurred when operators were marked with the 'hashes' option but only
registered in btree operator families, not hash operator families. The fix
adds validation using get_op_hash_functions() to ensure hash functions exist
before allowing HashAgg plans, moving error detection from execution to
planning time.
---
src/backend/gpopt/gpdbwrappers.cpp | 19 +++-
src/test/regress/expected/gp_hashagg.out | 166 +++++++++++++++++++++++++++++++
src/test/regress/sql/gp_hashagg.sql | 138 +++++++++++++++++++++++++
3 files changed, 322 insertions(+), 1 deletion(-)
diff --git a/src/backend/gpopt/gpdbwrappers.cpp
b/src/backend/gpopt/gpdbwrappers.cpp
index 4e636a0c653..aca95b2cc0a 100644
--- a/src/backend/gpopt/gpdbwrappers.cpp
+++ b/src/backend/gpopt/gpdbwrappers.cpp
@@ -1613,7 +1613,24 @@ gpdb::IsOpHashJoinable(Oid opno, Oid inputtype)
GP_WRAP_START;
{
/* catalog tables: pg_operator */
- return op_hashjoinable(opno, inputtype);
+ if (!op_hashjoinable(opno, inputtype))
+ return false;
+
+ /*
+ * Even if oprcanhash is true, we need to verify that hash
functions
+ * actually exist for this operator. This is because oprcanhash
can be
+ * set to true while the operator is only registered in a btree
opfamily
+ * and not in a hash opfamily, which would cause execution-time
errors
+ * when trying to build hash tables.
+ *
+ * See get_op_hash_functions() in lsyscache.c which requires
operators
+ * to be registered in a hash opfamily (amopmethod ==
HASH_AM_OID).
+ */
+ RegProcedure hash_proc;
+ if (!get_op_hash_functions(opno, &hash_proc, NULL))
+ return false;
+
+ return true;
}
GP_WRAP_END;
return false;
diff --git a/src/test/regress/expected/gp_hashagg.out
b/src/test/regress/expected/gp_hashagg.out
index 210ec705398..513aeb09966 100644
--- a/src/test/regress/expected/gp_hashagg.out
+++ b/src/test/regress/expected/gp_hashagg.out
@@ -274,3 +274,169 @@ EXPLAIN (COSTS OFF, VERBOSE) :qry;
(1 row)
+drop schema if exists tinc_base_types cascade;
+NOTICE: schema "tinc_base_types" does not exist, skipping
+create schema tinc_base_types;
+set search_path=tinc_base_types;
+CREATE FUNCTION int42_in(cstring)
+ RETURNS int42
+ AS 'int4in'
+ LANGUAGE internal IMMUTABLE STRICT;
+NOTICE: type "int42" is not yet defined
+DETAIL: Creating a shell type definition.
+CREATE FUNCTION int42_out(int42)
+ RETURNS cstring
+ AS 'int4out'
+ LANGUAGE internal IMMUTABLE STRICT;
+NOTICE: argument type int42 is only a shell
+CREATE TYPE int42 (
+ internallength = 4,
+ input = int42_in,
+ output = int42_out,
+ alignment = int4,
+ default = 42,
+ passedbyvalue
+);
+CREATE TABLE aTABLE(k int, a int42);
+NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'k'
as the Apache Cloudberry data distribution key for this table.
+HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make
sure column(s) chosen are the optimal data distribution key to minimize skew.
+INSERT INTO aTABLE VALUES(1, '21');
+INSERT INTO aTABLE VALUES(2, '22');
+INSERT INTO aTABLE VALUES(3, '23');
+INSERT INTO aTABLE VALUES(4, '24');
+SELECT * FROM aTABLE;
+ k | a
+---+----
+ 2 | 22
+ 3 | 23
+ 4 | 24
+ 1 | 21
+(4 rows)
+
+CREATE OR REPLACE FUNCTION my_lt(int42, int42)
+RETURNS boolean AS
+'int4lt'
+language internal;
+CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
+RETURNS boolean AS
+'int4le'
+language internal;
+CREATE OR REPLACE FUNCTION my_gt(int42, int42)
+RETURNS boolean AS
+'int4gt'
+language internal;
+CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
+RETURNS boolean AS
+'int4ge'
+language internal;
+CREATE OR REPLACE FUNCTION my_eq(int42, int42)
+RETURNS boolean AS
+'int4eq'
+language internal;
+CREATE OPERATOR < (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_lt,
+ hashes
+);
+CREATE OPERATOR <= (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_lteq,
+ hashes
+);
+CREATE OPERATOR = (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_eq,
+ hashes
+);
+CREATE OPERATOR > (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_gt,
+ hashes
+);
+CREATE OPERATOR >= (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_gteq,
+ hashes
+);
+CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
+RETURNS int AS
+$$
+BEGIN
+IF $1 < $2 THEN
+ return -1;
+ELSIF $1 = $2 THEN
+ return 0;
+ELSE
+ return 1;
+END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+CREATE OPERATOR CLASS my_operator_class_comp_type
+DEFAULT
+FOR TYPE int42 USING btree
+AS
+OPERATOR 1 <,
+OPERATOR 2 <=,
+OPERATOR 3 =,
+OPERATOR 4 >=,
+OPERATOR 5 >,
+FUNCTION 1 my_comp_func(int42, int42);
+CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
+DEFAULT
+FOR TYPE int42 USING hash
+AS
+OPERATOR 1 <,
+FUNCTION 1 my_comp_func(int42, int42);
+ERROR: hash function 1 must have one argument
+CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
+FOR TYPE int42 USING hash
+AS
+OPERATOR 1 =,
+FUNCTION 1 my_comp_func(int42, int42);
+ERROR: hash function 1 must have one argument
+select * from atable group by a,k;
+ k | a
+---+----
+ 1 | 21
+ 2 | 22
+ 3 | 23
+ 4 | 24
+(4 rows)
+
+-- Before fix: This would fail at runtime with:
+-- ERROR: could not find hash function for hash operator XXXXX
(execGrouping.c:118)
+-- After fix: ORCA should detect missing hash function at planning time
+-- and either use GroupAgg or report a clear planning error
+SET optimizer_enable_groupagg=off;
+select * from atable group by a,k;
+ k | a
+---+----
+ 1 | 21
+ 2 | 22
+ 3 | 23
+ 4 | 24
+(4 rows)
+
+drop schema if exists tinc_base_types cascade;
+NOTICE: drop cascades to 16 other objects
+DETAIL: drop cascades to function int42_out(int42)
+drop cascades to type int42
+drop cascades to function int42_in(cstring)
+drop cascades to table atable
+drop cascades to function my_lt(int42,int42)
+drop cascades to function my_lteq(int42,int42)
+drop cascades to function my_gt(int42,int42)
+drop cascades to function my_gteq(int42,int42)
+drop cascades to function my_eq(int42,int42)
+drop cascades to operator <(int42,int42)
+drop cascades to operator <=(int42,int42)
+drop cascades to operator =(int42,int42)
+drop cascades to operator >(int42,int42)
+drop cascades to operator >=(int42,int42)
+drop cascades to function my_comp_func(int42,int42)
+drop cascades to operator family my_operator_class_comp_type for access method
btree
diff --git a/src/test/regress/sql/gp_hashagg.sql
b/src/test/regress/sql/gp_hashagg.sql
index 36000de8af8..c9b05c1936a 100644
--- a/src/test/regress/sql/gp_hashagg.sql
+++ b/src/test/regress/sql/gp_hashagg.sql
@@ -140,3 +140,141 @@ HAVING max(c) = '31'
$$ AS qry \gset
EXPLAIN (COSTS OFF, VERBOSE) :qry;
:qry;
+
+drop schema if exists tinc_base_types cascade;
+
+create schema tinc_base_types;
+set search_path=tinc_base_types;
+
+
+CREATE FUNCTION int42_in(cstring)
+ RETURNS int42
+ AS 'int4in'
+ LANGUAGE internal IMMUTABLE STRICT;
+CREATE FUNCTION int42_out(int42)
+ RETURNS cstring
+ AS 'int4out'
+ LANGUAGE internal IMMUTABLE STRICT;
+CREATE TYPE int42 (
+ internallength = 4,
+ input = int42_in,
+ output = int42_out,
+ alignment = int4,
+ default = 42,
+ passedbyvalue
+);
+
+CREATE TABLE aTABLE(k int, a int42);
+INSERT INTO aTABLE VALUES(1, '21');
+INSERT INTO aTABLE VALUES(2, '22');
+INSERT INTO aTABLE VALUES(3, '23');
+INSERT INTO aTABLE VALUES(4, '24');
+SELECT * FROM aTABLE;
+
+CREATE OR REPLACE FUNCTION my_lt(int42, int42)
+RETURNS boolean AS
+'int4lt'
+language internal;
+
+CREATE OR REPLACE FUNCTION my_lteq(int42, int42)
+RETURNS boolean AS
+'int4le'
+language internal;
+
+CREATE OR REPLACE FUNCTION my_gt(int42, int42)
+RETURNS boolean AS
+'int4gt'
+language internal;
+
+CREATE OR REPLACE FUNCTION my_gteq(int42, int42)
+RETURNS boolean AS
+'int4ge'
+language internal;
+
+CREATE OR REPLACE FUNCTION my_eq(int42, int42)
+RETURNS boolean AS
+'int4eq'
+language internal;
+
+CREATE OPERATOR < (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_lt,
+ hashes
+);
+
+CREATE OPERATOR <= (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_lteq,
+ hashes
+);
+
+CREATE OPERATOR = (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_eq,
+ hashes
+);
+
+CREATE OPERATOR > (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_gt,
+ hashes
+);
+
+CREATE OPERATOR >= (
+ leftarg = int42,
+ rightarg = int42,
+ procedure = my_gteq,
+ hashes
+);
+
+CREATE OR REPLACE FUNCTION my_comp_func(int42, int42)
+RETURNS int AS
+$$
+BEGIN
+IF $1 < $2 THEN
+ return -1;
+ELSIF $1 = $2 THEN
+ return 0;
+ELSE
+ return 1;
+END IF;
+END;
+$$ LANGUAGE PLPGSQL;
+
+CREATE OPERATOR CLASS my_operator_class_comp_type
+DEFAULT
+FOR TYPE int42 USING btree
+AS
+OPERATOR 1 <,
+OPERATOR 2 <=,
+OPERATOR 3 =,
+OPERATOR 4 >=,
+OPERATOR 5 >,
+FUNCTION 1 my_comp_func(int42, int42);
+
+CREATE OPERATOR CLASS my_operator_class_comp_type_hash_lt
+DEFAULT
+FOR TYPE int42 USING hash
+AS
+OPERATOR 1 <,
+FUNCTION 1 my_comp_func(int42, int42);
+
+CREATE OPERATOR CLASS my_operator_class_comp_type_hash_eq
+FOR TYPE int42 USING hash
+AS
+OPERATOR 1 =,
+FUNCTION 1 my_comp_func(int42, int42);
+
+select * from atable group by a,k;
+-- Before fix: This would fail at runtime with:
+-- ERROR: could not find hash function for hash operator XXXXX
(execGrouping.c:118)
+-- After fix: ORCA should detect missing hash function at planning time
+-- and either use GroupAgg or report a clear planning error
+SET optimizer_enable_groupagg=off;
+select * from atable group by a,k;
+
+drop schema if exists tinc_base_types cascade;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]