This is an automated email from the ASF dual-hosted git repository. yjhjstz pushed a commit to branch yjhjstz/fix/orca-use-after-free-having-decorrelation in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 69a2a53294f12c601cca4fe8a257cd462d24b8f3 Author: Jianghua Yang <[email protected]> AuthorDate: Sat Mar 14 03:51:08 2026 +0800 ORCA: Fix incorrect decorrelation of GROUP BY () HAVING <outer_ref> Force correlated execution (SubPlan) for scalar subqueries with GROUP BY () and a correlated HAVING clause. Previously ORCA decorrelated such subqueries into Left Outer Join + COALESCE(count,0), which incorrectly returned 0 instead of NULL when the HAVING condition was false. Add FHasCorrelatedSelectAboveGbAgg() to detect the pattern where NormalizeHaving() has converted the HAVING clause into a CLogicalSelect with outer refs above a CLogicalGbAgg with empty grouping columns. When detected, set m_fCorrelatedExecution = true in Psd() to bypass the COALESCE decorrelation path. Update groupingsets_optimizer.out expected output to reflect the new ORCA SubPlan format instead of Postgres planner fallback. Reported-in: https://github.com/apache/cloudberry/issues/1618 --- .../libgpopt/src/xforms/CSubqueryHandler.cpp | 115 ++++++++++++++++++++- .../regress/expected/groupingsets_optimizer.out | 28 ++--- 2 files changed, 128 insertions(+), 15 deletions(-) diff --git a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp index 791ee8345a9..a3c333c5ab1 100644 --- a/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp +++ b/src/backend/gporca/libgpopt/src/xforms/CSubqueryHandler.cpp @@ -308,6 +308,93 @@ CSubqueryHandler::FProjectCountSubquery(CExpression *pexprSubquery, } +//--------------------------------------------------------------------------- +// @function: +// FContainsEmptyGbAgg +// +// @doc: +// Return true if pexpr contains a GbAgg with empty grouping columns +// (i.e., GROUP BY ()) +// +//--------------------------------------------------------------------------- +static BOOL +FContainsEmptyGbAgg(CExpression *pexpr) +{ + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return 0 == CLogicalGbAgg::PopConvert(pexpr->Pop())->Pdrgpcr()->Size(); + } + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && FContainsEmptyGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + +//--------------------------------------------------------------------------- +// @function: +// FHasCorrelatedSelectAboveGbAgg +// +// @doc: +// Return true if pexpr has a CLogicalSelect with outer references in its +// filter predicate that sits above a GROUP BY () aggregate. This pattern +// arises when a correlated scalar subquery has a correlated HAVING clause, +// e.g. "SELECT count(*) FROM t GROUP BY () HAVING outer_col". +// +// When such a pattern exists the scalar subquery must NOT be decorrelated +// with COALESCE(count,0) semantics: if the HAVING condition is false the +// subquery should return NULL (no rows), not 0. +// +//--------------------------------------------------------------------------- +static BOOL +FHasCorrelatedSelectAboveGbAgg(CExpression *pexpr) +{ + // Stop recursion at a GbAgg boundary: we are looking for a Select + // that sits *above* a GbAgg, so once we reach the GbAgg there is + // nothing more to check in this branch. + if (COperator::EopLogicalGbAgg == pexpr->Pop()->Eopid()) + { + return false; + } + + if (COperator::EopLogicalSelect == pexpr->Pop()->Eopid() && + pexpr->HasOuterRefs()) + { + // The Select has outer references somewhere in its subtree. + // Check whether they originate from the filter (child 1) rather + // than from the logical child (child 0). If the logical child has + // no outer refs but the Select as a whole does, the outer refs must + // come from the filter predicate — exactly the correlated-HAVING + // pattern we want to detect. + CExpression *pexprLogicalChild = (*pexpr)[0]; + if (!pexprLogicalChild->HasOuterRefs() && + FContainsEmptyGbAgg(pexprLogicalChild)) + { + return true; + } + } + + // Recurse into logical children only. + const ULONG arity = pexpr->Arity(); + for (ULONG ul = 0; ul < arity; ul++) + { + CExpression *pexprChild = (*pexpr)[ul]; + if (pexprChild->Pop()->FLogical() && + FHasCorrelatedSelectAboveGbAgg(pexprChild)) + { + return true; + } + } + return false; +} + + //--------------------------------------------------------------------------- // @function: // CSubqueryHandler::SSubqueryDesc::SetCorrelatedExecution @@ -382,6 +469,21 @@ CSubqueryHandler::Psd(CMemoryPool *mp, CExpression *pexprSubquery, // set flag of correlated execution psd->SetCorrelatedExecution(); + // A correlated scalar subquery of the form + // SELECT count(*) FROM t GROUP BY () HAVING <outer_ref_condition> + // must execute as a correlated SubPlan. After NormalizeHaving() the HAVING + // clause becomes a CLogicalSelect with outer refs sitting above the GbAgg. + // If we decorrelate such a subquery the join filter replaces the HAVING + // condition, but a LEFT JOIN returns 0 (not NULL) for count(*) when no + // rows match — which is semantically wrong. Forcing correlated execution + // preserves the correct NULL-when-no-rows semantics. + if (!psd->m_fCorrelatedExecution && psd->m_fHasCountAgg && + psd->m_fHasOuterRefs && + FHasCorrelatedSelectAboveGbAgg(pexprInner)) + { + psd->m_fCorrelatedExecution = true; + } + return psd; } @@ -753,8 +855,19 @@ CSubqueryHandler::FCreateOuterApplyForScalarSubquery( *ppexprNewOuter = pexprPrj; BOOL fGeneratedByQuantified = popSubquery->FGeneratedByQuantified(); + + // When GROUP BY () has a correlated HAVING clause (now represented as a + // CLogicalSelect with outer refs sitting above the GbAgg), the subquery + // must return NULL — not 0 — when the HAVING condition is false. + // Applying COALESCE(count,0) would incorrectly convert that NULL to 0, + // so we skip the special count(*) semantics in that case. + BOOL fCorrelatedHavingAboveEmptyGby = + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + FHasCorrelatedSelectAboveGbAgg((*pexprSubquery)[0])); + if (fGeneratedByQuantified || - (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size())) + (fHasCountAggMatchingColumn && 0 == pgbAgg->Pdrgpcr()->Size() && + !fCorrelatedHavingAboveEmptyGby)) { CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda(); const IMDTypeInt8 *pmdtypeint8 = md_accessor->PtMDType<IMDTypeInt8>(); diff --git a/src/test/regress/expected/groupingsets_optimizer.out b/src/test/regress/expected/groupingsets_optimizer.out index 08ef4c1a68c..3718069c36c 100644 --- a/src/test/regress/expected/groupingsets_optimizer.out +++ b/src/test/regress/expected/groupingsets_optimizer.out @@ -958,21 +958,21 @@ select v.c, (select count(*) from gstest2 group by () having v.c) explain (costs off) select v.c, (select count(*) from gstest2 group by () having v.c) from (values (false),(true)) v(c) order by v.c; - QUERY PLAN --------------------------------------------------------------------------- - Sort - Sort Key: "*VALUES*".column1 - -> Values Scan on "*VALUES*" - SubPlan 1 - -> Aggregate - Group Key: () - Filter: "*VALUES*".column1 - -> Result - One-Time Filter: "*VALUES*".column1 - -> Materialize - -> Gather Motion 3:1 (slice1; segments: 3) + QUERY PLAN +-------------------------------------------------------------------- + Result + -> Sort + Sort Key: "Values".column1 + -> Values Scan on "Values" + SubPlan 1 + -> Result + One-Time Filter: "Values".column1 + -> Finalize Aggregate + -> Materialize + -> Gather Motion 3:1 (slice1; segments: 3) + -> Partial Aggregate -> Seq Scan on gstest2 - Optimizer: Postgres query optimizer + Optimizer: GPORCA (13 rows) -- HAVING with GROUPING queries --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
