This is an automated email from the ASF dual-hosted git repository.

avamingli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit dba1a324760c0e307701be85ce7764401a322278
Author: Zhang Mingli <[email protected]>
AuthorDate: Thu Mar 5 15:45:39 2026 +0800

    Support AQUMV exact-match for multi-table JOIN queries
    
    Add a new AQUMV code path that rewrites multi-table JOIN queries to
    scan materialized views when the query exactly matches the MV
    definition. This compares the saved raw parse tree against stored
    viewQuery from gp_matview_aux, bypassing the single-table AQUMV
    logic entirely.
    
    This enables significant query acceleration for common analytical
    patterns: instead of repeatedly computing expensive multi-table joins
    at query time, the planner can directly read pre-computed results
    from the materialized view, turning O(N*M) join operations into a
    simple sequential scan.
    
    For example, given:
    
      CREATE MATERIALIZED VIEW mv AS
        SELECT t1.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a;
    
      -- Before (GUC off): original join plan
      Gather Motion 3:1
        ->  Hash Join
              Hash Cond: (t1.a = t2.a)
              ->  Seq Scan on t1
              ->  Hash
                    ->  Seq Scan on t2
    
      -- After (GUC on): rewritten to MV scan
      Gather Motion 3:1
        ->  Seq Scan on mv
---
 src/backend/optimizer/plan/aqumv.c         |  345 +++++++
 src/backend/optimizer/plan/planner.c       |   18 +
 src/include/nodes/pathnodes.h              |    2 +
 src/include/optimizer/aqumv.h              |    1 +
 src/test/regress/expected/matview_data.out | 1341 +++++++++++++++++++++++++++-
 src/test/regress/sql/matview_data.sql      |  535 +++++++++++
 6 files changed, 2206 insertions(+), 36 deletions(-)

diff --git a/src/backend/optimizer/plan/aqumv.c 
b/src/backend/optimizer/plan/aqumv.c
index 4a576061780..2084bcbc181 100644
--- a/src/backend/optimizer/plan/aqumv.c
+++ b/src/backend/optimizer/plan/aqumv.c
@@ -996,3 +996,348 @@ groupby_query_rewrite(PlannerInfo *subroot,
        subroot->append_rel_list = NIL;
        return true;
 }
+
+/*
+ * aqumv_query_is_exact_match
+ *
+ * Compare two Query trees for semantic identity.  Both should be at the
+ * same preprocessing stage (raw parser output).  Returns true only if
+ * they are structurally identical in all query-semantics fields.
+ */
+static bool
+aqumv_query_is_exact_match(Query *raw_parse, Query *viewQuery)
+{
+       /* Both must be CMD_SELECT */
+       if (raw_parse->commandType != CMD_SELECT ||
+               viewQuery->commandType != CMD_SELECT)
+               return false;
+
+       /* Same number of range table entries */
+       if (list_length(raw_parse->rtable) != list_length(viewQuery->rtable))
+               return false;
+
+       /* Compare range tables (table OIDs, join types, aliases structure) */
+       if (!equal(raw_parse->rtable, viewQuery->rtable))
+               return false;
+
+       /* Compare join tree (FROM clause + WHERE quals) */
+       if (!equal(raw_parse->jointree, viewQuery->jointree))
+               return false;
+
+       /* Compare target list entries: expressions and sort/group refs */
+       if (list_length(raw_parse->targetList) != 
list_length(viewQuery->targetList))
+               return false;
+       {
+               ListCell *lc1, *lc2;
+               forboth(lc1, raw_parse->targetList, lc2, viewQuery->targetList)
+               {
+                       TargetEntry *tle1 = lfirst_node(TargetEntry, lc1);
+                       TargetEntry *tle2 = lfirst_node(TargetEntry, lc2);
+                       if (!equal(tle1->expr, tle2->expr))
+                               return false;
+                       if (tle1->resjunk != tle2->resjunk)
+                               return false;
+                       if (tle1->ressortgroupref != tle2->ressortgroupref)
+                               return false;
+               }
+       }
+
+       /* Compare GROUP BY, HAVING, ORDER BY, DISTINCT, LIMIT */
+       if (!equal(raw_parse->groupClause, viewQuery->groupClause))
+               return false;
+       if (!equal(raw_parse->havingQual, viewQuery->havingQual))
+               return false;
+       if (!equal(raw_parse->sortClause, viewQuery->sortClause))
+               return false;
+       if (!equal(raw_parse->distinctClause, viewQuery->distinctClause))
+               return false;
+       if (!equal(raw_parse->limitCount, viewQuery->limitCount))
+               return false;
+       if (!equal(raw_parse->limitOffset, viewQuery->limitOffset))
+               return false;
+
+       /* Compare boolean flags */
+       if (raw_parse->hasAggs != viewQuery->hasAggs)
+               return false;
+       if (raw_parse->hasWindowFuncs != viewQuery->hasWindowFuncs)
+               return false;
+       if (raw_parse->hasDistinctOn != viewQuery->hasDistinctOn)
+               return false;
+
+       return true;
+}
+
+/*
+ * answer_query_using_materialized_views_for_join
+ *
+ * Handle multi-table JOIN queries via exact-match comparison.
+ * This is completely independent from the single-table AQUMV code path.
+ *
+ * We compare the saved raw parse tree (before any planner preprocessing)
+ * against the stored viewQuery from gp_matview_aux.  On exact match,
+ * rewrite the query to a simple SELECT FROM mv.
+ */
+RelOptInfo *
+answer_query_using_materialized_views_for_join(PlannerInfo *root, AqumvContext 
aqumv_context)
+{
+       RelOptInfo              *current_rel = aqumv_context->current_rel;
+       query_pathkeys_callback qp_callback = aqumv_context->qp_callback;
+       Query                   *parse = root->parse;
+       Query                   *raw_parse = root->aqumv_raw_parse;
+       RelOptInfo              *mv_final_rel = current_rel;
+       Relation                matviewRel;
+       Relation                mvauxDesc;
+       TupleDesc               mvaux_tupdesc;
+       SysScanDesc             mvscan;
+       HeapTuple               tup;
+       Form_gp_matview_aux mvaux_tup;
+       bool                    need_close = false;
+
+       /* Must have the saved raw parse tree. */
+       if (raw_parse == NULL)
+               return mv_final_rel;
+
+       /* Must be a join query (more than one table in FROM). */
+       if (list_length(raw_parse->rtable) <= 1)
+               return mv_final_rel;
+
+       /* Basic eligibility checks (same as single-table AQUMV). */
+       if (parse->commandType != CMD_SELECT ||
+               parse->rowMarks != NIL ||
+               parse->scatterClause != NIL ||
+               parse->cteList != NIL ||
+               parse->setOperations != NULL ||
+               parse->hasModifyingCTE ||
+               parse->parentStmtType == PARENTSTMTTYPE_REFRESH_MATVIEW ||
+               parse->parentStmtType == PARENTSTMTTYPE_CTAS ||
+               contain_mutable_functions((Node *) raw_parse) ||
+               parse->hasSubLinks)
+               return mv_final_rel;
+
+       mvauxDesc = table_open(GpMatviewAuxId, AccessShareLock);
+       mvaux_tupdesc = RelationGetDescr(mvauxDesc);
+
+       mvscan = systable_beginscan(mvauxDesc, InvalidOid, false,
+                                                               NULL, 0, NULL);
+
+       while (HeapTupleIsValid(tup = systable_getnext(mvscan)))
+       {
+               Datum           view_query_datum;
+               char            *view_query_str;
+               bool            is_null;
+               Query           *viewQuery;
+               RangeTblEntry *mvrte;
+               PlannerInfo     *subroot;
+               TupleDesc       mv_tupdesc;
+
+               CHECK_FOR_INTERRUPTS();
+               if (need_close)
+                       table_close(matviewRel, AccessShareLock);
+
+               mvaux_tup = (Form_gp_matview_aux) GETSTRUCT(tup);
+               matviewRel = table_open(mvaux_tup->mvoid, AccessShareLock);
+               need_close = true;
+
+               if (!RelationIsPopulated(matviewRel))
+                       continue;
+
+               /* MV must be up-to-date (IVM is always current). */
+               if (!RelationIsIVM(matviewRel) &&
+                       
!MatviewIsGeneralyUpToDate(RelationGetRelid(matviewRel)))
+                       continue;
+
+               /* Get a copy of view query. */
+               view_query_datum = heap_getattr(tup,
+                                                                               
Anum_gp_matview_aux_view_query,
+                                                                               
mvaux_tupdesc,
+                                                                               
&is_null);
+
+               view_query_str = TextDatumGetCString(view_query_datum);
+               viewQuery = copyObject(stringToNode(view_query_str));
+               pfree(view_query_str);
+               Assert(IsA(viewQuery, Query));
+
+               /* Skip single-table viewQueries (handled by existing AQUMV). */
+               if (list_length(viewQuery->rtable) <= 1)
+                       continue;
+
+               /* Exact match comparison between raw parse and view query. */
+               if (!aqumv_query_is_exact_match(raw_parse, viewQuery))
+                       continue;
+
+               /*
+                * We have an exact match.  Rewrite viewQuery to:
+                *   SELECT mv.col1, mv.col2, ... FROM mv
+                */
+               mv_tupdesc = RelationGetDescr(matviewRel);
+
+               /* Build new target list referencing MV columns. */
+               {
+                       List       *new_tlist = NIL;
+                       ListCell   *lc;
+                       int                     attnum = 0;
+
+                       foreach(lc, viewQuery->targetList)
+                       {
+                               TargetEntry *old_tle = lfirst_node(TargetEntry, 
lc);
+                               TargetEntry *new_tle;
+                               Var                     *newVar;
+                               Form_pg_attribute attr;
+
+                               if (old_tle->resjunk)
+                                       continue;
+
+                               attnum++;
+                               attr = TupleDescAttr(mv_tupdesc, attnum - 1);
+
+                               newVar = makeVar(1,
+                                                               attr->attnum,
+                                                               attr->atttypid,
+                                                               attr->atttypmod,
+                                                               
attr->attcollation,
+                                                               0);
+                               newVar->location = -1;
+
+                               new_tle = makeTargetEntry((Expr *) newVar,
+                                                                               
  (AttrNumber) attnum,
+                                                                               
  old_tle->resname,
+                                                                               
  false);
+                               new_tlist = lappend(new_tlist, new_tle);
+                       }
+
+                       viewQuery->targetList = new_tlist;
+               }
+
+               /* Create new RTE for the MV. */
+               mvrte = makeNode(RangeTblEntry);
+               mvrte->rtekind = RTE_RELATION;
+               mvrte->relid = RelationGetRelid(matviewRel);
+               mvrte->relkind = RELKIND_MATVIEW;
+               mvrte->rellockmode = AccessShareLock;
+               mvrte->inh = false;
+               mvrte->inFromCl = true;
+
+               /* Build eref with column names from the MV's TupleDesc. */
+               {
+                       Alias  *eref = 
makeAlias(RelationGetRelationName(matviewRel), NIL);
+                       int             i;
+                       for (i = 0; i < mv_tupdesc->natts; i++)
+                       {
+                               Form_pg_attribute attr = 
TupleDescAttr(mv_tupdesc, i);
+                               if (!attr->attisdropped)
+                                       eref->colnames = lappend(eref->colnames,
+                                                                               
         makeString(pstrdup(NameStr(attr->attname))));
+                               else
+                                       eref->colnames = lappend(eref->colnames,
+                                                                               
         makeString(pstrdup("")));
+                       }
+                       mvrte->eref = eref;
+                       mvrte->alias = 
makeAlias(RelationGetRelationName(matviewRel), NIL);
+               }
+
+               viewQuery->rtable = list_make1(mvrte);
+               viewQuery->jointree = 
makeFromExpr(list_make1(makeNode(RangeTblRef)), NULL);
+               ((RangeTblRef *) 
linitial(viewQuery->jointree->fromlist))->rtindex = 1;
+
+               /* Clear aggregation/grouping/sorting state — all materialized. 
*/
+               viewQuery->hasAggs = false;
+               viewQuery->groupClause = NIL;
+               viewQuery->havingQual = NULL;
+               viewQuery->sortClause = NIL;
+               viewQuery->distinctClause = NIL;
+               viewQuery->hasDistinctOn = false;
+               viewQuery->hasWindowFuncs = false;
+               viewQuery->hasTargetSRFs = false;
+               viewQuery->limitCount = parse->limitCount;
+               viewQuery->limitOffset = parse->limitOffset;
+               viewQuery->limitOption = parse->limitOption;
+
+               /* Create subroot for planning the MV scan. */
+               subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
+               memcpy(subroot, root, sizeof(PlannerInfo));
+               subroot->parent_root = root;
+               subroot->eq_classes = NIL;
+               subroot->plan_params = NIL;
+               subroot->outer_params = NULL;
+               subroot->init_plans = NIL;
+               subroot->agginfos = NIL;
+               subroot->aggtransinfos = NIL;
+               subroot->parse = viewQuery;
+               subroot->tuple_fraction = root->tuple_fraction;
+               subroot->limit_tuples = root->limit_tuples;
+               subroot->append_rel_list = NIL;
+               subroot->hasHavingQual = false;
+               subroot->hasNonPartialAggs = false;
+               subroot->hasNonSerialAggs = false;
+               subroot->numOrderedAggs = 0;
+               subroot->hasNonCombine = false;
+               subroot->numPureOrderedAggs = 0;
+
+               subroot->processed_tlist = NIL;
+               preprocess_targetlist(subroot);
+
+               /* Compute final locus for the MV scan. */
+               {
+                       PathTarget *newtarget = 
make_pathtarget_from_tlist(subroot->processed_tlist);
+                       subroot->final_locus = 
cdbllize_get_final_locus(subroot, newtarget);
+               }
+
+               /*
+                * Plan the MV scan.
+                *
+                * We need a clean qp_extra with no groupClause or 
activeWindows,
+                * because the rewritten viewQuery is a simple SELECT from the 
MV
+                * with no GROUP BY, windowing, etc.  The standard_qp_callback 
uses
+                * qp_extra->groupClause to compute group_pathkeys, which would 
fail
+                * if it still contained the original query's GROUP BY 
expressions.
+                *
+                * standard_qp_extra is { List *activeWindows; List 
*groupClause; },
+                * so a zeroed struct of that size works correctly (both fields 
NIL).
+                */
+               {
+                       char    clean_qp_extra[2 * sizeof(List *)];
+                       memset(clean_qp_extra, 0, sizeof(clean_qp_extra));
+                       mv_final_rel = query_planner(subroot, qp_callback, 
clean_qp_extra);
+               }
+
+               /* Cost-based decision: use MV only if cheaper. */
+               if (mv_final_rel->cheapest_total_path->total_cost < 
current_rel->cheapest_total_path->total_cost)
+               {
+                       root->parse = viewQuery;
+                       root->processed_tlist = subroot->processed_tlist;
+                       root->agginfos = subroot->agginfos;
+                       root->aggtransinfos = subroot->aggtransinfos;
+                       root->simple_rte_array = subroot->simple_rte_array;
+                       root->simple_rel_array = subroot->simple_rel_array;
+                       root->simple_rel_array_size = 
subroot->simple_rel_array_size;
+                       root->hasNonPartialAggs = subroot->hasNonPartialAggs;
+                       root->hasNonSerialAggs = subroot->hasNonSerialAggs;
+                       root->numOrderedAggs = subroot->numOrderedAggs;
+                       root->hasNonCombine = subroot->hasNonCombine;
+                       root->numPureOrderedAggs = subroot->numPureOrderedAggs;
+                       root->hasHavingQual = subroot->hasHavingQual;
+                       root->group_pathkeys = subroot->group_pathkeys;
+                       root->sort_pathkeys = subroot->sort_pathkeys;
+                       root->query_pathkeys = subroot->query_pathkeys;
+                       root->distinct_pathkeys = subroot->distinct_pathkeys;
+                       root->eq_classes = subroot->eq_classes;
+                       root->append_rel_list = subroot->append_rel_list;
+                       current_rel = mv_final_rel;
+                       table_close(matviewRel, NoLock);
+                       need_close = false;
+                       break;
+               }
+               else
+               {
+                       /* MV is not cheaper, reset and try next. */
+                       mv_final_rel = current_rel;
+               }
+       }
+
+       if (need_close)
+               table_close(matviewRel, AccessShareLock);
+       systable_endscan(mvscan);
+       table_close(mvauxDesc, AccessShareLock);
+
+       return current_rel;
+}
diff --git a/src/backend/optimizer/plan/planner.c 
b/src/backend/optimizer/plan/planner.c
index da5acb23ebf..89e3611c3a5 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -952,6 +952,17 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
        root->partColsUpdated = false;
        root->is_correlated_subplan = false;
 
+       /*
+        * Save a copy of the raw parse tree for AQUMV join exact-match.
+        * This must be done before any preprocessing modifies the parse tree.
+        */
+       if (Gp_role == GP_ROLE_DISPATCH &&
+               enable_answer_query_using_materialized_views &&
+               parent_root == NULL)
+               root->aqumv_raw_parse = copyObject(parse);
+       else
+               root->aqumv_raw_parse = NULL;
+
        /*
         * If there is a WITH list, process each WITH query and either convert 
it
         * to RTE_SUBQUERY RTE(s) or build an initplan SubPlan structure for it.
@@ -1935,6 +1946,13 @@ grouping_planner(PlannerInfo *root, double 
tuple_fraction)
 
                        /* Do the real work. */
                        current_rel = 
answer_query_using_materialized_views(root, aqumv_context);
+
+                       /* Try join AQUMV if single-table didn't rewrite. */
+                       if (current_rel == aqumv_context->current_rel)
+                       {
+                               current_rel = 
answer_query_using_materialized_views_for_join(root, aqumv_context);
+                       }
+
                        /* parse tree may be rewriten. */
                        parse = root->parse;
                }
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index b02a2ccf0c1..3cd21590ac9 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -505,6 +505,8 @@ struct PlannerInfo
        int                     numPureOrderedAggs; /* CDB: number that use 
ORDER BY/WITHIN GROUP, not counting DISTINCT */
        bool            hasNonCombine;  /* CDB: any agg func w/o a combine 
func? */
        bool            is_from_orca; /* true if this PlannerInfo was created 
from Orca*/
+
+       Query      *aqumv_raw_parse;    /* Raw parse tree for AQUMV join 
exact-match */
 };
 
 /*
diff --git a/src/include/optimizer/aqumv.h b/src/include/optimizer/aqumv.h
index 6e51d4dbc92..2bb4122cf11 100644
--- a/src/include/optimizer/aqumv.h
+++ b/src/include/optimizer/aqumv.h
@@ -44,5 +44,6 @@ typedef struct AqumvContextData {
 typedef AqumvContextData *AqumvContext;
 
 extern RelOptInfo* answer_query_using_materialized_views(PlannerInfo *root, 
AqumvContextData *aqumv_context);
+extern RelOptInfo* answer_query_using_materialized_views_for_join(PlannerInfo 
*root, AqumvContextData *aqumv_context);
 
 #endif   /* AQUMV_H */
diff --git a/src/test/regress/expected/matview_data.out 
b/src/test/regress/expected/matview_data.out
index 85697e90072..e415ceaf363 100644
--- a/src/test/regress/expected/matview_data.out
+++ b/src/test/regress/expected/matview_data.out
@@ -791,6 +791,1274 @@ drop materialized view mv_join2;
 drop table jt3;
 drop table jt2;
 drop table jt1;
+--
+-- Test AQUMV (Answer Query Using Materialized Views) with join queries.
+-- Each matching test shows EXPLAIN + SELECT with GUC off (original plan),
+-- then EXPLAIN + SELECT with GUC on (MV rewrite). Results must match.
+--
+create table aqj_t1(a int, b int) distributed by (a);
+create table aqj_t2(a int, b int) distributed by (a);
+create table aqj_t3(a int, b int) distributed by (a);
+insert into aqj_t1 select i, i*10 from generate_series(1, 100) i;
+insert into aqj_t2 select i, i*100 from generate_series(1, 100) i;
+insert into aqj_t3 select i, i*1000 from generate_series(1, 100) i;
+analyze aqj_t1;
+analyze aqj_t2;
+analyze aqj_t3;
+-- 1. Two-table INNER JOIN exact match
+create materialized view mv_aqj_join2 as
+  select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_join2;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+ a |  b  
+---+-----
+ 1 | 100
+ 2 | 200
+ 3 | 300
+ 4 | 400
+ 5 | 500
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_join2
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+ a |  b  
+---+-----
+ 1 | 100
+ 2 | 200
+ 3 | 300
+ 4 | 400
+ 5 | 500
+(5 rows)
+
+-- 2. Join with WHERE clause
+create materialized view mv_aqj_where as
+  select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a 
where aqj_t1.a > 5;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_where;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 5;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+               Filter: (a > 5)
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+                     Filter: (a > 5)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where 
aqj_t1.a > 5 order by 1 limit 5;
+ a  |  b   
+----+------
+  6 |  600
+  7 |  700
+  8 |  800
+  9 |  900
+ 10 | 1000
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 5;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_where
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where 
aqj_t1.a > 5 order by 1 limit 5;
+ a  |  b   
+----+------
+  6 |  600
+  7 |  700
+  8 |  800
+  9 |  900
+ 10 | 1000
+(5 rows)
+
+-- 3. Join with GROUP BY + aggregate
+create materialized view mv_aqj_agg as
+  select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_agg;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a group by aqj_t1.a;
+                   QUERY PLAN                   
+------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  HashAggregate
+         Group Key: aqj_t1.a
+         ->  Hash Join
+               Hash Cond: (aqj_t1.a = aqj_t2.a)
+               ->  Seq Scan on aqj_t1
+               ->  Hash
+                     ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a order by 1 limit 5;
+ a | cnt 
+---+-----
+ 1 |   1
+ 2 |   1
+ 3 |   1
+ 4 |   1
+ 5 |   1
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a group by aqj_t1.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_agg
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a order by 1 limit 5;
+ a | cnt 
+---+-----
+ 1 |   1
+ 2 |   1
+ 3 |   1
+ 4 |   1
+ 5 |   1
+(5 rows)
+
+-- 4. Non-match: different WHERE clause (should show Hash Join, not MV)
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 10;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+               Filter: (a > 10)
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+                     Filter: (a > 10)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+-- 5. Non-match: different target list
+explain(costs off) select aqj_t1.b, aqj_t2.a from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+-- 6. Non-match: different join type (INNER vs LEFT)
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 left join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Left Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+-- 7. Three-table join
+create materialized view mv_aqj_join3 as
+  select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_join3;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+                   QUERY PLAN                   
+------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t3.a)
+         ->  Hash Join
+               Hash Cond: (aqj_t1.a = aqj_t2.a)
+               ->  Seq Scan on aqj_t1
+               ->  Hash
+                     ->  Seq Scan on aqj_t2
+         ->  Hash
+               ->  Seq Scan on aqj_t3
+ Optimizer: Postgres query optimizer
+(11 rows)
+
+select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a
+  order by 1 limit 5;
+ a |  b  |  c   
+---+-----+------
+ 1 | 100 | 1000
+ 2 | 200 | 2000
+ 3 | 300 | 3000
+ 4 | 400 | 4000
+ 5 | 500 | 5000
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_join3
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a
+  order by 1 limit 5;
+ a |  b  |  c   
+---+-----+------
+ 1 | 100 | 1000
+ 2 | 200 | 2000
+ 3 | 300 | 3000
+ 4 | 400 | 4000
+ 5 | 500 | 5000
+(5 rows)
+
+-- 8. Implicit join (FROM t1, t2 WHERE ...)
+create materialized view mv_aqj_implicit as
+  select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'a' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_implicit;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+ a |  b  
+---+-----
+ 1 | 100
+ 2 | 200
+ 3 | 300
+ 4 | 400
+ 5 | 500
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_implicit
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+ a |  b  
+---+-----
+ 1 | 100
+ 2 | 200
+ 3 | 300
+ 4 | 400
+ 5 | 500
+(5 rows)
+
+-- 9. MV not up-to-date: after INSERT on base table
+insert into aqj_t1 values(999, 9990);
+set enable_answer_query_using_materialized_views = on;
+-- Should NOT use mv_aqj_join2 (status is 'i')
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+-- 10. After REFRESH: should use MV again
+refresh materialized view mv_aqj_join2;
+analyze mv_aqj_join2;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_join2
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+-- 11. GUC off: should NOT use MV
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (aqj_t1.a = aqj_t2.a)
+         ->  Seq Scan on aqj_t1
+         ->  Hash
+               ->  Seq Scan on aqj_t2
+ Optimizer: Postgres query optimizer
+(7 rows)
+
+--
+-- More complex join AQUMV test cases with richer schemas
+--
+create table aqj_orders(
+  order_id int,
+  customer_id int,
+  amount numeric(10,2),
+  status text,
+  order_date date
+) distributed by (order_id);
+create table aqj_customers(
+  customer_id int,
+  name text,
+  region text,
+  credit_limit numeric(10,2)
+) distributed by (customer_id);
+create table aqj_products(
+  product_id int,
+  name text,
+  category text,
+  price numeric(10,2)
+) distributed by (product_id);
+create table aqj_order_items(
+  item_id int,
+  order_id int,
+  product_id int,
+  quantity int
+) distributed by (item_id);
+insert into aqj_customers select i, 'cust_' || i, case when i % 3 = 0 then 
'east' when i % 3 = 1 then 'west' else 'north' end, (i * 100)::numeric(10,2) 
from generate_series(1, 50) i;
+insert into aqj_orders select i, (i % 50) + 1, (i * 10.5)::numeric(10,2), case 
when i % 4 = 0 then 'shipped' when i % 4 = 1 then 'pending' when i % 4 = 2 then 
'delivered' else 'cancelled' end, '2024-01-01'::date + (i % 365) from 
generate_series(1, 200) i;
+insert into aqj_products select i, 'prod_' || i, case when i % 5 = 0 then 
'electronics' when i % 5 = 1 then 'books' when i % 5 = 2 then 'clothing' when i 
% 5 = 3 then 'food' else 'toys' end, (i * 5.99)::numeric(10,2) from 
generate_series(1, 30) i;
+insert into aqj_order_items select i, (i % 200) + 1, (i % 30) + 1, (i % 10) + 
1 from generate_series(1, 500) i;
+analyze aqj_customers;
+analyze aqj_orders;
+analyze aqj_products;
+analyze aqj_order_items;
+-- 12. Join with multiple columns + WHERE on text column
+create materialized view mv_aqj_orders_cust as
+  select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'order_id' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_orders_cust;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: o.customer_id
+               ->  Seq Scan on aqj_orders o
+                     Filter: (status = 'shipped'::text)
+         ->  Hash
+               ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped'
+  order by o.order_id limit 5;
+ order_id | amount |  name   | region 
+----------+--------+---------+--------
+        4 |  42.00 | cust_5  | north
+        8 |  84.00 | cust_9  | east
+       12 | 126.00 | cust_13 | west
+       16 | 168.00 | cust_17 | north
+       20 | 210.00 | cust_21 | east
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_orders_cust
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped'
+  order by o.order_id limit 5;
+ order_id | amount |  name   | region 
+----------+--------+---------+--------
+        4 |  42.00 | cust_5  | north
+        8 |  84.00 | cust_9  | east
+       12 | 126.00 | cust_13 | west
+       16 | 168.00 | cust_17 | north
+       20 | 210.00 | cust_21 | east
+(5 rows)
+
+-- 13. Four-table join
+create materialized view mv_aqj_order_details as
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'order_id' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_order_details;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+                                  QUERY PLAN                                  
+------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (oi.product_id = p.product_id)
+         ->  Hash Join
+               Hash Cond: (o.order_id = oi.order_id)
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                     ->  Hash Join
+                           Hash Cond: (o.customer_id = c.customer_id)
+                           ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                                 Hash Key: o.customer_id
+                                 ->  Seq Scan on aqj_orders o
+                           ->  Hash
+                                 ->  Seq Scan on aqj_customers c
+               ->  Hash
+                     ->  Seq Scan on aqj_order_items oi
+         ->  Hash
+               ->  Broadcast Motion 3:3  (slice4; segments: 3)
+                     ->  Seq Scan on aqj_products p
+ Optimizer: Postgres query optimizer
+(19 rows)
+
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+ order_id | customer_name | product_name | quantity | price  
+----------+---------------+--------------+----------+--------
+        1 | cust_2        | prod_11      |        1 |  65.89
+        1 | cust_2        | prod_21      |        1 | 125.79
+        2 | cust_3        | prod_12      |        2 |  71.88
+        2 | cust_3        | prod_2       |        2 |  11.98
+        2 | cust_3        | prod_22      |        2 | 131.78
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_order_details
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+ order_id | customer_name | product_name | quantity | price  
+----------+---------------+--------------+----------+--------
+        1 | cust_2        | prod_11      |        1 |  65.89
+        1 | cust_2        | prod_21      |        1 | 125.79
+        2 | cust_3        | prod_12      |        2 |  71.88
+        2 | cust_3        | prod_2       |        2 |  11.98
+        2 | cust_3        | prod_22      |        2 | 131.78
+(5 rows)
+
+-- 14. GROUP BY on join with multiple aggregates: sum, count, avg
+create materialized view mv_aqj_cust_summary as
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'region' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_cust_summary;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Finalize HashAggregate
+         Group Key: c.region
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: c.region
+               ->  Streaming Partial HashAggregate
+                     Group Key: c.region
+                     ->  Hash Join
+                           Hash Cond: (o.customer_id = c.customer_id)
+                           ->  Seq Scan on aqj_orders o
+                           ->  Hash
+                                 ->  Broadcast Motion 3:3  (slice3; segments: 
3)
+                                       ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region
+  order by c.region;
+ region | order_count | total_amount |      avg_amount       
+--------+-------------+--------------+-----------------------
+ east   |          64 |     66864.00 | 1044.7500000000000000
+ north  |          68 |     71400.00 | 1050.0000000000000000
+ west   |          68 |     72786.00 | 1070.3823529411764706
+(3 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_cust_summary
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region
+  order by c.region;
+ region | order_count | total_amount |      avg_amount       
+--------+-------------+--------------+-----------------------
+ east   |          64 |     66864.00 | 1044.7500000000000000
+ north  |          68 |     71400.00 | 1050.0000000000000000
+ west   |          68 |     72786.00 | 1070.3823529411764706
+(3 rows)
+
+-- 15. Join with expression in target list (arithmetic + function)
+create materialized view mv_aqj_expr as
+  select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, 
upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'order_id' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_expr;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Seq Scan on aqj_orders o
+         ->  Hash
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                     ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(8 rows)
+
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+ order_id | amount_with_tax |  name  | region_upper 
+----------+-----------------+--------+--------------
+        1 |          11.550 | cust_2 | NORTH
+        2 |          23.100 | cust_3 | EAST
+        3 |          34.650 | cust_4 | WEST
+        4 |          46.200 | cust_5 | NORTH
+        5 |          57.750 | cust_6 | EAST
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_expr
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+ order_id | amount_with_tax |  name  | region_upper 
+----------+-----------------+--------+--------------
+        1 |          11.550 | cust_2 | NORTH
+        2 |          23.100 | cust_3 | EAST
+        3 |          34.650 | cust_4 | WEST
+        4 |          46.200 | cust_5 | NORTH
+        5 |          57.750 | cust_6 | EAST
+(5 rows)
+
+-- 16. Non-match: same tables + expressions, but extra WHERE (should NOT match 
mv_aqj_expr)
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where c.region = 'east';
+                          QUERY PLAN                           
+---------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Seq Scan on aqj_orders o
+         ->  Hash
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                     ->  Seq Scan on aqj_customers c
+                           Filter: (region = 'east'::text)
+ Optimizer: Postgres query optimizer
+(9 rows)
+
+-- 17. Non-match: same tables but different aggregate target list
+explain(costs off)
+  select c.region, sum(o.amount) as total_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Finalize HashAggregate
+         Group Key: c.region
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: c.region
+               ->  Streaming Partial HashAggregate
+                     Group Key: c.region
+                     ->  Hash Join
+                           Hash Cond: (o.customer_id = c.customer_id)
+                           ->  Seq Scan on aqj_orders o
+                           ->  Hash
+                                 ->  Broadcast Motion 3:3  (slice3; segments: 
3)
+                                       ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+-- 18. Non-match: different join order (o JOIN c vs c JOIN o)
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_customers c join aqj_orders o on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: o.customer_id
+               ->  Seq Scan on aqj_orders o
+                     Filter: (status = 'shipped'::text)
+         ->  Hash
+               ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+-- 19. Join with compound WHERE (multiple AND conditions)
+create materialized view mv_aqj_compound_where as
+  select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'order_id' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_compound_where;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Seq Scan on aqj_orders o
+               Filter: ((amount > '50'::numeric) AND (status = 
'pending'::text))
+         ->  Hash
+               ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                     ->  Seq Scan on aqj_customers c
+                           Filter: (region = 'west'::text)
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50
+  order by o.order_id limit 5;
+ order_id | amount |  name   
+----------+--------+---------
+        9 |  94.50 | cust_10
+       21 | 220.50 | cust_22
+       33 | 346.50 | cust_34
+       45 | 472.50 | cust_46
+       53 | 556.50 | cust_4
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_compound_where
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50
+  order by o.order_id limit 5;
+ order_id | amount |  name   
+----------+--------+---------
+        9 |  94.50 | cust_10
+       21 | 220.50 | cust_22
+       33 | 346.50 | cust_34
+       45 | 472.50 | cust_46
+       53 | 556.50 | cust_4
+(5 rows)
+
+-- 20. Self-join
+create materialized view mv_aqj_selfjoin as
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'id1' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_selfjoin;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+                            QUERY PLAN                            
+------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o1.customer_id = o2.customer_id)
+         Join Filter: (o1.order_id < o2.order_id)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: o1.customer_id
+               ->  Seq Scan on aqj_orders o1
+         ->  Hash
+               ->  Redistribute Motion 3:3  (slice3; segments: 3)
+                     Hash Key: o2.customer_id
+                     ->  Seq Scan on aqj_orders o2
+ Optimizer: Postgres query optimizer
+(12 rows)
+
+select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as 
amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id
+  order by o1.order_id, o2.order_id limit 5;
+ id1 | id2 | amt1  |  amt2   
+-----+-----+-------+---------
+   1 |  51 | 10.50 |  535.50
+   1 | 101 | 10.50 | 1060.50
+   1 | 151 | 10.50 | 1585.50
+   2 |  52 | 21.00 |  546.00
+   2 | 102 | 21.00 | 1071.00
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_selfjoin
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as 
amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id
+  order by o1.order_id, o2.order_id limit 5;
+ id1 | id2 | amt1  |  amt2   
+-----+-----+-------+---------
+   1 |  51 | 10.50 |  535.50
+   1 | 101 | 10.50 | 1060.50
+   1 | 151 | 10.50 | 1585.50
+   2 |  52 | 21.00 |  546.00
+   2 | 102 | 21.00 | 1071.00
+(5 rows)
+
+-- 21. GROUP BY with multi-column key on join
+create materialized view mv_aqj_grp_multi as
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'region, status' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_grp_multi;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+                                   QUERY PLAN                                  
  
+---------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Finalize HashAggregate
+         Group Key: c.region, o.status
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: c.region, o.status
+               ->  Streaming Partial HashAggregate
+                     Group Key: c.region, o.status
+                     ->  Hash Join
+                           Hash Cond: (o.customer_id = c.customer_id)
+                           ->  Seq Scan on aqj_orders o
+                           ->  Hash
+                                 ->  Broadcast Motion 3:3  (slice3; segments: 
3)
+                                       ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(14 rows)
+
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+ region |  status   | cnt |  total   
+--------+-----------+-----+----------
+ east   | cancelled |  16 | 16968.00
+ east   | delivered |  16 | 16464.00
+ east   | pending   |  16 | 16968.00
+ east   | shipped   |  16 | 16464.00
+ north  | cancelled |  18 | 19425.00
+ north  | delivered |  16 | 16800.00
+(6 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_grp_multi
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+ region |  status   | cnt |  total   
+--------+-----------+-----+----------
+ east   | cancelled |  16 | 16968.00
+ east   | delivered |  16 | 16464.00
+ east   | pending   |  16 | 16968.00
+ east   | shipped   |  16 | 16464.00
+ north  | cancelled |  18 | 19425.00
+ north  | delivered |  16 | 16800.00
+(6 rows)
+
+-- 22. Four-table join with WHERE and aggregate
+create materialized view mv_aqj_3way_agg as
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'region, category' as the Apache Cloudberry data distribution key for this 
table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_3way_agg;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+                                              QUERY PLAN                       
                       
+------------------------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Finalize HashAggregate
+         Group Key: c.region, p.category
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: c.region, p.category
+               ->  Streaming Partial HashAggregate
+                     Group Key: c.region, p.category
+                     ->  Hash Join
+                           Hash Cond: (oi.product_id = p.product_id)
+                           ->  Hash Join
+                                 Hash Cond: (oi.order_id = o.order_id)
+                                 ->  Seq Scan on aqj_order_items oi
+                                 ->  Hash
+                                       ->  Broadcast Motion 3:3  (slice3; 
segments: 3)
+                                             ->  Hash Join
+                                                   Hash Cond: (o.customer_id = 
c.customer_id)
+                                                   ->  Redistribute Motion 3:3 
 (slice4; segments: 3)
+                                                         Hash Key: 
o.customer_id
+                                                         ->  Seq Scan on 
aqj_orders o
+                                                               Filter: (status 
= 'delivered'::text)
+                                                   ->  Hash
+                                                         ->  Seq Scan on 
aqj_customers c
+                           ->  Hash
+                                 ->  Broadcast Motion 3:3  (slice5; segments: 
3)
+                                       ->  Seq Scan on aqj_products p
+ Optimizer: Postgres query optimizer
+(26 rows)
+
+select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category
+  order by c.region, p.category limit 6;
+ region |  category   | total_qty | line_count 
+--------+-------------+-----------+------------
+ east   | books       |        30 |          5
+ east   | clothing    |        20 |         10
+ east   | electronics |        50 |          5
+ east   | food        |        80 |         10
+ east   | toys        |        40 |         10
+ north  | books       |        60 |         10
+(6 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_3way_agg
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category
+  order by c.region, p.category limit 6;
+ region |  category   | total_qty | line_count 
+--------+-------------+-----------+------------
+ east   | books       |        30 |          5
+ east   | clothing    |        20 |         10
+ east   | electronics |        50 |          5
+ east   | food        |        80 |         10
+ east   | toys        |        40 |         10
+ north  | books       |        60 |         10
+(6 rows)
+
+-- 23. Implicit four-table join (comma style)
+create materialized view mv_aqj_implicit3 as
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 
'order_id' as the Apache Cloudberry data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make 
sure column(s) chosen are the optimal data distribution key to minimize skew.
+analyze mv_aqj_implicit3;
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+                                     QUERY PLAN                                
     
+------------------------------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (oi.product_id = p.product_id)
+         ->  Hash Join
+               Hash Cond: (oi.order_id = o.order_id)
+               ->  Seq Scan on aqj_order_items oi
+               ->  Hash
+                     ->  Broadcast Motion 3:3  (slice2; segments: 3)
+                           ->  Hash Join
+                                 Hash Cond: (o.customer_id = c.customer_id)
+                                 ->  Redistribute Motion 3:3  (slice3; 
segments: 3)
+                                       Hash Key: o.customer_id
+                                       ->  Seq Scan on aqj_orders o
+                                             Filter: (status = 'pending'::text)
+                                 ->  Hash
+                                       ->  Seq Scan on aqj_customers c
+         ->  Hash
+               ->  Broadcast Motion 3:3  (slice4; segments: 3)
+                     ->  Seq Scan on aqj_products p
+ Optimizer: Postgres query optimizer
+(20 rows)
+
+select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending'
+  order by o.order_id, p.name limit 5;
+ order_id |  name  | product_name 
+----------+--------+--------------
+        1 | cust_2 | prod_11
+        1 | cust_2 | prod_21
+        5 | cust_6 | prod_15
+        5 | cust_6 | prod_25
+        5 | cust_6 | prod_5
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_implicit3
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending'
+  order by o.order_id, p.name limit 5;
+ order_id |  name  | product_name 
+----------+--------+--------------
+        1 | cust_2 | prod_11
+        1 | cust_2 | prod_21
+        5 | cust_6 | prod_15
+        5 | cust_6 | prod_25
+        5 | cust_6 | prod_5
+(5 rows)
+
+-- 24. Result correctness across DML + REFRESH cycle
+insert into aqj_orders values(201, 1, 9999.99, 'shipped', '2025-12-31');
+set enable_answer_query_using_materialized_views = on;
+-- Stale: should NOT use MV
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Hash Join
+         Hash Cond: (o.customer_id = c.customer_id)
+         ->  Redistribute Motion 3:3  (slice2; segments: 3)
+               Hash Key: o.customer_id
+               ->  Seq Scan on aqj_orders o
+                     Filter: (status = 'shipped'::text)
+         ->  Hash
+               ->  Seq Scan on aqj_customers c
+ Optimizer: Postgres query optimizer
+(10 rows)
+
+-- Refresh and verify MV is used again
+refresh materialized view mv_aqj_orders_cust;
+analyze mv_aqj_orders_cust;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+                QUERY PLAN                
+------------------------------------------
+ Gather Motion 3:1  (slice1; segments: 3)
+   ->  Seq Scan on mv_aqj_orders_cust
+ Optimizer: Postgres query optimizer
+(3 rows)
+
+-- The new row should appear in results via MV scan
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped' and o.order_id = 201;
+ order_id | amount  |  name  | region 
+----------+---------+--------+--------
+      201 | 9999.99 | cust_1 | west
+(1 row)
+
+-- 25. Post-DML comprehensive: refresh all, then verify GUC off vs on results
+refresh materialized view mv_aqj_order_details;
+refresh materialized view mv_aqj_expr;
+refresh materialized view mv_aqj_selfjoin;
+refresh materialized view mv_aqj_grp_multi;
+refresh materialized view mv_aqj_3way_agg;
+refresh materialized view mv_aqj_implicit3;
+analyze mv_aqj_order_details;
+analyze mv_aqj_expr;
+analyze mv_aqj_selfjoin;
+analyze mv_aqj_grp_multi;
+analyze mv_aqj_3way_agg;
+analyze mv_aqj_implicit3;
+-- Verify four-table join results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+ order_id | customer_name | product_name | quantity | price  
+----------+---------------+--------------+----------+--------
+        1 | cust_2        | prod_11      |        1 |  65.89
+        1 | cust_2        | prod_21      |        1 | 125.79
+        2 | cust_3        | prod_12      |        2 |  71.88
+        2 | cust_3        | prod_2       |        2 |  11.98
+        2 | cust_3        | prod_22      |        2 | 131.78
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+ order_id | customer_name | product_name | quantity | price  
+----------+---------------+--------------+----------+--------
+        1 | cust_2        | prod_11      |        1 |  65.89
+        1 | cust_2        | prod_21      |        1 | 125.79
+        2 | cust_3        | prod_12      |        2 |  71.88
+        2 | cust_3        | prod_2       |        2 |  11.98
+        2 | cust_3        | prod_22      |        2 | 131.78
+(5 rows)
+
+-- Verify expression MV results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+ order_id | amount_with_tax |  name  | region_upper 
+----------+-----------------+--------+--------------
+        1 |          11.550 | cust_2 | NORTH
+        2 |          23.100 | cust_3 | EAST
+        3 |          34.650 | cust_4 | WEST
+        4 |          46.200 | cust_5 | NORTH
+        5 |          57.750 | cust_6 | EAST
+(5 rows)
+
+set enable_answer_query_using_materialized_views = on;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+ order_id | amount_with_tax |  name  | region_upper 
+----------+-----------------+--------+--------------
+        1 |          11.550 | cust_2 | NORTH
+        2 |          23.100 | cust_3 | EAST
+        3 |          34.650 | cust_4 | WEST
+        4 |          46.200 | cust_5 | NORTH
+        5 |          57.750 | cust_6 | EAST
+(5 rows)
+
+-- Verify multi-key GROUP BY results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+ region |  status   | cnt |  total   
+--------+-----------+-----+----------
+ east   | cancelled |  16 | 16968.00
+ east   | delivered |  16 | 16464.00
+ east   | pending   |  16 | 16968.00
+ east   | shipped   |  16 | 16464.00
+ north  | cancelled |  18 | 19425.00
+ north  | delivered |  16 | 16800.00
+(6 rows)
+
+set enable_answer_query_using_materialized_views = on;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+ region |  status   | cnt |  total   
+--------+-----------+-----+----------
+ east   | cancelled |  16 | 16968.00
+ east   | delivered |  16 | 16464.00
+ east   | pending   |  16 | 16968.00
+ east   | shipped   |  16 | 16464.00
+ north  | cancelled |  18 | 19425.00
+ north  | delivered |  16 | 16800.00
+(6 rows)
+
+-- Clean up AQUMV join test objects
+drop materialized view mv_aqj_implicit3;
+drop materialized view mv_aqj_3way_agg;
+drop materialized view mv_aqj_grp_multi;
+drop materialized view mv_aqj_selfjoin;
+drop materialized view mv_aqj_compound_where;
+drop materialized view mv_aqj_expr;
+drop materialized view mv_aqj_cust_summary;
+drop materialized view mv_aqj_order_details;
+drop materialized view mv_aqj_orders_cust;
+drop materialized view mv_aqj_implicit;
+drop materialized view mv_aqj_join3;
+drop materialized view mv_aqj_agg;
+drop materialized view mv_aqj_where;
+drop materialized view mv_aqj_join2;
+drop table aqj_order_items;
+drop table aqj_products;
+drop table aqj_customers;
+drop table aqj_orders;
+drop table aqj_t3;
+drop table aqj_t2;
+drop table aqj_t1;
 -- test drop table
 select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 
'mv2', 'mv3');
  mvname | datastatus 
@@ -925,12 +2193,12 @@ HINT:  The 'DISTRIBUTED BY' clause determines the 
distribution of data. Make sur
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 insert into par_1_prt_1 values (1, 1, 1);
@@ -938,9 +2206,9 @@ insert into par_1_prt_1 values (1, 1, 1);
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
+ mv_par1_2 | u
  mv_par2   | u
  mv_par2_1 | u
- mv_par1_2 | u
  mv_par1_1 | i
  mv_par1   | i
  mv_par    | i
@@ -951,12 +2219,12 @@ insert into par values (1, 2, 2);
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | i
- mv_par2_1 | u
  mv_par1_2 | u
+ mv_par2_1 | u
  mv_par1_1 | i
  mv_par1   | i
  mv_par    | i
+ mv_par2   | i
 (6 rows)
 
 refresh materialized view mv_par;
@@ -971,11 +2239,11 @@ insert into par_1_prt_2_2_prt_1 values (1, 2, 1);
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | i
- mv_par2_1 | i
  mv_par1_2 | u
  mv_par1   | u
  mv_par1_1 | u
+ mv_par2_1 | i
+ mv_par2   | i
  mv_par    | i
 (6 rows)
 
@@ -986,11 +2254,11 @@ truncate par_1_prt_2;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | e
- mv_par2_1 | e
  mv_par1_2 | u
  mv_par1   | u
  mv_par1_1 | u
+ mv_par2_1 | e
+ mv_par2   | e
  mv_par    | e
 (6 rows)
 
@@ -1000,11 +2268,11 @@ truncate par_1_prt_2;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | e
- mv_par2_1 | e
  mv_par1_2 | u
  mv_par1   | u
  mv_par1_1 | u
+ mv_par2_1 | e
+ mv_par2   | e
  mv_par    | e
 (6 rows)
 
@@ -1018,9 +2286,9 @@ vacuum full par_1_prt_1_2_prt_1;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
+ mv_par1_2 | u
  mv_par2   | u
  mv_par2_1 | u
- mv_par1_2 | u
  mv_par1_1 | r
  mv_par1   | r
  mv_par    | r
@@ -1038,8 +2306,8 @@ select mvname, datastatus from gp_matview_aux where 
mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
  mv_par2   | r
- mv_par2_1 | r
  mv_par    | r
+ mv_par2_1 | r
  mv_par1_2 | r
  mv_par1   | r
  mv_par1_1 | r
@@ -1058,10 +2326,10 @@ NOTICE:  table has parent, setting distribution columns 
to match parent table
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
  mv_par1   | e
  mv_par    | e
 (6 rows)
@@ -1089,10 +2357,10 @@ alter table par_1_prt_1 detach partition 
par_1_prt_1_2_prt_1;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
  mv_par1   | e
  mv_par    | e
 (6 rows)
@@ -1107,10 +2375,10 @@ alter table par_1_prt_1 attach partition new_par for 
values from (4) to (5);
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
  mv_par1   | e
  mv_par    | e
 (6 rows)
@@ -1130,12 +2398,12 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 insert into par values(1, 1, 1), (1, 1, 2);
@@ -1155,23 +2423,23 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 insert into par_1_prt_2_2_prt_1 values(2, 2, 1);
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | i
- mv_par2_1 | i
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2_1 | i
+ mv_par2   | i
  mv_par    | i
 (6 rows)
 
@@ -1180,23 +2448,23 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 delete from par where b = 2  and c = 1;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | e
- mv_par2_1 | e
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2_1 | e
+ mv_par2   | e
  mv_par    | e
 (6 rows)
 
@@ -1205,21 +2473,21 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 delete from par_1_prt_1_2_prt_2;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
+ mv_par1_1 | u
  mv_par2   | u
  mv_par2_1 | u
- mv_par1_1 | u
  mv_par1_2 | e
  mv_par1   | e
  mv_par    | e
@@ -1231,12 +2499,12 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 update par set c = 2 where b = 1 and c = 1;
@@ -1257,12 +2525,12 @@ begin;
 select mvname, datastatus from gp_matview_aux where mvname like 'mv_par%';
   mvname   | datastatus 
 -----------+------------
- mv_par2   | u
- mv_par2_1 | u
  mv_par    | u
  mv_par1   | u
  mv_par1_1 | u
  mv_par1_2 | u
+ mv_par2   | u
+ mv_par2_1 | u
 (6 rows)
 
 update par set c = 2, a = 2 where  b = 1 and c = 1;
@@ -1281,6 +2549,7 @@ abort;
 -- Test report warning if extend protocol data is not consumed.
 --start_ignore
 drop extension gp_inject_fault;
+ERROR:  extension "gp_inject_fault" does not exist
 create extension gp_inject_fault;
 --end_ignore
 select gp_inject_fault_infinite('consume_extend_protocol_data', 'skip', dbid)
diff --git a/src/test/regress/sql/matview_data.sql 
b/src/test/regress/sql/matview_data.sql
index 059a5a97bf4..8b5a56986e0 100644
--- a/src/test/regress/sql/matview_data.sql
+++ b/src/test/regress/sql/matview_data.sql
@@ -343,6 +343,541 @@ drop table jt3;
 drop table jt2;
 drop table jt1;
 
+--
+-- Test AQUMV (Answer Query Using Materialized Views) with join queries.
+-- Each matching test shows EXPLAIN + SELECT with GUC off (original plan),
+-- then EXPLAIN + SELECT with GUC on (MV rewrite). Results must match.
+--
+create table aqj_t1(a int, b int) distributed by (a);
+create table aqj_t2(a int, b int) distributed by (a);
+create table aqj_t3(a int, b int) distributed by (a);
+insert into aqj_t1 select i, i*10 from generate_series(1, 100) i;
+insert into aqj_t2 select i, i*100 from generate_series(1, 100) i;
+insert into aqj_t3 select i, i*1000 from generate_series(1, 100) i;
+analyze aqj_t1;
+analyze aqj_t2;
+analyze aqj_t3;
+
+-- 1. Two-table INNER JOIN exact match
+create materialized view mv_aqj_join2 as
+  select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a;
+analyze mv_aqj_join2;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+
+-- 2. Join with WHERE clause
+create materialized view mv_aqj_where as
+  select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a 
where aqj_t1.a > 5;
+analyze mv_aqj_where;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 5;
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where 
aqj_t1.a > 5 order by 1 limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 5;
+select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a where 
aqj_t1.a > 5 order by 1 limit 5;
+
+-- 3. Join with GROUP BY + aggregate
+create materialized view mv_aqj_agg as
+  select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a;
+analyze mv_aqj_agg;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a group by aqj_t1.a;
+select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a order by 1 limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a group by aqj_t1.a;
+select aqj_t1.a, count(*) as cnt from aqj_t1 join aqj_t2 on aqj_t1.a = 
aqj_t2.a group by aqj_t1.a order by 1 limit 5;
+
+-- 4. Non-match: different WHERE clause (should show Hash Join, not MV)
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a where aqj_t1.a > 10;
+
+-- 5. Non-match: different target list
+explain(costs off) select aqj_t1.b, aqj_t2.a from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+
+-- 6. Non-match: different join type (INNER vs LEFT)
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 left join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+
+-- 7. Three-table join
+create materialized view mv_aqj_join3 as
+  select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+analyze mv_aqj_join3;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a
+  order by 1 limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a;
+select aqj_t1.a, aqj_t2.b, aqj_t3.b as c
+  from aqj_t1 join aqj_t2 on aqj_t1.a = aqj_t2.a join aqj_t3 on aqj_t2.a = 
aqj_t3.a
+  order by 1 limit 5;
+
+-- 8. Implicit join (FROM t1, t2 WHERE ...)
+create materialized view mv_aqj_implicit as
+  select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a;
+analyze mv_aqj_implicit;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where 
aqj_t1.a = aqj_t2.a;
+select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where 
aqj_t1.a = aqj_t2.a;
+select aqj_t1.a, aqj_t2.b from aqj_t1, aqj_t2 where aqj_t1.a = aqj_t2.a order 
by 1 limit 5;
+
+-- 9. MV not up-to-date: after INSERT on base table
+insert into aqj_t1 values(999, 9990);
+set enable_answer_query_using_materialized_views = on;
+-- Should NOT use mv_aqj_join2 (status is 'i')
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+
+-- 10. After REFRESH: should use MV again
+refresh materialized view mv_aqj_join2;
+analyze mv_aqj_join2;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+
+-- 11. GUC off: should NOT use MV
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select aqj_t1.a, aqj_t2.b from aqj_t1 join aqj_t2 on 
aqj_t1.a = aqj_t2.a;
+
+--
+-- More complex join AQUMV test cases with richer schemas
+--
+
+create table aqj_orders(
+  order_id int,
+  customer_id int,
+  amount numeric(10,2),
+  status text,
+  order_date date
+) distributed by (order_id);
+
+create table aqj_customers(
+  customer_id int,
+  name text,
+  region text,
+  credit_limit numeric(10,2)
+) distributed by (customer_id);
+
+create table aqj_products(
+  product_id int,
+  name text,
+  category text,
+  price numeric(10,2)
+) distributed by (product_id);
+
+create table aqj_order_items(
+  item_id int,
+  order_id int,
+  product_id int,
+  quantity int
+) distributed by (item_id);
+
+insert into aqj_customers select i, 'cust_' || i, case when i % 3 = 0 then 
'east' when i % 3 = 1 then 'west' else 'north' end, (i * 100)::numeric(10,2) 
from generate_series(1, 50) i;
+insert into aqj_orders select i, (i % 50) + 1, (i * 10.5)::numeric(10,2), case 
when i % 4 = 0 then 'shipped' when i % 4 = 1 then 'pending' when i % 4 = 2 then 
'delivered' else 'cancelled' end, '2024-01-01'::date + (i % 365) from 
generate_series(1, 200) i;
+insert into aqj_products select i, 'prod_' || i, case when i % 5 = 0 then 
'electronics' when i % 5 = 1 then 'books' when i % 5 = 2 then 'clothing' when i 
% 5 = 3 then 'food' else 'toys' end, (i * 5.99)::numeric(10,2) from 
generate_series(1, 30) i;
+insert into aqj_order_items select i, (i % 200) + 1, (i % 30) + 1, (i % 10) + 
1 from generate_series(1, 500) i;
+
+analyze aqj_customers;
+analyze aqj_orders;
+analyze aqj_products;
+analyze aqj_order_items;
+
+-- 12. Join with multiple columns + WHERE on text column
+create materialized view mv_aqj_orders_cust as
+  select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+analyze mv_aqj_orders_cust;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped'
+  order by o.order_id limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped'
+  order by o.order_id limit 5;
+
+-- 13. Four-table join
+create materialized view mv_aqj_order_details as
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+analyze mv_aqj_order_details;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+
+-- 14. GROUP BY on join with multiple aggregates: sum, count, avg
+create materialized view mv_aqj_cust_summary as
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+analyze mv_aqj_cust_summary;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region
+  order by c.region;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+select c.region, count(*) as order_count, sum(o.amount) as total_amount, 
avg(o.amount) as avg_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region
+  order by c.region;
+
+-- 15. Join with expression in target list (arithmetic + function)
+create materialized view mv_aqj_expr as
+  select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, 
upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+analyze mv_aqj_expr;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+
+-- 16. Non-match: same tables + expressions, but extra WHERE (should NOT match 
mv_aqj_expr)
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount * 1.1 as amount_with_tax, 
c.name, upper(c.region) as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where c.region = 'east';
+
+-- 17. Non-match: same tables but different aggregate target list
+explain(costs off)
+  select c.region, sum(o.amount) as total_amount
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region;
+
+-- 18. Non-match: different join order (o JOIN c vs c JOIN o)
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_customers c join aqj_orders o on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+
+-- 19. Join with compound WHERE (multiple AND conditions)
+create materialized view mv_aqj_compound_where as
+  select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+analyze mv_aqj_compound_where;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off) select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50
+  order by o.order_id limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off) select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50;
+select o.order_id, o.amount, c.name
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'pending' and c.region = 'west' and o.amount > 50
+  order by o.order_id limit 5;
+
+-- 20. Self-join
+create materialized view mv_aqj_selfjoin as
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+analyze mv_aqj_selfjoin;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as 
amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id
+  order by o1.order_id, o2.order_id limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount 
as amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id;
+select o1.order_id as id1, o2.order_id as id2, o1.amount as amt1, o2.amount as 
amt2
+  from aqj_orders o1 join aqj_orders o2 on o1.customer_id = o2.customer_id
+  where o1.order_id < o2.order_id
+  order by o1.order_id, o2.order_id limit 5;
+
+-- 21. GROUP BY with multi-column key on join
+create materialized view mv_aqj_grp_multi as
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+analyze mv_aqj_grp_multi;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+
+-- 22. Four-table join with WHERE and aggregate
+create materialized view mv_aqj_3way_agg as
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+analyze mv_aqj_3way_agg;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category
+  order by c.region, p.category limit 6;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category;
+select c.region, p.category, sum(oi.quantity) as total_qty, count(*) as 
line_count
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  where o.status = 'delivered'
+  group by c.region, p.category
+  order by c.region, p.category limit 6;
+
+-- 23. Implicit four-table join (comma style)
+create materialized view mv_aqj_implicit3 as
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+analyze mv_aqj_implicit3;
+
+set enable_answer_query_using_materialized_views = off;
+explain(costs off)
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending'
+  order by o.order_id, p.name limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+explain(costs off)
+  select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending';
+select o.order_id, c.name, p.name as product_name
+  from aqj_orders o, aqj_customers c, aqj_order_items oi, aqj_products p
+  where o.customer_id = c.customer_id and o.order_id = oi.order_id and 
oi.product_id = p.product_id
+    and o.status = 'pending'
+  order by o.order_id, p.name limit 5;
+
+-- 24. Result correctness across DML + REFRESH cycle
+insert into aqj_orders values(201, 1, 9999.99, 'shipped', '2025-12-31');
+set enable_answer_query_using_materialized_views = on;
+-- Stale: should NOT use MV
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+-- Refresh and verify MV is used again
+refresh materialized view mv_aqj_orders_cust;
+analyze mv_aqj_orders_cust;
+explain(costs off) select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped';
+-- The new row should appear in results via MV scan
+select o.order_id, o.amount, c.name, c.region
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  where o.status = 'shipped' and o.order_id = 201;
+
+-- 25. Post-DML comprehensive: refresh all, then verify GUC off vs on results
+refresh materialized view mv_aqj_order_details;
+refresh materialized view mv_aqj_expr;
+refresh materialized view mv_aqj_selfjoin;
+refresh materialized view mv_aqj_grp_multi;
+refresh materialized view mv_aqj_3way_agg;
+refresh materialized view mv_aqj_implicit3;
+analyze mv_aqj_order_details;
+analyze mv_aqj_expr;
+analyze mv_aqj_selfjoin;
+analyze mv_aqj_grp_multi;
+analyze mv_aqj_3way_agg;
+analyze mv_aqj_implicit3;
+
+-- Verify four-table join results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+select o.order_id, c.name as customer_name, p.name as product_name, 
oi.quantity, p.price
+  from aqj_orders o
+  join aqj_customers c on o.customer_id = c.customer_id
+  join aqj_order_items oi on o.order_id = oi.order_id
+  join aqj_products p on oi.product_id = p.product_id
+  order by o.order_id, p.name limit 5;
+
+-- Verify expression MV results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+
+set enable_answer_query_using_materialized_views = on;
+select o.order_id, o.amount * 1.1 as amount_with_tax, c.name, upper(c.region) 
as region_upper
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  order by o.order_id limit 5;
+
+-- Verify multi-key GROUP BY results after DML+refresh
+set enable_answer_query_using_materialized_views = off;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+
+set enable_answer_query_using_materialized_views = on;
+select c.region, o.status, count(*) as cnt, sum(o.amount) as total
+  from aqj_orders o join aqj_customers c on o.customer_id = c.customer_id
+  group by c.region, o.status
+  order by c.region, o.status limit 6;
+
+-- Clean up AQUMV join test objects
+drop materialized view mv_aqj_implicit3;
+drop materialized view mv_aqj_3way_agg;
+drop materialized view mv_aqj_grp_multi;
+drop materialized view mv_aqj_selfjoin;
+drop materialized view mv_aqj_compound_where;
+drop materialized view mv_aqj_expr;
+drop materialized view mv_aqj_cust_summary;
+drop materialized view mv_aqj_order_details;
+drop materialized view mv_aqj_orders_cust;
+drop materialized view mv_aqj_implicit;
+drop materialized view mv_aqj_join3;
+drop materialized view mv_aqj_agg;
+drop materialized view mv_aqj_where;
+drop materialized view mv_aqj_join2;
+drop table aqj_order_items;
+drop table aqj_products;
+drop table aqj_customers;
+drop table aqj_orders;
+drop table aqj_t3;
+drop table aqj_t2;
+drop table aqj_t1;
+
 -- test drop table
 select mvname, datastatus from gp_matview_aux where mvname in ('mv0','mv1', 
'mv2', 'mv3');
 drop materialized view mv2;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to