Hi,
I'd like to propose to support parameterized foreign joins. Attached is
a patch for that, which has been created on top of [1].
In [2], I said that postgres_fdw could create parameterized paths from
joinable combinations of the cheapest-parameterized paths for the
inner/outer relations, but for identifying the joinable combinations,
postgres_fdw would need to do the same work as the core, which wouldn't
be good. Also, I thought that the parameterized paths could be created
by using the required_outer relations in ParamPathInfos stored in the
join relation's ppilist, which I thought would have already built
ParamPathInfos for parameterized local-join paths, but I noticed it
isn't guaranteed that such local-join paths are always created and their
ParamPathInfos are always stored in the pplilist. Instead, I'd propose
to collect the required_outer outer relations that the core tried to
create parameterized local-join paths for during add_paths_to_joinrel(),
and build parameterized foreign-join paths for those outer relations
during postgresGetForeignJoinPaths().
Here is an example:
postgres=# create extension postgres_fdw;
CREATE EXTENSION
postgres=# create server loopback foreign data wrapper postgres_fdw
options (dbname 'postgres');
CREATE SERVER
postgres=# create user mapping for public server loopback;
CREATE USER MAPPING
postgres=# create table t1 (a int , b int, CONSTRAINT t1_pkey PRIMARY
KEY (a));
CREATE TABLE
postgres=# create table t2 (a int , b int, CONSTRAINT t2_pkey PRIMARY
KEY (a));
CREATE TABLE
postgres=# create foreign table ft1 (a int, b int) server loopback
options (table_name 't1');
CREATE FOREIGN TABLE
postgres=# create foreign table ft2 (a int, b int) server loopback
options (table_name 't2');
CREATE FOREIGN TABLE
postgres=# insert into t1 select id, id % 10 from generate_series(1,
10000) id;
INSERT 0 10000
postgres=# insert into t2 select id, id % 10 from generate_series(1,
10000) id;
INSERT 0 10000
postgres=# alter foreign table ft1 options (use_remote_estimate 'true');
ALTER FOREIGN TABLE
postgres=# alter foreign table ft2 options (use_remote_estimate 'true');
ALTER FOREIGN TABLE
postgres=# create table test (a int, b int);
CREATE TABLE
postgres=# insert into test values (1, 1);
INSERT 0 1
postgres=# analyze test;
ANALYZE
postgres=# explain verbose select * from test r1 left join (ft1 r2 inner
join ft2 r3 on (r2.a = r3.a)) on (r3.a = r1.a) limit 1;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=100.58..100.92 rows=1 width=24)
Output: r1.a, r1.b, r2.a, r2.b, r3.a, r3.b
-> Nested Loop Left Join (cost=100.58..117.67 rows=50 width=24)
Output: r1.a, r1.b, r2.a, r2.b, r3.a, r3.b
-> Seq Scan on public.test r1 (cost=0.00..1.01 rows=1 width=8)
Output: r1.a, r1.b
-> Foreign Scan (cost=100.58..116.65 rows=1 width=16)
Output: r2.a, r2.b, r3.a, r3.b
Relations: (public.ft1 r2) INNER JOIN (public.ft2 r3)
Remote SQL: SELECT r2.a, r2.b, r3.a, r3.b FROM
(public.t1 r2 INNER JOIN public.t2 r3 ON (((r2.a = r3.a)))) WHERE ((r3.a
= $1::integer))
(10 rows)
Notes:
* Since add_paths_to_joinrel() for join {B, A} might provide different
parameterizations of result local-join paths from that for join {A, B},
so the patch allows postgresGetForeignJoinPaths() to build paths after
that function has pushdown_safe=true.
* create_foreignscan_path() only calls get_baserel_parampathinfo() to
set the param_info member. We would need to do something about that so
it can handle the parameterized-foreign-join-path case properly. Though
I left that function as-is because get_baserel_parampathinfo() can
return the ParamPathInfo created in postgresGetForeignJoinPaths() for an
input parameterized foreign-join path, by accident.
I'll add this to the upcoming commitfest.
Best regards,
Etsuro Fujita
[1]
https://www.postgresql.org/message-id/0700eb97-d9db-33da-4ba2-e28d2a1631d9%40lab.ntt.co.jp
[2]
https://www.postgresql.org/message-id/e18b9bf5-1557-cb9c-001e-0861a1d7dfce%40lab.ntt.co.jp
*** a/contrib/postgres_fdw/deparse.c
--- b/contrib/postgres_fdw/deparse.c
***************
*** 851,857 **** deparse_type_name(Oid type_oid, int32 typemod)
* foreign server.
*/
List *
! build_tlist_to_deparse(RelOptInfo *foreignrel)
{
List *tlist = NIL;
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private;
--- 851,857 ----
* foreign server.
*/
List *
! build_tlist_to_deparse(RelOptInfo *foreignrel, List *local_param_conds)
{
List *tlist = NIL;
PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) foreignrel->fdw_private;
***************
*** 873,878 **** build_tlist_to_deparse(RelOptInfo *foreignrel)
--- 873,882 ----
tlist = add_to_flat_tlist(tlist,
pull_var_clause((Node *) fpinfo->local_conds,
PVC_RECURSE_PLACEHOLDERS));
+ if (local_param_conds)
+ tlist = add_to_flat_tlist(tlist,
+ pull_var_clause((Node *) local_param_conds,
+ PVC_RECURSE_PLACEHOLDERS));
return tlist;
}
*** a/contrib/postgres_fdw/postgres_fdw.c
--- b/contrib/postgres_fdw/postgres_fdw.c
***************
*** 412,420 **** static List *get_useful_pathkeys_for_relation(PlannerInfo *root,
--- 412,426 ----
static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel);
static void add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel,
Path *epq_path);
+ static void add_parameterized_paths_for_rel(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ List *req_outer_list);
static void add_foreign_grouping_paths(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *grouped_rel);
+ static List *build_joinrel_param_join_conds(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Relids required_outer);
/*
***************
*** 1131,1139 **** postgresGetForeignPlan(PlannerInfo *root,
/*
* create_scan_plan() and create_foreignscan_plan() pass
* rel->baserestrictinfo + parameterization clauses through
! * scan_clauses. For a join rel->baserestrictinfo is NIL and we are
! * not considering parameterization right now, so there should be no
! * scan_clauses for a joinrel and upper rel either.
*/
Assert(!scan_clauses);
}
--- 1137,1144 ----
/*
* create_scan_plan() and create_foreignscan_plan() pass
* rel->baserestrictinfo + parameterization clauses through
! * scan_clauses, but for a join or upper relation, there should be no
! * scan_clauses.
*/
Assert(!scan_clauses);
}
***************
*** 1186,1197 **** postgresGetForeignPlan(PlannerInfo *root,
if (foreignrel->reloptkind == RELOPT_JOINREL ||
foreignrel->reloptkind == RELOPT_UPPER_REL)
{
! /* For a join relation, get the conditions from fdw_private structure */
! remote_conds = fpinfo->remote_conds;
! local_exprs = fpinfo->local_conds;
! /* Build the list of columns to be fetched from the foreign server. */
! fdw_scan_tlist = build_tlist_to_deparse(foreignrel);
/*
* Ensure that the outer plan produces a tuple whose descriptor
--- 1191,1246 ----
if (foreignrel->reloptkind == RELOPT_JOINREL ||
foreignrel->reloptkind == RELOPT_UPPER_REL)
{
! /*
! * Get the remote and local conditions from fdw_private structure.
! * If this is a parameterized foreign join, we also need to enforce
! * all the join clauses available from the outer relation(s). Also,
! * build the list of columns to be fetched from the foreign server.
! */
! if (best_path->path.param_info)
! {
! List *param_join_conds;
! List *remote_param_join_conds;
! List *local_param_join_conds;
! List *local_param_join_exprs;
! Assert(foreignrel->reloptkind == RELOPT_JOINREL);
!
! /* Identify all join clauses that are movable to this rel */
! param_join_conds =
! build_joinrel_param_join_conds(root, foreignrel,
! PATH_REQ_OUTER((Path *) best_path));
!
! /*
! * param_join_conds might contain both clauses that are safe to
! * send across, and clauses that aren't.
! */
! classifyConditions(root, foreignrel,
! param_join_conds,
! &remote_param_join_conds,
! &local_param_join_conds);
!
! /* Get the remote and local conditions */
! remote_conds = list_concat(list_copy(remote_param_join_conds),
! fpinfo->remote_conds);
! local_param_join_exprs =
! get_actual_clauses(local_param_join_conds);
! local_exprs = list_concat(list_copy(local_param_join_exprs),
! fpinfo->local_conds);
!
! /* Build the list of the columns */
! fdw_scan_tlist = build_tlist_to_deparse(foreignrel,
! local_param_join_conds);
! }
! else
! {
! /* Get the remote and local conditions */
! remote_conds = fpinfo->remote_conds;
! local_exprs = fpinfo->local_conds;
!
! /* Build the list of the columns */
! fdw_scan_tlist = build_tlist_to_deparse(foreignrel, NIL);
! }
/*
* Ensure that the outer plan produces a tuple whose descriptor
***************
*** 2531,2544 **** estimate_path_cost_size(PlannerInfo *root,
/* Build the list of columns to be fetched from the foreign server. */
if (foreignrel->reloptkind == RELOPT_JOINREL ||
foreignrel->reloptkind == RELOPT_UPPER_REL)
! fdw_scan_tlist = build_tlist_to_deparse(foreignrel);
else
fdw_scan_tlist = NIL;
/*
* The complete list of remote conditions includes everything from
! * baserestrictinfo plus any extra join_conds relevant to this
! * particular path.
*/
remote_conds = list_concat(list_copy(remote_param_join_conds),
fpinfo->remote_conds);
--- 2580,2594 ----
/* Build the list of columns to be fetched from the foreign server. */
if (foreignrel->reloptkind == RELOPT_JOINREL ||
foreignrel->reloptkind == RELOPT_UPPER_REL)
! fdw_scan_tlist = build_tlist_to_deparse(foreignrel,
! local_param_join_conds);
else
fdw_scan_tlist = NIL;
/*
* The complete list of remote conditions includes everything from
! * baserestrictinfo (or top-level non-outer-join clauses in join cases)
! * plus any extra join_conds relevant to this particular path.
*/
remote_conds = list_concat(list_copy(remote_param_join_conds),
fpinfo->remote_conds);
***************
*** 4145,4150 **** foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
--- 4195,4201 ----
fpinfo->outerrel = outerrel;
fpinfo->innerrel = innerrel;
fpinfo->jointype = jointype;
+ fpinfo->extra = extra;
/*
* Pull the other remote conditions from the joining relations into join
***************
*** 4310,4315 **** add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel,
--- 4361,4575 ----
}
}
+ static void
+ add_parameterized_paths_for_rel(PlannerInfo *root, RelOptInfo *joinrel,
+ List *req_outer_list)
+ {
+ PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) joinrel->fdw_private;
+ RelOptInfo *outerrel = fpinfo->outerrel;
+ RelOptInfo *innerrel = fpinfo->innerrel;
+ ListCell *lc;
+
+ /*
+ * If we're not using remote estimates, we can't do anything.
+ */
+ if (!fpinfo->use_remote_estimate)
+ return;
+
+ /*
+ * Build a path for each outer relation in the given required_outer_list.
+ */
+ foreach(lc, req_outer_list)
+ {
+ Relids required_outer = (Relids) lfirst(lc);
+ ForeignPath *path;
+ Path *epq_path;
+ List *param_join_conds;
+ double rows;
+ int width;
+ Cost startup_cost;
+ Cost total_cost;
+ ParamPathInfo *param_info;
+ bool found;
+ ListCell *lc2;
+
+ Assert(!bms_is_empty(required_outer));
+
+ /* If we have already considered the required_outer, skip it */
+ found = false;
+ foreach(lc2, fpinfo->req_outer_list)
+ {
+ Relids considered_outer = (Relids) lfirst(lc2);
+
+ if (bms_equal(considered_outer, required_outer))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ continue;
+
+ /* Save the required_outer */
+ fpinfo->req_outer_list = lappend(fpinfo->req_outer_list,
+ required_outer);
+
+ /* Make an alternative local join path */
+ if (root->parse->commandType == CMD_DELETE ||
+ root->parse->commandType == CMD_UPDATE ||
+ root->rowMarks)
+ {
+ Path *outer_path = outerrel->cheapest_total_path;
+ Path *inner_path = innerrel->cheapest_total_path;
+
+ /* Should be unparameterized */
+ Assert(outer_path->param_info == NULL);
+ Assert(inner_path->param_info == NULL);
+
+ /* Create a parameterized local-join path */
+ epq_path = CreateLocalJoinPath(root, joinrel,
+ outer_path,
+ inner_path,
+ required_outer,
+ fpinfo->jointype,
+ fpinfo->extra);
+ if (!epq_path)
+ {
+ elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not created");
+ continue;
+ }
+ }
+ else
+ epq_path = NULL;
+
+ /* Identify all join clauses that are movable to this rel */
+ param_join_conds = build_joinrel_param_join_conds(root, joinrel,
+ required_outer);
+
+ /* Check if there are safe-to-send-to-remote join clauses */
+ found = false;
+ foreach(lc2, param_join_conds)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc2);
+
+ if (is_foreign_expr(root, joinrel, rinfo->clause))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ continue;
+
+ /* Get a cost estimate from the remote */
+ estimate_path_cost_size(root, joinrel, param_join_conds, NIL,
+ &rows, &width, &startup_cost, &total_cost);
+
+ /* Find or build the joinrel ParamPathInfo */
+ found = false;
+ foreach(lc2, joinrel->ppilist)
+ {
+ ParamPathInfo *ppi = (ParamPathInfo *) lfirst(lc2);
+
+ if (bms_equal(ppi->ppi_req_outer, required_outer))
+ {
+ param_info = ppi;
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ param_info = makeNode(ParamPathInfo);
+ param_info->ppi_req_outer = required_outer;
+ param_info->ppi_rows = 0;
+ param_info->ppi_clauses = NIL;
+ joinrel->ppilist = lappend(joinrel->ppilist, param_info);
+ }
+ param_info->ppi_rows = rows;
+
+ /*
+ * If we found the ParamPathInfo, paths in the pathlist might have
+ * looked at ppi_rows, so fix that to ensure that those match our
+ * idea of the rowcount.
+ */
+ if (found)
+ {
+ foreach(lc2, joinrel->pathlist)
+ {
+ Path *old_path = (Path *) lfirst(lc2);
+
+ if (bms_subset_compare(PATH_REQ_OUTER(old_path),
+ required_outer) == BMS_EQUAL)
+ {
+ /*
+ * Update the rows of the path
+ *
+ * In principle we might need to re-estimate the cost of
+ * the path and re-add the path to the pathlist because
+ * the cost has been calculated using the ppi_rows, but
+ * that would be expensive. However, even if we did do
+ * so, the path is likely to be dominated by the path
+ * created here unless joinrel->consider_param_startup is
+ * true, so it seems OK to live with the approximation.
+ */
+ old_path->rows = rows;
+ }
+ }
+ }
+
+ /* Make the path */
+ path = create_foreignscan_path(root,
+ joinrel,
+ NULL, /* default pathtarget */
+ rows,
+ startup_cost,
+ total_cost,
+ NIL, /* no pathkeys */
+ param_info->ppi_req_outer,
+ epq_path,
+ NIL); /* no fdw_private */
+ add_path(joinrel, (Path *) path);
+ }
+ }
+
+ static List *
+ build_joinrel_param_join_conds(PlannerInfo *root,
+ RelOptInfo *joinrel,
+ Relids required_outer)
+ {
+ List *result;
+ Relids join_and_outer;
+ ListCell *lc;
+
+ /*
+ * Identify all joinclauses that are movable to this join rel given this
+ * parameterization.
+ */
+ join_and_outer = bms_union(joinrel->relids, required_outer);
+ result = NIL;
+ foreach(lc, joinrel->joininfo)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
+
+ if (join_clause_is_movable_into(rinfo,
+ joinrel->relids,
+ join_and_outer))
+ result = lappend(result, rinfo);
+ }
+
+ /*
+ * Add in joinclauses generated by EquivalenceClasses, too
+ */
+ result = list_concat(result,
+ generate_join_implied_equalities(root,
+ join_and_outer,
+ required_outer,
+ joinrel));
+
+ return result;
+ }
+
/*
* postgresGetForeignJoinPaths
* Add possible ForeignPath to joinrel, if join is safe to push down.
***************
*** 4332,4354 **** postgresGetForeignJoinPaths(PlannerInfo *root,
* EvalPlanQual gets triggered. */
/*
! * Skip if this join combination has been considered already.
*/
! if (joinrel->fdw_private)
return;
/*
! * Create unfinished PgFdwRelationInfo entry which is used to indicate
! * that the join relation is already considered, so that we won't waste
! * time in judging safety of join pushdown and adding the same paths again
! * if found safe. Once we know that this join can be pushed down, we fill
! * the entry.
*/
! fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
! fpinfo->pushdown_safe = false;
! joinrel->fdw_private = fpinfo;
! /* attrs_used is only for base relations. */
! fpinfo->attrs_used = NULL;
/*
* If there is a possibility that EvalPlanQual will be executed, we need
--- 4592,4626 ----
* EvalPlanQual gets triggered. */
/*
! * If this is not the first call and pushdown_safe is already true, all
! * we need to do is consider parameterized paths for the join relation.
*/
! if (joinrel->fdw_private &&
! ((PgFdwRelationInfo *) joinrel->fdw_private)->pushdown_safe)
! {
! add_parameterized_paths_for_rel(root, joinrel, extra->req_outer_list);
return;
+ }
/*
! * If this is the first call, create the PgFdwRelationInfo entry. Once
! * we know that the join can be pushed down, we fill in the entry.
*/
! if (!joinrel->fdw_private)
! {
! fpinfo = (PgFdwRelationInfo *) palloc0(sizeof(PgFdwRelationInfo));
! fpinfo->pushdown_safe = false;
! /* attrs_used is only for base relations. */
! fpinfo->attrs_used = NULL;
! /* Initialize req_outer_list to NIL. */
! fpinfo->req_outer_list = NIL;
! joinrel->fdw_private = fpinfo;
! }
! else
! {
! fpinfo = (PgFdwRelationInfo *) joinrel->fdw_private;
! Assert(fpinfo->pushdown_safe == false);
! }
/*
* If there is a possibility that EvalPlanQual will be executed, we need
***************
*** 4445,4451 **** postgresGetForeignJoinPaths(PlannerInfo *root,
/* Consider pathkeys for the join relation */
add_paths_with_pathkeys_for_rel(root, joinrel, epq_path);
! /* XXX Consider parameterized paths for the join relation */
}
/*
--- 4717,4724 ----
/* Consider pathkeys for the join relation */
add_paths_with_pathkeys_for_rel(root, joinrel, epq_path);
! /* Consider parameterized paths for the join relation */
! add_parameterized_paths_for_rel(root, joinrel, extra->req_outer_list);
}
/*
*** a/contrib/postgres_fdw/postgres_fdw.h
--- b/contrib/postgres_fdw/postgres_fdw.h
***************
*** 91,97 **** typedef struct PgFdwRelationInfo
--- 91,100 ----
RelOptInfo *outerrel;
RelOptInfo *innerrel;
JoinType jointype;
+ JoinPathExtraData *extra;
List *joinclauses;
+ List *req_outer_list; /* outer relations considered so far to build
+ * parameterized foreign-join paths */
/* Grouping information */
List *grouped_tlist;
***************
*** 158,164 **** extern void deparseAnalyzeSql(StringInfo buf, Relation rel,
List **retrieved_attrs);
extern void deparseStringLiteral(StringInfo buf, const char *val);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
! extern List *build_tlist_to_deparse(RelOptInfo *foreignrel);
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
RelOptInfo *foreignrel, List *tlist,
List *remote_conds, List *pathkeys,
--- 161,168 ----
List **retrieved_attrs);
extern void deparseStringLiteral(StringInfo buf, const char *val);
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
! extern List *build_tlist_to_deparse(RelOptInfo *foreignrel,
! List *local_param_conds);
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
RelOptInfo *foreignrel, List *tlist,
List *remote_conds, List *pathkeys,
*** a/src/backend/optimizer/path/joinpath.c
--- b/src/backend/optimizer/path/joinpath.c
***************
*** 93,98 **** add_paths_to_joinrel(PlannerInfo *root,
--- 93,99 ----
extra.mergeclauses = NIL;
extra.outersortkeys = NIL;
extra.innersortkeys = NIL;
+ extra.req_outer_list = NIL;
/*
* Find potential mergejoin clauses. We can skip this if we are not
***************
*** 300,305 **** try_nestloop_path(PlannerInfo *root,
--- 301,311 ----
return;
}
+ /* Save required_outer for possible use by the FDW */
+ if (!bms_is_empty(required_outer))
+ extra->req_outer_list = lappend(extra->req_outer_list,
+ bms_copy(required_outer));
+
/*
* Do a precheck to quickly eliminate obviously-inferior paths. We
* calculate a cheap lower bound on the path's cost and then use
***************
*** 427,432 **** try_mergejoin_path(PlannerInfo *root,
--- 433,443 ----
return;
}
+ /* Save required_outer for possible use by the FDW */
+ if (!bms_is_empty(required_outer))
+ extra->req_outer_list = lappend(extra->req_outer_list,
+ bms_copy(required_outer));
+
/*
* If the given paths are already well enough ordered, we can skip doing
* an explicit sort.
***************
*** 503,508 **** try_hashjoin_path(PlannerInfo *root,
--- 514,524 ----
return;
}
+ /* Save required_outer for possible use by the FDW */
+ if (!bms_is_empty(required_outer))
+ extra->req_outer_list = lappend(extra->req_outer_list,
+ bms_copy(required_outer));
+
/*
* See comments in try_nestloop_path(). Also note that hashjoin paths
* never have any output pathkeys, per comments in create_hashjoin_path.
*** a/src/include/nodes/relation.h
--- b/src/include/nodes/relation.h
***************
*** 2021,2026 **** typedef struct SemiAntiJoinFactors
--- 2021,2027 ----
* mergeclauses are the RestrictInfos to use as merge clauses in a mergejoin
* outersortkeys are the sort pathkeys for the outer side of the mergejoin
* innersortkeys are the sort pathkeys for the inner side of the mergejoin
+ * req_outer_list is a list of parameterizations of result paths
*/
typedef struct JoinPathExtraData
{
***************
*** 2034,2039 **** typedef struct JoinPathExtraData
--- 2035,2041 ----
List *mergeclauses;
List *outersortkeys;
List *innersortkeys;
+ List *req_outer_list;
} JoinPathExtraData;
/*
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers