Hi, hackers!
There is a known issue that index only scan (IOS) can only work with
simple index keys based on single attributes and doesn't work with index
expressions. In this patch I propose a solution that adds support of IOS
for index expressions. Here's an example:
create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);
with t1 as (select generate_series(1, 1000) as x),
t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;
Explain results with the patch:
explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1
width=4) (actual time=0.032..0.033 rows=1 loops=1)
Index Cond: ((((a * 1000) + b)) = 1001)
Heap Fetches: 0
Buffers: shared hit=4
Planning time: 0.184 ms
Execution time: 0.077 ms
(6 rows)
Before the patch it was:
explain (analyze, buffers) select a * 1000 + b + c from abc where a *
1000 + b = 1001;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1
width=4) (actual time=0.039..0.041 rows=1 loops=1)
Index Cond: (((a * 1000) + b) = 1001)
Buffers: shared hit=4
Planning time: 0.177 ms
Execution time: 0.088 ms
(5 rows)
This solution has limitations though: the restriction or the target
expression tree (or its part) must match exactly the index. E.g. this
expression will pass the check:
select a * 1000 + b + 100 from ...
but this will fail:
select 100 + a * 1000 + b from ...
because the parser groups it as:
(100 + a * 1000) + b
In this form it won't match any index key. Another case is when we
create index on (a+b) and then make query like 'select b+a ...' or '...
where b+a = smth' -- it won't match. This applies to regular index scan
too. Probably it worth to discuss the way to normalize index expressions
and clauses and work out more convenient way to match them.
Anyway, I will be grateful if you take a look at the patch in
attachment. Any comments and tips are welcome.
Thanks!
--
Ildar Musin
i.mu...@postgrespro.ru
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..9eb0e12 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
@@ -130,7 +131,13 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+ IndexOptInfo *index);
static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
static double adjust_rowcount_for_semijoins(PlannerInfo *root,
Index cur_relid,
@@ -190,7 +197,6 @@ static List *network_prefix_quals(Node *leftop, Oid expr_op, Oid opfamily,
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
-
/*
* create_index_paths()
* Generate all interesting index paths for the given relation.
@@ -1020,7 +1026,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
* index data retrieval anyway.
*/
index_only_scan = (scantype != ST_BITMAPSCAN &&
- check_index_only(rel, index));
+ check_index_only(root, rel, index));
/*
* 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1780,13 +1786,12 @@ find_list_position(Node *node, List **nodelist)
return i;
}
-
/*
* check_index_only
* Determine whether an index-only scan is possible for this index.
*/
static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
{
bool result;
Bitmapset *attrs_used = NULL;
@@ -1836,10 +1841,7 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
{
int attno = index->indexkeys[i];
- /*
- * For the moment, we just ignore index expressions. It might be nice
- * to do something with them, later.
- */
+ /* Handle expressions later */
if (attno == 0)
continue;
@@ -1852,12 +1854,210 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
/* Do we have all the necessary attributes? */
result = bms_is_subset(attrs_used, index_canreturn_attrs);
+ /* Check index expressions */
+ if (!result && index->indexprs)
+ {
+ /*
+ * We need to check both: the target list and the clauses. If the
+ * match the index
+ */
+ result =
+ check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+ check_index_only_clauses(index->indrestrictinfo, index);
+ }
+
bms_free(attrs_used);
bms_free(index_canreturn_attrs);
return result;
}
+typedef struct check_index_only_walker_ctx
+{
+ IndexOptInfo *index;
+ Bitmapset *index_canreturn_attrs; /* attributes that can be returned
+ * by index */
+ bool match; /* TRUE if expression matches
+ * index */
+} check_index_only_walker_ctx;
+
+/*
+ * check_index_only_expr_walker
+ * Recursive walker that checks if each part of the expression can be
+ * build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+ int i;
+
+ /*
+ * If node is empty or some subexpression has already reported that it
+ * isn't match the index then quit looking any further
+ */
+ if (node == NULL || ctx->match == false)
+ return false;
+
+ /*
+ * If this is a Var then check if it can be returned by the index
+ */
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+
+ if (var->varno == ctx->index->rel->relid &&
+ !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+ ctx->index_canreturn_attrs))
+ ctx->match = false;
+ return false;
+ }
+
+ /*
+ * Check if expression can be returned by index
+ */
+ for (i = 0; i < ctx->index->ncolumns; i++)
+ {
+ if (match_index_to_operand(node, i, ctx->index))
+ {
+ /*
+ * It's alright. This expression can be returned by index
+ */
+ return false;
+ }
+ }
+
+ /*
+ * Recursive check
+ */
+ return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_targetlist
+ * Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+ RelOptInfo *rel,
+ IndexOptInfo *index,
+ Bitmapset *index_canreturn_attrs)
+{
+ check_index_only_walker_ctx *ctx;
+ ListCell *lc;
+ bool result = true;
+ bool flag;
+
+ /* Check expressions from targetlist */
+ foreach(lc, root->parse->targetList)
+ {
+ TargetEntry *te = (TargetEntry *) lfirst(lc);
+ Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+ if (bms_is_member(rel->relid, varnos))
+ {
+ Bitmapset *attrs = NULL;
+
+ flag = false;
+
+ pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+ if (bms_is_subset(attrs, index_canreturn_attrs))
+ flag = true;
+ else
+ {
+ ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+ ctx->index = index;
+ ctx->match = true;
+ ctx->index_canreturn_attrs = index_canreturn_attrs;
+ check_index_only_expr_walker((Node *) te->expr, ctx);
+
+ if (ctx->match)
+ flag = true;
+
+ pfree(ctx);
+ }
+ bms_free(attrs);
+
+ if (!flag)
+ {
+ result = false;
+ break;
+ }
+ }
+ bms_free(varnos);
+ }
+
+ return result;
+}
+
+/*
+ * check_index_only_clauses
+ * Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+ IndexOptInfo *index)
+{
+ int indexcol;
+ bool match;
+ ListCell *lc;
+
+ foreach(lc, clauses)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ match = false;
+ /*
+ * We expect here either a RestrictInfo or a boolean
+ * expression containing other RestrictInfos
+ */
+ if (IsA(node, RestrictInfo))
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) node;
+
+ if (!restriction_is_or_clause(rinfo))
+ {
+ for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+ {
+ if (match_clause_to_indexcol(index, indexcol, rinfo))
+ {
+ match = true;
+ break;
+ }
+ }
+ }
+ else
+ {
+ List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+ match = check_index_only_clauses(subclauses, index);
+ }
+ }
+ else if (IsA(node, BoolExpr))
+ {
+ BoolExpr *boolexpr = (BoolExpr *) node;
+
+ match = check_index_only_clauses(boolexpr->args, index);
+ }
+
+ /*
+ * If at least one restriction doesn't match index then return false
+ */
+ if (!match)
+ return false;
+ }
+
+ /* If we got here then everything's fine */
+ return true;
+}
+
/*
* get_loop_count
* Choose the loop count estimate to use for costing a parameterized path
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers