Hi, hackers!

There is a known issue that index only scan (IOS) can only work with simple index keys based on single attributes and doesn't work with index expressions. In this patch I propose a solution that adds support of IOS for index expressions. Here's an example:

create table abc(a int, b int, c int);
create index on abc ((a * 1000 + b), c);

with t1 as (select generate_series(1, 1000) as x),
     t2 as (select generate_series(0, 999) as x)
insert into abc(a, b, c)
    select t1.x, t2.x, t2.x from t1, t2;
vacuum analyze;

Explain results with the patch:

explain (analyze, buffers) select a * 1000 + b + c from abc where a * 1000 + b = 1001;
                                                       QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Index Only Scan using abc_expr_c_idx on abc (cost=0.42..4.45 rows=1 width=4) (actual time=0.032..0.033 rows=1 loops=1)
   Index Cond: ((((a * 1000) + b)) = 1001)
   Heap Fetches: 0
   Buffers: shared hit=4
 Planning time: 0.184 ms
 Execution time: 0.077 ms
(6 rows)

Before the patch it was:

explain (analyze, buffers) select a * 1000 + b + c from abc where a * 1000 + b = 1001;
                                                     QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Index Scan using abc_expr_c_idx on abc (cost=0.42..8.45 rows=1 width=4) (actual time=0.039..0.041 rows=1 loops=1)
   Index Cond: (((a * 1000) + b) = 1001)
   Buffers: shared hit=4
 Planning time: 0.177 ms
 Execution time: 0.088 ms
(5 rows)

This solution has limitations though: the restriction or the target expression tree (or its part) must match exactly the index. E.g. this expression will pass the check:

select a * 1000 + b + 100 from ...

but this will fail:

select 100 + a * 1000 + b from ...

because the parser groups it as:

(100 + a * 1000) + b

In this form it won't match any index key. Another case is when we create index on (a+b) and then make query like 'select b+a ...' or '... where b+a = smth' -- it won't match. This applies to regular index scan too. Probably it worth to discuss the way to normalize index expressions and clauses and work out more convenient way to match them. Anyway, I will be grateful if you take a look at the patch in attachment. Any comments and tips are welcome.

Thanks!

--
Ildar Musin
i.mu...@postgrespro.ru

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2952bfb..9eb0e12 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -130,7 +131,13 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
 static Relids get_bitmap_tree_required_outer(Path *bitmapqual);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
 static int	find_list_position(Node *node, List **nodelist);
-static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index);
+static bool check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs);
+static bool check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index);
 static double get_loop_count(PlannerInfo *root, Index cur_relid, Relids outer_relids);
 static double adjust_rowcount_for_semijoins(PlannerInfo *root,
 							  Index cur_relid,
@@ -190,7 +197,6 @@ static List *network_prefix_quals(Node *leftop, Oid expr_op, Oid opfamily,
 static Datum string_to_datum(const char *str, Oid datatype);
 static Const *string_to_const(const char *str, Oid datatype);
 
-
 /*
  * create_index_paths()
  *	  Generate all interesting index paths for the given relation.
@@ -1020,7 +1026,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	 * index data retrieval anyway.
 	 */
 	index_only_scan = (scantype != ST_BITMAPSCAN &&
-					   check_index_only(rel, index));
+					   check_index_only(root, rel, index));
 
 	/*
 	 * 4. Generate an indexscan path if there are relevant restriction clauses
@@ -1780,13 +1786,12 @@ find_list_position(Node *node, List **nodelist)
 	return i;
 }
 
-
 /*
  * check_index_only
  *		Determine whether an index-only scan is possible for this index.
  */
 static bool
-check_index_only(RelOptInfo *rel, IndexOptInfo *index)
+check_index_only(PlannerInfo *root, RelOptInfo *rel, IndexOptInfo *index)
 {
 	bool		result;
 	Bitmapset  *attrs_used = NULL;
@@ -1836,10 +1841,7 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	{
 		int			attno = index->indexkeys[i];
 
-		/*
-		 * For the moment, we just ignore index expressions.  It might be nice
-		 * to do something with them, later.
-		 */
+		/* Handle expressions later */
 		if (attno == 0)
 			continue;
 
@@ -1852,12 +1854,210 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index)
 	/* Do we have all the necessary attributes? */
 	result = bms_is_subset(attrs_used, index_canreturn_attrs);
 
+	/* Check index expressions */
+	if (!result && index->indexprs)
+	{
+		/*
+		 * We need to check both: the target list and the clauses. If the
+		 * match the index
+		 */
+		result =
+			check_index_only_targetlist(root, rel, index, index_canreturn_attrs) &&
+			check_index_only_clauses(index->indrestrictinfo, index);
+	}
+
 	bms_free(attrs_used);
 	bms_free(index_canreturn_attrs);
 
 	return result;
 }
 
+typedef struct check_index_only_walker_ctx
+{
+	IndexOptInfo   *index;
+	Bitmapset	   *index_canreturn_attrs;	/* attributes that can be returned
+											 * by index */
+	bool			match;					/* TRUE if expression matches
+											 * index */
+} check_index_only_walker_ctx;
+
+/*
+ * check_index_only_expr_walker
+ * 		Recursive walker that checks if each part of the expression can be
+ *		build with index expressions or attributes
+ */
+static bool
+check_index_only_expr_walker(Node *node, check_index_only_walker_ctx *ctx)
+{
+	int i;
+
+	/*
+	 * If node is empty or some subexpression has already reported that it
+	 * isn't match the index then quit looking any further
+	 */
+	if (node == NULL || ctx->match == false)
+		return false;
+
+	/*
+	 * If this is a Var then check if it can be returned by the index
+	 */
+	if (IsA(node, Var))
+	{
+		Var *var = (Var *) node;
+
+		if (var->varno == ctx->index->rel->relid &&
+			!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
+						   ctx->index_canreturn_attrs))
+			ctx->match = false;
+		return false;
+	}
+
+	/*
+	 * Check if expression can be returned by index
+	 */
+	for (i = 0; i < ctx->index->ncolumns; i++)
+	{
+		if (match_index_to_operand(node, i, ctx->index))
+		{
+			/*
+			 * It's alright. This expression can be returned by index
+			 */
+			return false;
+		}
+	}
+
+	/*
+	 * Recursive check
+	 */
+	return expression_tree_walker(node, check_index_only_expr_walker, (void *) ctx);
+}
+
+/*
+ * check_index_only_targetlist
+ *		Checks that every target of index->relation can be returned by index
+ *
+ * In this function we're looking through root->parse->targetlist and pick
+ * those targets which contain index->relation's attributes. For each selected
+ * target we use recursive function check_index_only_expr_walker() to check if
+ * target can be build with the index expressions and attributes and none of
+ * the other index->relation's attributes
+ */
+static bool
+check_index_only_targetlist(PlannerInfo *root,
+							RelOptInfo *rel,
+							IndexOptInfo *index,
+							Bitmapset *index_canreturn_attrs)
+{
+	check_index_only_walker_ctx *ctx;
+	ListCell *lc;
+	bool	result = true;
+	bool	flag;
+
+	/* Check expressions from targetlist */
+	foreach(lc, root->parse->targetList)
+	{
+		TargetEntry *te = (TargetEntry *) lfirst(lc);
+		Bitmapset *varnos = pull_varnos((Node *) te->expr);
+
+		if (bms_is_member(rel->relid, varnos))
+		{
+			Bitmapset *attrs = NULL;
+
+			flag = false;
+
+			pull_varattnos((Node *) te->expr, rel->relid, &attrs);
+			if (bms_is_subset(attrs, index_canreturn_attrs))
+				flag = true;
+			else
+			{
+				ctx = (check_index_only_walker_ctx *) palloc(sizeof(check_index_only_walker_ctx));
+				ctx->index = index;
+				ctx->match = true;
+				ctx->index_canreturn_attrs = index_canreturn_attrs;
+				check_index_only_expr_walker((Node *) te->expr, ctx);
+
+				if (ctx->match)
+					flag = true;
+
+				pfree(ctx);
+			}
+			bms_free(attrs);
+
+			if (!flag)
+			{
+				result = false;
+				break;
+			}
+		}
+		bms_free(varnos);
+	}
+
+	return result;
+}
+
+/*
+ * check_index_only_clauses
+ *		Recursive function that checks that each clause matches the index
+ *
+ * clauses is a list of RestrictInfos or a BoolExpr containing RestrictInfos
+ */
+static bool
+check_index_only_clauses(List *clauses,
+						 IndexOptInfo *index)
+{
+	int 		indexcol;
+	bool		match;
+	ListCell   *lc;
+
+	foreach(lc, clauses)
+	{
+		Node *node = (Node *) lfirst(lc);
+
+		match = false;
+		/*
+		 * We expect here either a RestrictInfo or a boolean
+		 * expression containing other RestrictInfos
+		 */
+		if (IsA(node, RestrictInfo))
+		{
+			RestrictInfo *rinfo = (RestrictInfo *) node;
+
+			if (!restriction_is_or_clause(rinfo))
+			{
+				for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
+				{
+					if (match_clause_to_indexcol(index, indexcol, rinfo))
+					{
+						match = true;
+						break;
+					}
+				}
+			}
+			else
+			{
+				List *subclauses = ((BoolExpr *) rinfo->orclause)->args;
+
+				match = check_index_only_clauses(subclauses, index);
+			}
+		}
+		else if (IsA(node, BoolExpr))
+		{
+			BoolExpr *boolexpr = (BoolExpr *) node;
+
+			match = check_index_only_clauses(boolexpr->args, index);
+		}
+
+		/*
+		 * If at least one restriction doesn't match index then return false
+		 */
+		if (!match)
+			return false;
+	}
+
+	/* If we got here then everything's fine */
+	return true;
+}
+
 /*
  * get_loop_count
  *		Choose the loop count estimate to use for costing a parameterized path
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to