Re: [HACKERS] Get more from indices.

Kyotaro HORIGUCHI Tue, 19 Nov 2013 03:37:06 -0800

Hello, I've totally refactored the series of patches and cut out
the appropriate portion as 'unique (and non-nullable) index
stuff'.


As the discussion before, it got rid of path distinctness. This
patch works only on index 'full-orederedness', i.e., unique index
on non-nullable columns.

This patch itself does not so much. Will have power applied with
'Using indices for UNION' patch.


=== Making test table

create table t (a int not null, b int not null, c int, d text);
create unique index i_t_ab on t (a, b);
insert into t (select a / 5, 4 - (a % 5), a, 't' from generate_series(000000, 
099999) a);


=== Example 1.

not-patched=# explain select * from t order by a, b ,c limit 1;
>                               QUERY PLAN                              
> ----------------------------------------------------------------------
>  Limit  (cost=2041.00..2041.00 rows=1 width=14)
>    ->  Sort  (cost=2041.00..2291.00 rows=100000 width=14)
>          Sort Key: a, b, c
>          ->  Seq Scan on t  (cost=0.00..1541.00 rows=100000 width=14)

patched=# explain select * from t order by a, b ,c limit 1;
>                                   QUERY PLAN                             
> -----------------------------------------------------------------------------
>  Limit  (cost=0.29..0.33 rows=1 width=14)
>    ->  Index Scan using i_t_ab on t  (cost=0.29..3857.04 rows=100000 width=14)


=== Example 2.

not-patched=# explain select distinct * from t order by a limit 1;
>                                 QUERY PLAN                                 
> ---------------------------------------------------------------------------
>  Limit  (cost=1820.46..1820.47 rows=1 width=44)
>    ->  Sort  (cost=1820.46..1835.34 rows=5951 width=44)
>          Sort Key: a
>          ->  HashAggregate  (cost=1731.20..1790.71 rows=5951 width=44)
>                ->  Seq Scan on t  (cost=0.00..1136.10 rows=59510 width=44)

patched=# explain select distinct * from t order by a limit 1;
>                                      QUERY PLAN                               
>       
> ------------------------------------------------------------------------------------
>  Limit  (cost=0.29..1.09 rows=1 width=44)
>    ->  Unique  (cost=0.29..4756.04 rows=5951 width=44)
>          ->  Index Scan using i_t_ab on t  (cost=0.29..4160.94 rows=59510 
> width=44)

The unique node could be removed technically but it requires to
care the distinctness of path/plan. So it has been put out to
"Using indeces for UNION" patch.


Any comments?

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center

diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..43be0a5 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 		index_pathkeys = build_index_pathkeys(root, index,
 											  ForwardScanDirection);
 		useful_pathkeys = truncate_useless_pathkeys(root, rel,
-													index_pathkeys);
+													index_pathkeys,
+													index->full_ordered);
 		orderbyclauses = NIL;
 		orderbyclausecols = NIL;
 	}
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 		index_pathkeys = build_index_pathkeys(root, index,
 											  BackwardScanDirection);
 		useful_pathkeys = truncate_useless_pathkeys(root, rel,
-													index_pathkeys);
+													index_pathkeys,
+													index->full_ordered);
 		if (useful_pathkeys != NIL)
 		{
 			ipath = create_index_path(root, index,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 032b2cd..5d8ee04 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -369,6 +369,29 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 }
 
 /*
+ * path_is_ordered
+ * Return true if the path is apparently ordered on the pathkeys.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+	if (pathkeys_contained_in(pathkeys, path->pathkeys))
+		return true;
+
+	/*
+	 * If IndexPath is fully ordered, it is sufficiently ordered if index
+	 * pathkeys is a subset of target pathkeys
+	 */
+	if (pathkeys && path->pathkeys &&
+		IsA(path, IndexPath) &&
+		((IndexPath*)path)->indexinfo->full_ordered &&
+		(pathkeys_contained_in(path->pathkeys, pathkeys)))
+		return true;
+
+	return false;
+}
+
+/*
  * get_cheapest_fractional_path_for_pathkeys
  *	  Find the cheapest path (for retrieving a specified fraction of all
  *	  the tuples) that satisfies the given pathkeys and parameterization.
@@ -381,6 +404,8 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
  * 'pathkeys' represents a required ordering (in canonical form!)
  * 'required_outer' denotes allowable outer relations for parameterized paths
  * 'fraction' is the fraction of the total tuples expected to be retrieved
+ * paths->pathkeys might be replaced with pathkeys so as to declare that the
+ * path is ordered on it.
  */
 Path *
 get_cheapest_fractional_path_for_pathkeys(List *paths,
@@ -403,9 +428,17 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
 			compare_fractional_path_costs(matched_path, path, fraction) <= 0)
 			continue;
 
-		if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+		if (path_is_ordered(path, pathkeys) &&
 			bms_is_subset(PATH_REQ_OUTER(path), required_outer))
+		{
+			/*
+			 * Set required pathkeys as the path's pathkeys so as to declare
+			 * that this path is ordred on the pathkeys.
+			 */
+			if (list_length(path->pathkeys) < list_length(pathkeys))
+				path->pathkeys = pathkeys;
 			matched_path = path;
+		}
 	}
 	return matched_path;
 }
@@ -747,7 +780,7 @@ build_join_pathkeys(PlannerInfo *root,
 	 * contain pathkeys that were useful for forming this joinrel but are
 	 * uninteresting to higher levels.
 	 */
-	return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+	return truncate_useless_pathkeys(root, joinrel, outer_pathkeys, false);
 }
 
 /****************************************************************************
@@ -1404,7 +1437,8 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
  * So the result is always either 0 or list_length(root->query_pathkeys).
  */
 static int
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys,
+							 bool pk_full_ordered)
 {
 	if (root->query_pathkeys == NIL)
 		return 0;				/* no special ordering requested */
@@ -1418,23 +1452,35 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
 		return list_length(root->query_pathkeys);
 	}
 
+	/*
+	 * Given that the pathkeys compose an unique key, they are useful for
+	 * ordering when they are a sublist of query_pathkeys.
+	 */
+	if (pk_full_ordered &&
+		pathkeys_contained_in(pathkeys, root->query_pathkeys))
+		return list_length(pathkeys);
+
 	return 0;					/* path ordering not useful */
 }
 
 /*
  * truncate_useless_pathkeys
  *		Shorten the given pathkey list to just the useful pathkeys.
+ *
+ * When pk_full_ordered is true, reconsider counting the uniqueness even if
+ * the pathkeys are once judged as useless by the general criteria.
  */
 List *
 truncate_useless_pathkeys(PlannerInfo *root,
 						  RelOptInfo *rel,
-						  List *pathkeys)
+						  List *pathkeys,
+						  bool pk_full_ordered)
 {
 	int			nuseful;
 	int			nuseful2;
 
 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
-	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pk_full_ordered);
 	if (nuseful2 > nuseful)
 		nuseful = nuseful2;
 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 954666c..ee92545 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -231,6 +231,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			 */
 			if (info->relam == BTREE_AM_OID)
 			{
+				bool has_nullable_keycol = false;
+
 				/*
 				 * If it's a btree index, we can use its opfamily OIDs
 				 * directly as the sort ordering opfamily OIDs.
@@ -244,10 +246,25 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 				for (i = 0; i < ncolumns; i++)
 				{
 					int16		opt = indexRelation->rd_indoption[i];
+					int16		keyattno = index->indkey.values[i];
 
 					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
 					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
-				}
+
+					if (keyattno > 0)
+					{
+						has_nullable_keycol |= 
+							!relation->rd_att->attrs[keyattno - 1]->attnotnull;
+					}
+					else if (keyattno != ObjectIdAttributeNumber)
+						has_nullable_keycol = true;
+ 				}
+
+				/* Check to see if it is a full-ordered index */
+				if (IndexIsValid(index) &&
+					index->indisunique && index->indimmediate &&
+					!has_nullable_keycol)
+					info->full_ordered = true;
 			}
 			else if (indexRelation->rd_am->amcanorder)
 			{
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 6d7b594..b38c667 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -524,6 +524,7 @@ typedef struct IndexOptInfo
 	bool		predOK;			/* true if predicate matches query */
 	bool		unique;			/* true if a unique index */
 	bool		immediate;		/* is uniqueness enforced immediately? */
+	bool		full_ordered;	/* is ordered rows not duplicate ? */
 	bool		hypothetical;	/* true if index doesn't really exist */
 	bool		canreturn;		/* can index return IndexTuples? */
 	bool		amcanorderbyop; /* does AM support order by operator result? */
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 96ffdb1..9e53b42 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -160,6 +160,7 @@ extern bool pathkeys_contained_in(List *keys1, List *keys2);
 extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 							   Relids required_outer,
 							   CostSelector cost_criterion);
+extern bool path_is_ordered(Path *path, List *pathkeys);
 extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
 										  List *pathkeys,
 										  Relids required_outer,
@@ -191,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
 							  List *outer_pathkeys);
 extern List *truncate_useless_pathkeys(PlannerInfo *root,
 						  RelOptInfo *rel,
-						  List *pathkeys);
+						  List *pathkeys,
+						  bool pk_full_ordered);
 extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
 
 #endif   /* PATHS_H */
diff --git a/src/test/isolation/expected/drop-index-concurrently-1.out b/src/test/isolation/expected/drop-index-concurrently-1.out
index 75dff56..ab96fa0 100644
--- a/src/test/isolation/expected/drop-index-concurrently-1.out
+++ b/src/test/isolation/expected/drop-index-concurrently-1.out
@@ -19,10 +19,8 @@ Sort
 step explains: EXPLAIN (COSTS OFF) EXECUTE getrow_seq;
 QUERY PLAN     
 
-Sort           
-  Sort Key: id, data
-  ->  Seq Scan on test_dc
-        Filter: ((data)::text = '34'::text)
+Index Scan using test_dc_pkey on test_dc
+  Filter: ((data)::text = '34'::text)
 step select2: SELECT * FROM test_dc WHERE data=34 ORDER BY id,data;
 id             data

-- 
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] Get more from indices.

Reply via email to