Hello, This is the last episode of the 'dance with indices'?
series.
Unique indexes can sort the tuples in corresponding tables
prefectly. So this query might can use index.
> uniquetest=# create table t (a int, b int, c int, d text);
> uniquetest=# create unique index i_t_pkey on t(a, b);
> uniquetest=# insert into t
> (select a % 10, a / 10, a, 't' from generate_series(0, 100000) a);
> uniquetest=# analyze;
>
> uniquetest=# explain (costs off, analyze on) select distinct * from t;
> QUERY PLAN
> ---------------------------------------------------------------------------
> Unique (actual time=149.625..245.978 rows=100001 loops=1)
> -> Sort (actual time=149.624..199.806 rows=100001 loops=1)
> Sort Key: a, b, c, d
> Sort Method: external merge Disk: 2328kB
> -> Seq Scan on t (actual time=0.016..15.597 rows=100001 loops=1)
> Total runtime: 251.272 ms
With this patch, the plan for this query becomes as follows,
> uniquetest=# explain (costs off, analyze on) select distinct * from t;
> QUERY PLAN
> ---------------------------------------------------------------------
> Index Scan using i_t_pkey on t
> (actual time=0.019..32.457 rows=100001 loops=1)
> Total runtime: 37.488 ms
This only seems not so valuable to archive, but with my previous
MergeAppend for UNION patch, this will have more value.
> uniquetest=# create table pu1 (a int, b int, c int, d text);
> uniquetest=# create unique index pu1_pkey_idx on pu1 (a, b);
> uniquetest=# create table cu11 (like pu1 including all) inherits (pu1);
> uniquetest=# create table cu12 (like pu1 including all) inherits (pu1);
> uniquetest=# create table pu2 (like pu1 including all);
> uniquetest=# create table cu21 (like pu1 including all) inherits (pu2);
> uniquetest=# create table cu22 (like pu1 including all) inherits (pu2);
> uniquetest=# insert into cu11 (select a / 5, 4 - (a % 5), a, 'cu11'
> from generate_series(000000, 099999) a);
> uniquetest=# insert into cu12 (select a / 5, 4 - (a % 5), a, 'cu12'
> from generate_series(100000, 199999) a);
> uniquetest=# insert into cu21 (select a / 5, 4 - (a % 5), a, 'cu21'
> from generate_series(150000, 249999) a);
> uniquetest=# insert into cu22 (select a / 5, 4 - (a % 5), a, 'cu22'
> from generate_series(250000, 349999) a);
> uniquetest=# explain (analyze on, costs off)
> select * from pu1 union select * from pu2 order by a, b limit 100;
> QUERY PLAN
> --------------------------------------------------------------------------
> Limit (actual time=0.226..0.591 rows=100 loops=1)
> -> Unique (actual time=0.223..0.561 rows=100 loops=1)
> -> Merge Append (actual time=0.223..0.470 rows=100 loops=1)
> Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
> -> Limit (actual time=0.125..0.326 rows=100 loops=1)
> -> Unique (actual time=0.125..0.303 rows=100 loops=1)
> -> Merge Append (actual time=0.123..0.220 rows=100 loops=1)
> Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
> -> Index Scan using..on pu1 (actual time=0.005..0.005 rows=0 loops=1)
> -> Index Scan using..on cu11(actual time=0.071..0.128 rows=100
> loops=1)
> -> Index Scan using..on cu12 (actual time=0.045..0.045 rows=1 loops=1)
> -> Limit (actual time=0.096..0.096 rows=1 loops=1)
> -> Unique (actual time=0.096..0.096 rows=1 loops=1)
> -> Merge Append (actual time=0.094..0.094 rows=1 loops=1)
> Sort Key: pu2.a, pu2.b, pu2.c, pu2.d
> -> Index Scan using..on pu2 (actual time=0.002..0.002 rows=0 loops=1)
> -> Index Scan using..on cu21 (actual time=0.047..0.047 rows=1 loops=1)
> -> Index Scan using..on cu22 (actual time=0.043..0.043 rows=1 loops=1)
> Total runtime: 1.987 ms
On the other hand, what we will get from the unpatched PostgreSQL is,
> uniquetest=# explain (analyze on, costs off)
> select * from pu1 union select * from pu2 order by a, b limit 100;
> QUERY PLAN
> -------------------------------------------------------------------------
> Limit (actual time=535.620..535.706 rows=100 loops=1)
> -> Unique (actual time=535.618..535.695 rows=100 loops=1)
> -> Sort (actual time=535.617..535.637 rows=100 loops=1)
> Sort Key: pu1.a, pu1.b, pu1.c, pu1.d
> Sort Method: external sort Disk: 10568kB
> -> Append (actual time=0.012..144.144 rows=400000 loops=1)
> -> Append (actual time=0.012..49.570 rows=200000 loops=1)
> -> Seq Scan on pu1 (actual time=0.001..0.001 rows=0 loops=1)
> -> Seq Scan on cu11 (actual time=0.011..16.202 rows=100000 loops=1)
> -> Seq Scan on cu12 (actual time=0.008..16.334 rows=100000 loops=1)
> -> Append (actual time=0.010..54.052 rows=200000 loops=1)
> -> Seq Scan on pu2 (actual time=0.000..0.000 rows=0 loops=1)
> -> Seq Scan on cu21 (actual time=0.009..16.444 rows=100000 loops=1)
> -> Seq Scan on cu22 (actual time=0.021..18.923 rows=100000 loops=1)
> Total runtime: 537.765 ms
What do you think about this?
This could be realized by following modifications,
- Consider a query pathekeys is useful for ordering when a index
pathkey is a subset of that when the index is UNIQUE.
(build_index_paths, pathkeys_useful_for_ordering,
truncate_useless_pathkeys, grouping_planner)
- Judge a path is orderd or not counting the uniqueness of the
path.
- Regard IndexPath on UNIQUE index is reagarded uniquely
ordered.
- Regard MergeAppendPath of which all children is uniquely
orderd as (not necessarily uniquely) ordered.
(path_is_ordered)
- Judge the necessity of sorting on above
premises. (grouping_planner)
Needless to say, theses patches have no effect on any other set
operations than UNION(ALL). They are, INTERSECT(ALL),
EXCEPT(ALL).
regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 606734a..bc1ab9a 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -953,7 +953,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
index_pathkeys = build_index_pathkeys(root, index,
ForwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
+ index_pathkeys,
+ index->unique);
orderbyclauses = NIL;
orderbyclausecols = NIL;
}
@@ -1015,7 +1016,8 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
index_pathkeys = build_index_pathkeys(root, index,
BackwardScanDirection);
useful_pathkeys = truncate_useless_pathkeys(root, rel,
- index_pathkeys);
+ index_pathkeys,
+ index->unique);
if (useful_pathkeys != NIL)
{
ipath = create_index_path(root, index,
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 6724996..87070e7 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -363,6 +363,68 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
}
/*
+ * path_is_uniquely_ordered
+ * Return true if the path is apparently uniquely ordered on the pathkeys.
+ */
+bool
+path_is_uniquely_ordered(Path *path, List *pathkeys)
+{
+ if (pathkeys && path->pathkeys &&
+ IsA(path, IndexPath) &&
+ ((IndexPath*)path)->indexinfo->unique &&
+ (pathkeys_contained_in(path->pathkeys, pathkeys)))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+
+/*
+ * path_is_ordered
+ * Return true if the path is apparently ordered on the pathkeys. The given
+ * path might be modified so that it will be recognized later as sufficiently
+ * ordered.
+ */
+bool
+path_is_ordered(Path *path, List *pathkeys)
+{
+ if (pathkeys_contained_in(pathkeys, path->pathkeys))
+ return true;
+
+ /* path is obviously ordered when uniquely ordered */
+ if (path_is_uniquely_ordered(path, pathkeys))
+ return true;
+
+ /*
+ * MergeAppendPath's pathkeys can be replaced by arbitrary pathkeys when
+ * all subpaths are uniquely ordered on the target pathkeys.
+ */
+ if (IsA(path, MergeAppendPath))
+ {
+ ListCell *lc;
+ MergeAppendPath *mpath = (MergeAppendPath *)path;
+
+ foreach (lc, mpath->subpaths)
+ {
+ Path *subpath = (Path *) lfirst(lc);
+ if (!path_is_uniquely_ordered(subpath, pathkeys)) break;
+ }
+ if (!lc)
+ {
+ /*
+ * This MergeAppendPath is sufficiently ordered on the target
+ * pathkeys. Reflect that on this path.
+ */
+ mpath->path.pathkeys = pathkeys;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
* get_cheapest_fractional_path_for_pathkeys
* Find the cheapest path (for retrieving a specified fraction of all
* the tuples) that satisfies the given pathkeys and parameterization.
@@ -397,7 +459,7 @@ get_cheapest_fractional_path_for_pathkeys(List *paths,
compare_fractional_path_costs(matched_path, path, fraction) <= 0)
continue;
- if (pathkeys_contained_in(pathkeys, path->pathkeys) &&
+ if (path_is_ordered(path, pathkeys) &&
bms_is_subset(PATH_REQ_OUTER(path), required_outer))
matched_path = path;
}
@@ -733,7 +795,7 @@ build_join_pathkeys(PlannerInfo *root,
* contain pathkeys that were useful for forming this joinrel but are
* uninteresting to higher levels.
*/
- return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
+ return truncate_useless_pathkeys(root, joinrel, outer_pathkeys, false);
}
/****************************************************************************
@@ -1378,9 +1440,12 @@ right_merge_direction(PlannerInfo *root, PathKey *pathkey)
* Unlike merge pathkeys, this is an all-or-nothing affair: it does us
* no good to order by just the first key(s) of the requested ordering.
* So the result is always either 0 or list_length(root->query_pathkeys).
+ * pk_uniq can be true when pathkeys are unique key itself. Tuples are
+ * sufficiently ordered when the pathkeys are the subset of the
+ * query_pathkeys for the case.
*/
static int
-pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
+pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys, bool pk_uniq)
{
if (root->query_pathkeys == NIL)
return 0; /* no special ordering requested */
@@ -1394,23 +1459,34 @@ pathkeys_useful_for_ordering(PlannerInfo *root, List *pathkeys)
return list_length(root->query_pathkeys);
}
+ /*
+ * Given that the pathkeys compose an unique key, they are useful for
+ * ordering when they are a sublist of query_pathkeys.
+ */
+ if (pk_uniq && pathkeys_contained_in(pathkeys, root->query_pathkeys))
+ return list_length(pathkeys);
+
return 0; /* path ordering not useful */
}
/*
* truncate_useless_pathkeys
* Shorten the given pathkey list to just the useful pathkeys.
+ *
+ * When pathkeys_unique is true, reconsider counting the uniqueness even if
+ * the pathkeys are once judged as useless by the general criteria.
*/
List *
truncate_useless_pathkeys(PlannerInfo *root,
RelOptInfo *rel,
- List *pathkeys)
+ List *pathkeys,
+ bool pathkeys_unique)
{
int nuseful;
int nuseful2;
nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
- nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
+ nuseful2 = pathkeys_useful_for_ordering(root, pathkeys, pathkeys_unique);
if (nuseful2 > nuseful)
nuseful = nuseful2;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 9b9eb2f..7b1490f 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -809,7 +809,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path)
numsortkeys * sizeof(bool)) == 0);
/* Now, insert a Sort node if subplan isn't sufficiently ordered */
- if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+ if (!path_is_ordered(subpath, pathkeys))
subplan = (Plan *) make_sort(root, subplan, numsortkeys,
sortColIdx, sortOperators,
collations, nullsFirst,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 86abdf6..1ce04ac 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -258,6 +258,31 @@ standard_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
return result;
}
+/*
+ * Check if a path is sufficiently ordered on target pathkeys. This function
+ * is intended to be used as a replace for pathkeys_contained_in() when the
+ * focused path might be uniquely ordered on the current_pathkeys.
+ */
+static bool
+sort_needed(List *current_pathkeys, bool path_is_unique, List *target_pathkeys)
+{
+ /*
+ * If target pathkeys are a sublist of current pathkeys, the path is
+ * ordered also on target pathkeys
+ */
+ if (pathkeys_contained_in(target_pathkeys, current_pathkeys))
+ return false;
+
+ /*
+ * If a plan is uniquely ordered on current_pathkeys, it is sufficiently
+ * ordered on any pathkeys containing it.
+ */
+ if (path_is_unique && current_pathkeys &&
+ pathkeys_contained_in(current_pathkeys, target_pathkeys))
+ return false;
+
+ return true;
+}
/*--------------------
* subquery_planner
@@ -1043,6 +1068,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
double dNumGroups = 0;
bool use_hashed_distinct = false;
bool tested_hashed_distinct = false;
+ bool result_plan_is_unique = false;
/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
if (parse->limitCount || parse->limitOffset)
@@ -1451,18 +1477,24 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
result_plan = create_plan(root, best_path);
current_pathkeys = best_path->pathkeys;
+ result_plan_is_unique =
+ path_is_uniquely_ordered(best_path, root->query_pathkeys);
/* Detect if we'll need an explicit sort for grouping */
- if (parse->groupClause && !use_hashed_grouping &&
- !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
+ if (parse->groupClause && !use_hashed_grouping)
{
- need_sort_for_grouping = true;
+ if (path_is_ordered(best_path, root->group_pathkeys))
+ current_pathkeys = root->group_pathkeys;
+ else
+ {
+ need_sort_for_grouping = true;
- /*
- * Always override create_plan's tlist, so that we don't sort
- * useless data from a "physical" tlist.
- */
- need_tlist_eval = true;
+ /*
+ * Always override create_plan's tlist, so that we don't
+ * sort useless data from a "physical" tlist.
+ */
+ need_tlist_eval = true;
+ }
}
/*
@@ -1575,6 +1607,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_ops(parse->groupClause),
numGroups,
result_plan);
+
+ /* Aggregation might break the tuple uniqueness. */
+ result_plan_is_unique = false;
}
else if (parse->groupClause)
{
@@ -1603,7 +1638,11 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
extract_grouping_ops(parse->groupClause),
dNumGroups,
result_plan);
- /* The Group node won't change sort ordering */
+ /*
+ * The Group node won't change sort ordering, however might
+ * break the uniqueness.
+ */
+ result_plan_is_unique = false;
}
else if (root->hasHavingQual)
{
@@ -1713,8 +1752,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
result_plan,
window_pathkeys,
-1.0);
- if (!pathkeys_contained_in(window_pathkeys,
- current_pathkeys))
+
+ if (sort_needed(current_pathkeys, result_plan_is_unique,
+ window_pathkeys))
{
/* we do indeed need to sort */
result_plan = (Plan *) sort_plan;
@@ -1770,6 +1810,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
wc->startOffset,
wc->endOffset,
result_plan);
+ /* Aggregation might break the tuple uniqueness. */
+ result_plan_is_unique = false;
}
}
} /* end of if (setOperations) */
@@ -1855,8 +1897,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
needed_pathkeys = root->sort_pathkeys;
else
needed_pathkeys = root->distinct_pathkeys;
-
- if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
+
+ if (sort_needed(current_pathkeys, result_plan_is_unique,
+ needed_pathkeys))
{
if (list_length(root->distinct_pathkeys) >=
list_length(root->sort_pathkeys))
@@ -1875,8 +1918,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
-1.0);
}
- result_plan = (Plan *) make_unique(result_plan,
- parse->distinctClause);
+ if (!result_plan_is_unique)
+ result_plan = (Plan *) make_unique(result_plan,
+ parse->distinctClause);
result_plan->plan_rows = dNumDistinctRows;
/* The Unique node won't change sort ordering */
}
@@ -1888,7 +1932,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
*/
if (parse->sortClause)
{
- if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
+ if (sort_needed(current_pathkeys, result_plan_is_unique,
+ root->sort_pathkeys))
{
result_plan = (Plan *) make_sort_from_pathkeys(root,
result_plan,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 9ef93c7..8e6d0e7 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -156,6 +156,8 @@ typedef enum
extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
extern bool pathkeys_contained_in(List *keys1, List *keys2);
+extern bool path_is_ordered(Path *path, List *pathkeys);
+extern bool path_is_uniquely_ordered(Path *path, List *pathkeys);
extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
Relids required_outer,
CostSelector cost_criterion);
@@ -190,7 +192,8 @@ extern List *make_inner_pathkeys_for_merge(PlannerInfo *root,
List *outer_pathkeys);
extern List *truncate_useless_pathkeys(PlannerInfo *root,
RelOptInfo *rel,
- List *pathkeys);
+ List *pathkeys,
+ bool pathkeys_unique);
extern bool has_useful_pathkeys(PlannerInfo *root, RelOptInfo *rel);
#endif /* PATHS_H */
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers