Re: Index Skip Scan

Jesper Pedersen Fri, 02 Aug 2019 09:03:43 -0700

Hi,

On 8/2/19 8:14 AM, Dmitry Dolgov wrote:

And this too (slightly rewritten:). We will soon post the new version of patch
with updates about UniqueKey from Jesper.


Yes.

We decided to send this now, although there is still feedback from Davidthat needs to be considered/acted on.

The patches can be reviewed independently, but we will send them as aset from now on. Development of UniqueKey will be kept separate though [1].

Note, that while UniqueKey can form the foundation of optimizations forGROUP BY queries it isn't the focus for this patch series. Contributionsare very welcomed of course.


[1] https://github.com/jesperpedersen/postgres/tree/uniquekey

Best regards,
 Jesper

>From 35018a382d792d6ceeb8d0e9d16bc14ea2e3f148 Mon Sep 17 00:00:00 2001
From: jesperpedersen <[email protected]>
Date: Fri, 2 Aug 2019 07:52:08 -0400
Subject: [PATCH 1/2] Unique key

Design by David Rowley.

Author: Jesper Pedersen
---
 src/backend/nodes/outfuncs.c           |  14 +++
 src/backend/nodes/print.c              |  39 +++++++
 src/backend/optimizer/path/Makefile    |   2 +-
 src/backend/optimizer/path/allpaths.c  |   8 ++
 src/backend/optimizer/path/costsize.c  |   5 +
 src/backend/optimizer/path/indxpath.c  |  41 +++++++
 src/backend/optimizer/path/pathkeys.c  |  72 ++++++++++--
 src/backend/optimizer/path/uniquekey.c | 147 +++++++++++++++++++++++++
 src/backend/optimizer/plan/planner.c   |  18 ++-
 src/backend/optimizer/util/pathnode.c  |  12 ++
 src/backend/optimizer/util/tlist.c     |   1 -
 src/include/nodes/nodes.h              |   1 +
 src/include/nodes/pathnodes.h          |  18 +++
 src/include/nodes/print.h              |   2 +-
 src/include/optimizer/pathnode.h       |   1 +
 src/include/optimizer/paths.h          |  11 ++
 16 files changed, 377 insertions(+), 15 deletions(-)
 create mode 100644 src/backend/optimizer/path/uniquekey.c

diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e6ce8e2110..9a4f3e8e4b 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -1720,6 +1720,7 @@ _outPathInfo(StringInfo str, const Path *node)
 	WRITE_FLOAT_FIELD(startup_cost, "%.2f");
 	WRITE_FLOAT_FIELD(total_cost, "%.2f");
 	WRITE_NODE_FIELD(pathkeys);
+	WRITE_NODE_FIELD(uniquekeys);
 }
 
 /*
@@ -2201,6 +2202,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
 	WRITE_NODE_FIELD(eq_classes);
 	WRITE_BOOL_FIELD(ec_merging_done);
 	WRITE_NODE_FIELD(canon_pathkeys);
+	WRITE_NODE_FIELD(canon_uniquekeys);
 	WRITE_NODE_FIELD(left_join_clauses);
 	WRITE_NODE_FIELD(right_join_clauses);
 	WRITE_NODE_FIELD(full_join_clauses);
@@ -2210,6 +2212,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
 	WRITE_NODE_FIELD(placeholder_list);
 	WRITE_NODE_FIELD(fkey_list);
 	WRITE_NODE_FIELD(query_pathkeys);
+	WRITE_NODE_FIELD(query_uniquekeys);
 	WRITE_NODE_FIELD(group_pathkeys);
 	WRITE_NODE_FIELD(window_pathkeys);
 	WRITE_NODE_FIELD(distinct_pathkeys);
@@ -2397,6 +2400,14 @@ _outPathKey(StringInfo str, const PathKey *node)
 	WRITE_BOOL_FIELD(pk_nulls_first);
 }
 
+static void
+_outUniqueKey(StringInfo str, const UniqueKey *node)
+{
+	WRITE_NODE_TYPE("UNIQUEKEY");
+
+	WRITE_NODE_FIELD(eq_clause);
+}
+
 static void
 _outPathTarget(StringInfo str, const PathTarget *node)
 {
@@ -4073,6 +4084,9 @@ outNode(StringInfo str, const void *obj)
 			case T_PathKey:
 				_outPathKey(str, obj);
 				break;
+			case T_UniqueKey:
+				_outUniqueKey(str, obj);
+				break;
 			case T_PathTarget:
 				_outPathTarget(str, obj);
 				break;
diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c
index 4ecde6b421..62c9d4ef7a 100644
--- a/src/backend/nodes/print.c
+++ b/src/backend/nodes/print.c
@@ -459,6 +459,45 @@ print_pathkeys(const List *pathkeys, const List *rtable)
 	printf(")\n");
 }
 
+/*
+ * print_uniquekeys -
+ *	  unique_key an UniqueKey
+ */
+void
+print_uniquekeys(const List *uniquekeys, const List *rtable)
+{
+	ListCell   *l;
+
+	printf("(");
+	foreach(l, uniquekeys)
+	{
+		UniqueKey *unique_key = (UniqueKey *) lfirst(l);
+		EquivalenceClass *eclass = (EquivalenceClass *) unique_key->eq_clause;
+		ListCell   *k;
+		bool		first = true;
+
+		/* chase up */
+		while (eclass->ec_merged)
+			eclass = eclass->ec_merged;
+
+		printf("(");
+		foreach(k, eclass->ec_members)
+		{
+			EquivalenceMember *mem = (EquivalenceMember *) lfirst(k);
+
+			if (first)
+				first = false;
+			else
+				printf(", ");
+			print_expr((Node *) mem->em_expr, rtable);
+		}
+		printf(")");
+		if (lnext(uniquekeys, l))
+			printf(", ");
+	}
+	printf(")\n");
+}
+
 /*
  * print_tl
  *	  print targetlist in a more legible way.
diff --git a/src/backend/optimizer/path/Makefile b/src/backend/optimizer/path/Makefile
index 6864a62132..8249a6b6db 100644
--- a/src/backend/optimizer/path/Makefile
+++ b/src/backend/optimizer/path/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
 OBJS = allpaths.o clausesel.o costsize.o equivclass.o indxpath.o \
-       joinpath.o joinrels.o pathkeys.o tidpath.o
+       joinpath.o joinrels.o pathkeys.o tidpath.o uniquekey.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index e9ee32b7f4..6f4c25f7dd 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -3957,6 +3957,14 @@ print_path(PlannerInfo *root, Path *path, int indent)
 		print_pathkeys(path->pathkeys, root->parse->rtable);
 	}
 
+	if (path->uniquekeys)
+	{
+		for (i = 0; i < indent; i++)
+			printf("\t");
+		printf("  uniquekeys: ");
+		print_uniquekeys(path->uniquekeys, root->parse->rtable);
+	}
+
 	if (join)
 	{
 		JoinPath   *jp = (JoinPath *) path;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3a9a994733..2565dcf296 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -705,6 +705,11 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		path->path.parallel_aware = true;
 	}
 
+	/* Consider cost based on unique key */
+	if (path->path.uniquekeys)
+	{
+	}
+
 	/*
 	 * Now interpolate based on estimated index order correlation to get total
 	 * disk I/O cost for main table accesses.
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 5f339fdfde..4b90dd378a 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -189,6 +189,7 @@ static Expr *match_clause_to_ordering_op(IndexOptInfo *index,
 static bool ec_member_matches_indexcol(PlannerInfo *root, RelOptInfo *rel,
 									   EquivalenceClass *ec, EquivalenceMember *em,
 									   void *arg);
+static List *get_uniquekeys_for_index(PlannerInfo *root, List *pathkeys);
 
 
 /*
@@ -874,6 +875,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	List	   *orderbyclausecols;
 	List	   *index_pathkeys;
 	List	   *useful_pathkeys;
+	List	   *useful_uniquekeys;
 	bool		found_lower_saop_clause;
 	bool		pathkeys_possibly_useful;
 	bool		index_is_ordered;
@@ -1036,11 +1038,15 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 	if (index_clauses != NIL || useful_pathkeys != NIL || useful_predicate ||
 		index_only_scan)
 	{
+		if (has_useful_uniquekeys(root))
+			useful_uniquekeys = get_uniquekeys_for_index(root, useful_pathkeys);
+
 		ipath = create_index_path(root, index,
 								  index_clauses,
 								  orderbyclauses,
 								  orderbyclausecols,
 								  useful_pathkeys,
+								  useful_uniquekeys,
 								  index_is_ordered ?
 								  ForwardScanDirection :
 								  NoMovementScanDirection,
@@ -1063,6 +1069,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 									  orderbyclauses,
 									  orderbyclausecols,
 									  useful_pathkeys,
+									  useful_uniquekeys,
 									  index_is_ordered ?
 									  ForwardScanDirection :
 									  NoMovementScanDirection,
@@ -1093,11 +1100,15 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 													index_pathkeys);
 		if (useful_pathkeys != NIL)
 		{
+			if (has_useful_uniquekeys(root))
+				useful_uniquekeys = get_uniquekeys_for_index(root, useful_pathkeys);
+
 			ipath = create_index_path(root, index,
 									  index_clauses,
 									  NIL,
 									  NIL,
 									  useful_pathkeys,
+									  useful_uniquekeys,
 									  BackwardScanDirection,
 									  index_only_scan,
 									  outer_relids,
@@ -1115,6 +1126,7 @@ build_index_paths(PlannerInfo *root, RelOptInfo *rel,
 										  NIL,
 										  NIL,
 										  useful_pathkeys,
+										  useful_uniquekeys,
 										  BackwardScanDirection,
 										  index_only_scan,
 										  outer_relids,
@@ -3369,6 +3381,35 @@ match_clause_to_ordering_op(IndexOptInfo *index,
 	return clause;
 }
 
+/*
+ * get_uniquekeys_for_index
+ */
+static List *
+get_uniquekeys_for_index(PlannerInfo *root, List *pathkeys)
+{
+	ListCell *lc;
+
+	if (pathkeys)
+	{
+		List *uniquekeys = NIL;
+		foreach(lc, pathkeys)
+		{
+			UniqueKey *unique_key;
+			PathKey *pk = (PathKey *) lfirst(lc);
+			EquivalenceClass *ec = (EquivalenceClass *) pk->pk_eclass;
+
+			unique_key = makeNode(UniqueKey);
+			unique_key->eq_clause = ec;
+			
+			lappend(uniquekeys, unique_key);
+		}
+
+		if (uniquekeys_contained_in(root->canon_uniquekeys, uniquekeys))
+			return uniquekeys;
+	}
+
+	return NIL;
+}
 
 /****************************************************************************
  *				----  ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS	----
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 2f4fea241a..0cba366c06 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -29,6 +29,7 @@
 #include "utils/lsyscache.h"
 
 
+static bool pathkey_is_unique(PathKey *new_pathkey, List *pathkeys);
 static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys);
 static bool matches_boolean_partition_clause(RestrictInfo *rinfo,
 											 RelOptInfo *partrel,
@@ -96,6 +97,30 @@ make_canonical_pathkey(PlannerInfo *root,
 	return pk;
 }
 
+/*
+ * pathkey_is_unique
+ *	   Part of pathkey_is_redundant, that is reponsible for the case, when the
+ *	   new pathkey's equivalence class is the same as that of any existing
+ *	   member of the pathkey list.
+ */
+static bool
+pathkey_is_unique(PathKey *new_pathkey, List *pathkeys)
+{
+	EquivalenceClass *new_ec = new_pathkey->pk_eclass;
+	ListCell   *lc;
+
+	/* If same EC already is already in the list, then not unique */
+	foreach(lc, pathkeys)
+	{
+		PathKey    *old_pathkey = (PathKey *) lfirst(lc);
+
+		if (new_ec == old_pathkey->pk_eclass)
+			return false;
+	}
+
+	return true;
+}
+
 /*
  * pathkey_is_redundant
  *	   Is a pathkey redundant with one already in the given list?
@@ -135,22 +160,12 @@ static bool
 pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys)
 {
 	EquivalenceClass *new_ec = new_pathkey->pk_eclass;
-	ListCell   *lc;
 
 	/* Check for EC containing a constant --- unconditionally redundant */
 	if (EC_MUST_BE_REDUNDANT(new_ec))
 		return true;
 
-	/* If same EC already used in list, then redundant */
-	foreach(lc, pathkeys)
-	{
-		PathKey    *old_pathkey = (PathKey *) lfirst(lc);
-
-		if (new_ec == old_pathkey->pk_eclass)
-			return true;
-	}
-
-	return false;
+	return !pathkey_is_unique(new_pathkey, pathkeys);
 }
 
 /*
@@ -1098,6 +1113,41 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 	return pathkeys;
 }
 
+/*
+ * make_pathkeys_for_uniquekeyclauses
+ *		Generate a pathkeys list to be used for uniquekey clauses
+ */
+List *
+make_pathkeys_for_uniquekeys(PlannerInfo *root,
+							 List *sortclauses,
+							 List *tlist)
+{
+	List	   *pathkeys = NIL;
+	ListCell   *l;
+
+	foreach(l, sortclauses)
+	{
+		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
+		Expr	   *sortkey;
+		PathKey    *pathkey;
+
+		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
+		Assert(OidIsValid(sortcl->sortop));
+		pathkey = make_pathkey_from_sortop(root,
+										   sortkey,
+										   root->nullable_baserels,
+										   sortcl->sortop,
+										   sortcl->nulls_first,
+										   sortcl->tleSortGroupRef,
+										   true);
+
+		if (pathkey_is_unique(pathkey, pathkeys))
+			pathkeys = lappend(pathkeys, pathkey);
+	}
+
+	return pathkeys;
+}
+
 /****************************************************************************
  *		PATHKEYS AND MERGECLAUSES
  ****************************************************************************/
diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
new file mode 100644
index 0000000000..13d4ebb98c
--- /dev/null
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -0,0 +1,147 @@
+/*-------------------------------------------------------------------------
+ *
+ * uniquekey.c
+ *	  Utilities for matching and building unique keys
+ *
+ * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/optimizer/path/uniquekey.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "nodes/pg_list.h"
+
+static UniqueKey *make_canonical_uniquekey(PlannerInfo *root, EquivalenceClass *eclass);
+
+/*
+ * Build a list of unique keys
+ */
+List*
+build_uniquekeys(PlannerInfo *root, List *sortclauses)
+{
+	List *result = NIL;
+	List *sortkeys;
+	ListCell *l;
+
+	sortkeys = make_pathkeys_for_uniquekeys(root,
+											sortclauses,
+											root->processed_tlist);
+
+	/* Create a uniquekey and add it to the list */
+	foreach(l, sortkeys)
+	{
+		PathKey    *pathkey = (PathKey *) lfirst(l);
+		EquivalenceClass *ec = pathkey->pk_eclass;
+		UniqueKey *unique_key = make_canonical_uniquekey(root, ec);
+
+		result = lappend(result, unique_key);
+	}
+
+	return result;
+}
+
+/*
+ * uniquekeys_contained_in
+ *	  Are the keys2 included in the keys1 superset
+ */
+bool
+uniquekeys_contained_in(List *keys1, List *keys2)
+{
+	ListCell   *key1,
+			   *key2;
+
+	/*
+	 * Fall out quickly if we are passed two identical lists.  This mostly
+	 * catches the case where both are NIL, but that's common enough to
+	 * warrant the test.
+	 */
+	if (keys1 == keys2)
+		return true;
+
+	foreach(key2, keys2)
+	{
+		bool found = false;
+		UniqueKey  *uniquekey2 = (UniqueKey *) lfirst(key2);
+
+		foreach(key1, keys1)
+		{
+			UniqueKey  *uniquekey1 = (UniqueKey *) lfirst(key1);
+
+			if (uniquekey1->eq_clause == uniquekey2->eq_clause)
+			{
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * has_useful_uniquekeys
+ *		Detect whether the planner could have any uniquekeys that are
+ *		useful.
+ */
+bool
+has_useful_uniquekeys(PlannerInfo *root)
+{
+	if (root->query_uniquekeys != NIL)
+		return true;	/* there are some */
+	return false;		/* definitely useless */
+}
+
+/*
+ * make_canonical_uniquekey
+ *	  Given the parameters for a UniqueKey, find any pre-existing matching
+ *	  uniquekey in the query's list of "canonical" uniquekeys.  Make a new
+ *	  entry if there's not one already.
+ *
+ * Note that this function must not be used until after we have completed
+ * merging EquivalenceClasses.  (We don't try to enforce that here; instead,
+ * equivclass.c will complain if a merge occurs after root->canon_uniquekeys
+ * has become nonempty.)
+ */
+static UniqueKey *
+make_canonical_uniquekey(PlannerInfo *root,
+						 EquivalenceClass *eclass)
+{
+	UniqueKey  *uk;
+	ListCell   *lc;
+	MemoryContext oldcontext;
+
+	/* The passed eclass might be non-canonical, so chase up to the top */
+	while (eclass->ec_merged)
+		eclass = eclass->ec_merged;
+
+	foreach(lc, root->canon_uniquekeys)
+	{
+		uk = (UniqueKey *) lfirst(lc);
+		if (eclass == uk->eq_clause)
+			return uk;
+	}
+
+	/*
+	 * Be sure canonical uniquekeys are allocated in the main planning context.
+	 * Not an issue in normal planning, but it is for GEQO.
+	 */
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+
+	uk = makeNode(UniqueKey);
+	uk->eq_clause = eclass;
+
+	root->canon_uniquekeys = lappend(root->canon_uniquekeys, uk);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return uk;
+}
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 8f51f59f8a..5ee9ee6595 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3656,15 +3656,31 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	 * much easier, since we know that the parser ensured that one is a
 	 * superset of the other.
 	 */
+	root->canon_uniquekeys = NIL;
+	root->query_uniquekeys = NIL;
+
 	if (root->group_pathkeys)
+	{
 		root->query_pathkeys = root->group_pathkeys;
+
+		if (!root->parse->hasAggs)
+			root->query_uniquekeys = build_uniquekeys(root, qp_extra->groupClause);
+	}
 	else if (root->window_pathkeys)
 		root->query_pathkeys = root->window_pathkeys;
 	else if (list_length(root->distinct_pathkeys) >
 			 list_length(root->sort_pathkeys))
+	{
 		root->query_pathkeys = root->distinct_pathkeys;
+		root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause);
+	}
 	else if (root->sort_pathkeys)
+	{
 		root->query_pathkeys = root->sort_pathkeys;
+
+		if (root->distinct_pathkeys)
+			root->query_uniquekeys = build_uniquekeys(root, parse->distinctClause);
+	}
 	else
 		root->query_pathkeys = NIL;
 }
@@ -6222,7 +6238,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	/* Estimate the cost of index scan */
 	indexScanPath = create_index_path(root, indexInfo,
-									  NIL, NIL, NIL, NIL,
+									  NIL, NIL, NIL, NIL, NIL,
 									  ForwardScanDirection, false,
 									  NULL, 1.0, false);
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 0ac73984d2..ac0b937895 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -941,6 +941,7 @@ create_seqscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = parallel_workers;
 	pathnode->pathkeys = NIL;	/* seqscan has unordered result */
+	pathnode->uniquekeys = NIL;
 
 	cost_seqscan(pathnode, root, rel, pathnode->param_info);
 
@@ -965,6 +966,7 @@ create_samplescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* samplescan has unordered result */
+	pathnode->uniquekeys = NIL;
 
 	cost_samplescan(pathnode, root, rel, pathnode->param_info);
 
@@ -1001,6 +1003,7 @@ create_index_path(PlannerInfo *root,
 				  List *indexorderbys,
 				  List *indexorderbycols,
 				  List *pathkeys,
+				  List *uniquekeys,
 				  ScanDirection indexscandir,
 				  bool indexonly,
 				  Relids required_outer,
@@ -1019,6 +1022,7 @@ create_index_path(PlannerInfo *root,
 	pathnode->path.parallel_safe = rel->consider_parallel;
 	pathnode->path.parallel_workers = 0;
 	pathnode->path.pathkeys = pathkeys;
+	pathnode->path.uniquekeys = uniquekeys;
 
 	pathnode->indexinfo = index;
 	pathnode->indexclauses = indexclauses;
@@ -1062,6 +1066,7 @@ create_bitmap_heap_path(PlannerInfo *root,
 	pathnode->path.parallel_safe = rel->consider_parallel;
 	pathnode->path.parallel_workers = parallel_degree;
 	pathnode->path.pathkeys = NIL;	/* always unordered */
+	pathnode->path.uniquekeys = NIL;
 
 	pathnode->bitmapqual = bitmapqual;
 
@@ -1923,6 +1928,7 @@ create_functionscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = pathkeys;
+	pathnode->uniquekeys = NIL;
 
 	cost_functionscan(pathnode, root, rel, pathnode->param_info);
 
@@ -1949,6 +1955,7 @@ create_tablefuncscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	cost_tablefuncscan(pathnode, root, rel, pathnode->param_info);
 
@@ -1975,6 +1982,7 @@ create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	cost_valuesscan(pathnode, root, rel, pathnode->param_info);
 
@@ -2000,6 +2008,7 @@ create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, Relids required_outer)
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* XXX for now, result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	cost_ctescan(pathnode, root, rel, pathnode->param_info);
 
@@ -2026,6 +2035,7 @@ create_namedtuplestorescan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	cost_namedtuplestorescan(pathnode, root, rel, pathnode->param_info);
 
@@ -2052,6 +2062,7 @@ create_resultscan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	cost_resultscan(pathnode, root, rel, pathnode->param_info);
 
@@ -2078,6 +2089,7 @@ create_worktablescan_path(PlannerInfo *root, RelOptInfo *rel,
 	pathnode->parallel_safe = rel->consider_parallel;
 	pathnode->parallel_workers = 0;
 	pathnode->pathkeys = NIL;	/* result is always unordered */
+	pathnode->uniquekeys = NIL;
 
 	/* Cost is the same as for a regular CTE scan */
 	cost_ctescan(pathnode, root, rel, pathnode->param_info);
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 7ccb10e4e1..618032e82c 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -427,7 +427,6 @@ get_sortgrouplist_exprs(List *sgClauses, List *targetList)
 	return result;
 }
 
-
 /*****************************************************************************
  *		Functions to extract data from a list of SortGroupClauses
  *
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 4e2fb39105..a9b67c64f8 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -261,6 +261,7 @@ typedef enum NodeTag
 	T_EquivalenceMember,
 	T_PathKey,
 	T_PathTarget,
+	T_UniqueKey,
 	T_RestrictInfo,
 	T_IndexClause,
 	T_PlaceHolderVar,
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index e3c579ee44..c1d6f33fc0 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -269,6 +269,8 @@ struct PlannerInfo
 
 	List	   *canon_pathkeys; /* list of "canonical" PathKeys */
 
+	List	   *canon_uniquekeys; /* list of "canonical" UniqueKeys */
+
 	List	   *left_join_clauses;	/* list of RestrictInfos for mergejoinable
 									 * outer join clauses w/nonnullable var on
 									 * left */
@@ -297,6 +299,8 @@ struct PlannerInfo
 
 	List	   *query_pathkeys; /* desired pathkeys for query_planner() */
 
+	List	   *query_uniquekeys; /* */
+
 	List	   *group_pathkeys; /* groupClause pathkeys, if any */
 	List	   *window_pathkeys;	/* pathkeys of bottom window, if any */
 	List	   *distinct_pathkeys;	/* distinctClause pathkeys, if any */
@@ -1077,6 +1081,15 @@ typedef struct ParamPathInfo
 	List	   *ppi_clauses;	/* join clauses available from outer rels */
 } ParamPathInfo;
 
+/*
+ * UniqueKey
+ */
+typedef struct UniqueKey
+{
+	NodeTag		type;
+
+	EquivalenceClass *eq_clause;	/* equivalence class */
+} UniqueKey;
 
 /*
  * Type "Path" is used as-is for sequential-scan paths, as well as some other
@@ -1106,6 +1119,9 @@ typedef struct ParamPathInfo
  *
  * "pathkeys" is a List of PathKey nodes (see above), describing the sort
  * ordering of the path's output rows.
+ *
+ * "uniquekeys", if not NIL, is a list of UniqueKey nodes (see above),
+ * describing the XXX.
  */
 typedef struct Path
 {
@@ -1129,6 +1145,8 @@ typedef struct Path
 
 	List	   *pathkeys;		/* sort ordering of path's output */
 	/* pathkeys is a List of PathKey nodes; see above */
+
+	List	   *uniquekeys;	/* the unique keys, or NIL if none */
 } Path;
 
 /* Macro for extracting a path's parameterization relids; beware double eval */
diff --git a/src/include/nodes/print.h b/src/include/nodes/print.h
index cbff56a724..31e8f0686e 100644
--- a/src/include/nodes/print.h
+++ b/src/include/nodes/print.h
@@ -16,7 +16,6 @@
 
 #include "executor/tuptable.h"
 
-
 #define nodeDisplay(x)		pprint(x)
 
 extern void print(const void *obj);
@@ -28,6 +27,7 @@ extern char *pretty_format_node_dump(const char *dump);
 extern void print_rt(const List *rtable);
 extern void print_expr(const Node *expr, const List *rtable);
 extern void print_pathkeys(const List *pathkeys, const List *rtable);
+extern void print_uniquekeys(const List *uniquekeys, const List *rtable);
 extern void print_tl(const List *tlist, const List *rtable);
 extern void print_slot(TupleTableSlot *slot);
 
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 182ffeef4b..374cac157b 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -44,6 +44,7 @@ extern IndexPath *create_index_path(PlannerInfo *root,
 									List *indexorderbys,
 									List *indexorderbycols,
 									List *pathkeys,
+									List *uniquekeys,
 									ScanDirection indexscandir,
 									bool indexonly,
 									Relids required_outer,
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 7345137d1d..c7976d4a90 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -209,6 +209,9 @@ extern List *build_join_pathkeys(PlannerInfo *root,
 extern List *make_pathkeys_for_sortclauses(PlannerInfo *root,
 										   List *sortclauses,
 										   List *tlist);
+extern List *make_pathkeys_for_uniquekeys(PlannerInfo *root,
+										  List *sortclauses,
+										  List *tlist);
 extern void initialize_mergeclause_eclasses(PlannerInfo *root,
 											RestrictInfo *restrictinfo);
 extern void update_mergeclause_eclasses(PlannerInfo *root,
@@ -235,4 +238,12 @@ extern PathKey *make_canonical_pathkey(PlannerInfo *root,
 extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 									List *live_childrels);
 
+/*
+ * uniquekey.c
+ *	  Utilities for matching and building unique keys
+ */
+extern List *build_uniquekeys(PlannerInfo *root, List *sortclauses);
+extern bool uniquekeys_contained_in(List *keys1, List *keys2);
+extern bool has_useful_uniquekeys(PlannerInfo *root);
+
 #endif							/* PATHS_H */
-- 
2.21.0

>From 79df11a9f74d781a4eaded67772ce0ef1890df80 Mon Sep 17 00:00:00 2001
From: jesperpedersen <[email protected]>
Date: Fri, 2 Aug 2019 08:10:05 -0400
Subject: [PATCH 2/2] Index skip scan

Implementation of Index Skip Scan (see Loose Index Scan in the wiki [1])
on top of IndexOnlyScan and IndexScan. To make it suitable for both
situations when there are small number of distinct values and
significant amount of distinct values the following approach is taken -
instead of searching from the root for every value we're searching for
then first on the current page, and then if not found continue searching
from the root.

Original patch and design were proposed by Thomas Munro [2], revived and
improved by Dmitry Dolgov and Jesper Pedersen.

[1] https://wiki.postgresql.org/wiki/Loose_indexscan
[2] https://www.postgresql.org/message-id/flat/CADLWmXXbTSBxP-MzJuPAYSsL_2f0iPm5VWPbCvDbVvfX93FKkw%40mail.gmail.com

Author: Jesper Pedersen, Dmitry Dolgov
Reviewed-by: Thomas Munro, David Rowley, Floris Van Nee, Kyotaro Horiguchi
---
 contrib/bloom/blutils.c                       |   1 +
 doc/src/sgml/config.sgml                      |  15 +
 doc/src/sgml/indexam.sgml                     |  63 ++
 doc/src/sgml/indices.sgml                     |  24 +
 src/backend/access/brin/brin.c                |   1 +
 src/backend/access/gin/ginutil.c              |   1 +
 src/backend/access/gist/gist.c                |   1 +
 src/backend/access/hash/hash.c                |   1 +
 src/backend/access/index/indexam.c            |  18 +
 src/backend/access/nbtree/nbtree.c            |  13 +
 src/backend/access/nbtree/nbtsearch.c         | 652 +++++++++++++++++-
 src/backend/access/spgist/spgutils.c          |   1 +
 src/backend/commands/explain.c                |  29 +
 src/backend/executor/nodeIndexonlyscan.c      |  46 +-
 src/backend/executor/nodeIndexscan.c          |  43 +-
 src/backend/nodes/copyfuncs.c                 |   2 +
 src/backend/nodes/outfuncs.c                  |   2 +
 src/backend/nodes/readfuncs.c                 |   2 +
 src/backend/optimizer/path/costsize.c         |   1 +
 src/backend/optimizer/plan/createplan.c       |  20 +-
 src/backend/optimizer/plan/planner.c          |  76 ++
 src/backend/optimizer/util/pathnode.c         |  40 ++
 src/backend/optimizer/util/plancat.c          |   1 +
 src/backend/utils/misc/guc.c                  |   9 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/access/amapi.h                    |   8 +
 src/include/access/genam.h                    |   2 +
 src/include/access/nbtree.h                   |   7 +
 src/include/nodes/execnodes.h                 |   6 +
 src/include/nodes/pathnodes.h                 |   5 +
 src/include/nodes/plannodes.h                 |   2 +
 src/include/optimizer/cost.h                  |   1 +
 src/include/optimizer/pathnode.h              |   5 +
 src/test/regress/expected/create_index.out    |   1 +
 src/test/regress/expected/select_distinct.out | 434 ++++++++++++
 src/test/regress/expected/sysviews.out        |   3 +-
 src/test/regress/sql/create_index.sql         |   2 +
 src/test/regress/sql/select_distinct.sql      | 163 +++++
 38 files changed, 1692 insertions(+), 10 deletions(-)

diff --git a/contrib/bloom/blutils.c b/contrib/bloom/blutils.c
index cc1670934f..ab9f0a7177 100644
--- a/contrib/bloom/blutils.c
+++ b/contrib/bloom/blutils.c
@@ -129,6 +129,7 @@ blhandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = blbulkdelete;
 	amroutine->amvacuumcleanup = blvacuumcleanup;
 	amroutine->amcanreturn = NULL;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = blcostestimate;
 	amroutine->amoptions = bloptions;
 	amroutine->amproperty = NULL;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index c91e3e1550..e202589e98 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -4400,6 +4400,21 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-enable-indexskipscan" xreflabel="enable_indexskipscan">
+      <term><varname>enable_indexskipscan</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>enable_indexskipscan</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables or disables the query planner's use of index-skip-scan plan
+        types (see <xref linkend="indexes-index-skip-scans"/>). The default is
+        <literal>on</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-enable-material" xreflabel="enable_material">
       <term><varname>enable_material</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/doc/src/sgml/indexam.sgml b/doc/src/sgml/indexam.sgml
index dd54c68802..73b1b4fcf7 100644
--- a/doc/src/sgml/indexam.sgml
+++ b/doc/src/sgml/indexam.sgml
@@ -144,6 +144,7 @@ typedef struct IndexAmRoutine
     amendscan_function amendscan;
     ammarkpos_function ammarkpos;       /* can be NULL */
     amrestrpos_function amrestrpos;     /* can be NULL */
+    amskip_function amskip;     /* can be NULL */
 
     /* interface functions to support parallel index scans */
     amestimateparallelscan_function amestimateparallelscan;    /* can be NULL */
@@ -687,6 +688,68 @@ amrestrpos (IndexScanDesc scan);
 
   <para>
 <programlisting>
+bool
+amskip (IndexScanDesc scan,
+        ScanDirection direction,
+        ScanDirection indexdir,
+        bool scanstart,
+        int prefix);
+</programlisting>
+  Skip past all tuples where the first 'prefix' columns have the same value as
+  the last tuple returned in the current scan. The arguments are:
+
+   <variablelist>
+    <varlistentry>
+     <term><parameter>scan</parameter></term>
+     <listitem>
+      <para>
+       Index scan information
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>direction</parameter></term>
+     <listitem>
+      <para>
+       The direction in which data is advancing.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>indexdir</parameter></term>
+     <listitem>
+      <para>
+        The index direction, in which data must be read.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>scanstart</parameter></term>
+     <listitem>
+      <para>
+        Whether or not it is a start of the scan.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term><parameter>prefix</parameter></term>
+     <listitem>
+      <para>
+        Distinct prefix size.
+      </para>
+     </listitem>
+    </varlistentry>
+
+   </variablelist>
+
+  </para>
+
+  <para>
+<programlisting>
 Size
 amestimateparallelscan (void);
 </programlisting>
diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml
index 95c0a1926c..567141046f 100644
--- a/doc/src/sgml/indices.sgml
+++ b/doc/src/sgml/indices.sgml
@@ -1235,6 +1235,30 @@ SELECT target FROM tests WHERE subject = 'some-subject' AND success;
    and later will recognize such cases and allow index-only scans to be
    generated, but older versions will not.
   </para>
+
+  <sect2 id="indexes-index-skip-scans">
+    <title>Index Skip Scans</title>
+
+    <indexterm zone="indexes-index-skip-scans">
+      <primary>index</primary>
+      <secondary>index-skip scans</secondary>
+    </indexterm>
+    <indexterm zone="indexes-index-skip-scans">
+      <primary>index-skip scan</primary>
+    </indexterm>
+
+    <para>
+     In case if index scan is used to retrieve the distinct values of a column
+     efficiently, it can be not very efficient, since it requires to scan all
+     the equal values of a key. In such cases planner will consider apply index
+     skip scan approach, which is based on the idea of
+     <firstterm>Loose index scan</firstterm>. Rather than scanning all equal
+     values of a key, as soon as a new value is found, it will search for a
+     larger value on the same index page, and if not found, restart the search
+     by looking for a larger value. This is much faster when the index has many
+     equal keys.
+    </para>
+  </sect2>
  </sect1>
 
 
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index ae7b729edd..233ea9e5ec 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -109,6 +109,7 @@ brinhandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = brinbulkdelete;
 	amroutine->amvacuumcleanup = brinvacuumcleanup;
 	amroutine->amcanreturn = NULL;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = brincostestimate;
 	amroutine->amoptions = brinoptions;
 	amroutine->amproperty = NULL;
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index cf9699ad18..9817f34c34 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -61,6 +61,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = ginbulkdelete;
 	amroutine->amvacuumcleanup = ginvacuumcleanup;
 	amroutine->amcanreturn = NULL;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = gincostestimate;
 	amroutine->amoptions = ginoptions;
 	amroutine->amproperty = NULL;
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index e9ca4b8252..55a8b16b8a 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -83,6 +83,7 @@ gisthandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = gistbulkdelete;
 	amroutine->amvacuumcleanup = gistvacuumcleanup;
 	amroutine->amcanreturn = gistcanreturn;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = gistcostestimate;
 	amroutine->amoptions = gistoptions;
 	amroutine->amproperty = gistproperty;
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 5cc30dac42..019e330cff 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -82,6 +82,7 @@ hashhandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = hashbulkdelete;
 	amroutine->amvacuumcleanup = hashvacuumcleanup;
 	amroutine->amcanreturn = NULL;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = hashcostestimate;
 	amroutine->amoptions = hashoptions;
 	amroutine->amproperty = NULL;
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 28edd4aca7..ae7a882571 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -33,6 +33,7 @@
  *		index_can_return	- does index support index-only scans?
  *		index_getprocid - get a support procedure OID
  *		index_getprocinfo - get a support procedure's lookup info
+ *		index_skip		- advance past duplicate key values in a scan
  *
  * NOTES
  *		This file contains the index_ routines which used
@@ -730,6 +731,23 @@ index_can_return(Relation indexRelation, int attno)
 	return indexRelation->rd_indam->amcanreturn(indexRelation, attno);
 }
 
+/* ----------------
+ *		index_skip
+ *
+ *		Skip past all tuples where the first 'prefix' columns have the
+ *		same value as the last tuple returned in the current scan.
+ * ----------------
+ */
+bool
+index_skip(IndexScanDesc scan, ScanDirection direction,
+		   ScanDirection indexdir, bool scanstart, int prefix)
+{
+	SCAN_CHECKS;
+
+	return scan->indexRelation->rd_indam->amskip(scan, direction,
+												 indexdir, scanstart, prefix);
+}
+
 /* ----------------
  *		index_getprocid
  *
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 4cfd5289ad..46471598d1 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -131,6 +131,7 @@ bthandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = btbulkdelete;
 	amroutine->amvacuumcleanup = btvacuumcleanup;
 	amroutine->amcanreturn = btcanreturn;
+	amroutine->amskip = btskip;
 	amroutine->amcostestimate = btcostestimate;
 	amroutine->amoptions = btoptions;
 	amroutine->amproperty = btproperty;
@@ -380,6 +381,8 @@ btbeginscan(Relation rel, int nkeys, int norderbys)
 	 */
 	so->currTuples = so->markTuples = NULL;
 
+	so->skipScanKey = NULL;
+
 	scan->xs_itupdesc = RelationGetDescr(rel);
 
 	scan->opaque = so;
@@ -447,6 +450,16 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 	_bt_preprocess_array_keys(scan);
 }
 
+/*
+ * btskip() -- skip to the beginning of the next key prefix
+ */
+bool
+btskip(IndexScanDesc scan, ScanDirection direction,
+	   ScanDirection indexdir, bool start, int prefix)
+{
+	return _bt_skip(scan, direction, indexdir, start, prefix);
+}
+
 /*
  *	btendscan() -- close down a scan
  */
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 19735bf733..c7c7b77b8c 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -28,6 +28,8 @@ static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
 static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf);
 static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
 						 OffsetNumber offnum);
+static bool _bt_read_closest(IndexScanDesc scan, ScanDirection dir,
+							 ScanDirection indexdir, OffsetNumber offnum);
 static void _bt_saveitem(BTScanOpaque so, int itemIndex,
 						 OffsetNumber offnum, IndexTuple itup);
 static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
@@ -37,7 +39,10 @@ static bool _bt_parallel_readpage(IndexScanDesc scan, BlockNumber blkno,
 static Buffer _bt_walk_left(Relation rel, Buffer buf, Snapshot snapshot);
 static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 static inline void _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir);
-
+static inline void _bt_update_skip_scankeys(IndexScanDesc scan,
+											Relation indexRel);
+static inline bool _bt_scankey_within_page(IndexScanDesc scan, BTScanInsert key,
+										Buffer buf, ScanDirection dir);
 
 /*
  *	_bt_drop_lock_and_maybe_pin()
@@ -1380,6 +1385,315 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
 	return true;
 }
 
+/*
+ *  _bt_skip() -- Skip items that have the same prefix as the most recently
+ * 				  fetched index tuple.
+ *
+ * 		The current position is set so that a subsequent call to _bt_next will
+ * 		fetch the first tuple that differs in the leading 'prefix' keys.
+ *
+ * 		There are four different kinds of skipping (depending on dir and
+ * 		indexdir, that are important to distinguish, especially in the presense
+ * 		of an index condition:
+ *
+ * 		* Advancing forward and reading forward
+ * 			simple scan
+ *
+ * 		* Advancing forward and reading backward
+ * 			scan inside a cursor fetching backward, when skipping is necessary
+ * 			right from the start
+ *
+ * 		* Advancing backward and reading forward
+ * 			scan with order by desc inside a cursor fetching forward, when
+ * 			skipping is necessary right from the start
+ *
+ * 		* Advancing backward and reading backward
+ * 			simple scan with order by desc
+ *
+ * 		This function in conjunction with _bt_read_closest handles them all.
+ */
+bool
+_bt_skip(IndexScanDesc scan, ScanDirection dir,
+		 ScanDirection indexdir, bool scanstart, int prefix)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	BTStack stack;
+	Buffer buf;
+	OffsetNumber offnum;
+	BTScanPosItem *currItem;
+	Relation 	 indexRel = scan->indexRelation;
+	OffsetNumber startOffset = ItemPointerGetOffsetNumber(&scan->xs_itup->t_tid);
+
+	/* We want to return tuples, and we need a starting point */
+	Assert(scan->xs_want_itup);
+	Assert(scan->xs_itup);
+
+	/*
+	 * If skipScanKey is NULL then we initialize it with _bt_mkscankey,
+	 * otherwise we will just update the sk_flags / sk_argument elements
+	 * in order to eliminate repeated free/realloc.
+	 */
+	if (so->skipScanKey == NULL)
+	{
+		so->skipScanKey = _bt_mkscankey(indexRel, scan->xs_itup);
+		so->skipScanKey->keysz = prefix;
+		so->skipScanKey->scantid = NULL;
+	}
+	else
+	{
+		_bt_update_skip_scankeys(scan, indexRel);
+	}
+	_bt_update_skip_scankeys(scan, indexRel);
+
+	/* Check if the next unique key can be found within the current page */
+	if (BTScanPosIsValid(so->currPos) &&
+		_bt_scankey_within_page(scan, so->skipScanKey, so->currPos.buf, dir))
+	{
+		bool keyFound = false;
+
+		LockBuffer(so->currPos.buf, BT_READ);
+		offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, so->currPos.buf);
+
+		/* Lock the page for SERIALIZABLE transactions */
+		PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(so->currPos.buf),
+						  scan->xs_snapshot);
+
+		/* We know in which direction to look */
+		_bt_initialize_more_data(so, dir);
+
+		if (ScanDirectionIsForward(dir))
+		{
+			/* Move back for _bt_next */
+			offnum = OffsetNumberPrev(offnum);
+		}
+
+		/* Now read the data */
+		keyFound = _bt_read_closest(scan, dir, indexdir, offnum);
+		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+
+		if (keyFound)
+		{
+			/* set IndexTuple */
+			currItem = &so->currPos.items[so->currPos.itemIndex];
+			scan->xs_heaptid = currItem->heapTid;
+			if (scan->xs_want_itup)
+				scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+			return true;
+		}
+	}
+
+	if (BTScanPosIsValid(so->currPos))
+	{
+		ReleaseBuffer(so->currPos.buf);
+		so->currPos.buf = InvalidBuffer;
+	}
+
+	/*
+	 * We haven't found scan key within the current page, so let's scan from
+	 * the root. Use _bt_search and _bt_binsrch to get the buffer and offset
+	 * number
+	 */
+	so->skipScanKey->nextkey = ScanDirectionIsForward(dir);
+	stack = _bt_search(scan->indexRelation, so->skipScanKey,
+					   &buf, BT_READ, scan->xs_snapshot);
+	_bt_freestack(stack);
+	so->currPos.buf = buf;
+	offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+
+	/* Lock the page for SERIALIZABLE transactions */
+	PredicateLockPage(scan->indexRelation, BufferGetBlockNumber(buf),
+					  scan->xs_snapshot);
+
+	/* We know in which direction to look */
+	_bt_initialize_more_data(so, dir);
+
+	/*
+	 * Simplest case, advance forward and read also forward. At this moment we
+	 * are at the next distinct key at the beginning of the series. Go back one
+	 * step and let _bt_read_closest figure out about index condition.
+	 */
+	if (ScanDirectionIsForward(dir) && ScanDirectionIsForward(indexdir))
+		offnum = OffsetNumberPrev(offnum);
+
+	/*
+	 * Andvance backward but read forward. At this moment we are at the next
+	 * distinct key at the beginning of the series. In case if scan just
+	 * started, we can read forward without doing anything else. Otherwise find
+	 * previous distinct key and the beginning of it's series and read forward
+	 * from there. To do so, go back one step, perform binary search to find
+	 * the first item in the series and let _bt_read_closest do everything
+	 * else.
+	 */
+	else if (ScanDirectionIsBackward(dir) && ScanDirectionIsForward(indexdir))
+	{
+		offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+
+		if (!scanstart)
+		{
+			_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+
+			/* One step back to find a previous value */
+			_bt_read_closest(scan, dir, dir, offnum);
+
+			if (_bt_next(scan, dir))
+			{
+				_bt_update_skip_scankeys(scan, indexRel);
+
+				/* And now find the last item from the sequence for the current,
+				 * value with the intention do OffsetNumberNext. As a result we
+				 * end up on a first element from the sequence. */
+				if (_bt_scankey_within_page(scan, so->skipScanKey,
+											so->currPos.buf, dir))
+					offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+				else
+				{
+					ReleaseBuffer(so->currPos.buf);
+					so->currPos.buf = InvalidBuffer;
+
+					stack = _bt_search(scan->indexRelation, so->skipScanKey,
+									   &buf, BT_READ, scan->xs_snapshot);
+					_bt_freestack(stack);
+					so->currPos.buf = buf;
+					offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+
+				}
+			}
+			else
+			{
+				pfree(so->skipScanKey);
+				so->skipScanKey = NULL;
+				return false;
+			}
+		}
+	}
+
+	/*
+	 * Andvance forward but read backward. At this moment we are at the next
+	 * distinct key at the beginning of the series. In case if scan just
+	 * started, we can go one step back and read forward without doing anything
+	 * else. Otherwise find the next distinct key and the beginning of it's
+	 * series, go one step back and read backward from there.
+	 *
+	 * An interesting situation can happen if one of distinct keys do not pass
+	 * a corresponding index condition at all. In this case reading backward
+	 * can lead to a previous distinc key being found, creating a loop. To
+	 * avoid that check the value to be returned, and jump one more time if
+	 * it's the same as at the beginning.
+	 */
+	else if (ScanDirectionIsForward(dir) && ScanDirectionIsBackward(indexdir))
+	{
+		if (scanstart)
+			offnum = OffsetNumberPrev(offnum);
+		else
+		{
+			OffsetNumber nextOffset = startOffset;
+
+			while(nextOffset == startOffset)
+			{
+				/*
+				 * Find a next index tuple to update scan key. It could be at
+				 * the end, so check for max offset
+				 */
+				OffsetNumber curOffnum = offnum;
+				Page page = BufferGetPage(so->currPos.buf);
+				OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+				ItemId itemid = PageGetItemId(page, Min(offnum, maxoff));
+
+				_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+
+				scan->xs_itup = (IndexTuple) PageGetItem(page, itemid);
+				so->skipScanKey->nextkey = ScanDirectionIsForward(dir);
+
+				_bt_update_skip_scankeys(scan, indexRel);
+
+				if (BTScanPosIsValid(so->currPos))
+				{
+					ReleaseBuffer(so->currPos.buf);
+					so->currPos.buf = InvalidBuffer;
+				}
+
+				stack = _bt_search(scan->indexRelation, so->skipScanKey,
+								   &buf, BT_READ, scan->xs_snapshot);
+				_bt_freestack(stack);
+				so->currPos.buf = buf;
+				offnum = _bt_binsrch(scan->indexRelation, so->skipScanKey, buf);
+
+				/*
+				 * Jump to the next key returned the same offset, which means
+				 * we are at the end and need to return
+				 */
+				if (offnum == curOffnum)
+				{
+					_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+
+					BTScanPosUnpinIfPinned(so->currPos);
+					BTScanPosInvalidate(so->currPos)
+
+					pfree(so->skipScanKey);
+					so->skipScanKey = NULL;
+					return false;
+				}
+
+				offnum = OffsetNumberPrev(offnum);
+
+				/* Check if _bt_read_closest returns already found item */
+				if (_bt_read_closest(scan, dir, indexdir, offnum))
+				{
+					IndexTuple itup;
+
+					currItem = &so->currPos.items[so->currPos.lastItem];
+					itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+					nextOffset = ItemPointerGetOffsetNumber(&itup->t_tid);
+				}
+				else
+				{
+					elog(ERROR, "Could not read closest index tuples: %d", offnum);
+					pfree(so->skipScanKey);
+					so->skipScanKey = NULL;
+					return false;
+				}
+
+				/*
+				 * If the nextOffset is the same as before, it means we are in
+				 * the loop, return offnum to the original position and jump
+				 * further
+				 */
+				if (nextOffset == startOffset)
+					offnum = OffsetNumberNext(offnum);
+			}
+		}
+	}
+
+	/* Now read the data */
+	if (!_bt_read_closest(scan, dir, indexdir, offnum))
+	{
+		/*
+		 * There's no actually-matching data on this page.  Try to advance to
+		 * the next page.  Return false if there's no matching data at all.
+		 */
+		LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
+		if (!_bt_steppage(scan, dir))
+		{
+			pfree(so->skipScanKey);
+			so->skipScanKey = NULL;
+			return false;
+		}
+	}
+	else
+	{
+		/* Drop the lock, and maybe the pin, on the current page */
+		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+	}
+
+	/* And set IndexTuple */
+	currItem = &so->currPos.items[so->currPos.itemIndex];
+	scan->xs_heaptid = currItem->heapTid;
+	if (scan->xs_want_itup)
+		scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
+
+	return true;
+}
+
 /*
  *	_bt_readpage() -- Load data from current index page into so->currPos
  *
@@ -1596,6 +1910,293 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
 	return (so->currPos.firstItem <= so->currPos.lastItem);
 }
 
+/*
+ *	_bt_read_closest() -- Load data from closest two items, previous and
+ *						  current on one the current index page into
+ *						  so->currPos. Previous may be not passing index
+ *						  condition, but it is needed for skip scan.
+ *
+ * 		Similar to _bt_readpage, except that it reads only a current and a
+ * 		previous item. So far it is being used for _bt_skip.
+ *
+ * 		Returns true if required two matching items found on the page, false
+ * 		otherwise.
+ */
+static bool
+_bt_read_closest(IndexScanDesc scan, ScanDirection dir,
+				 ScanDirection indexdir, OffsetNumber offnum)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	Page		page;
+	BTPageOpaque opaque;
+	OffsetNumber minoff;
+	OffsetNumber maxoff;
+	IndexTuple 	 prevItup = NULL;
+	int			itemIndex;
+	bool		continuescan;
+	int			indnatts;
+
+	/*
+	 * We must have the buffer pinned and locked, but the usual macro can't be
+	 * used here; this function is what makes it good for currPos.
+	 */
+	Assert(BufferIsValid(so->currPos.buf));
+
+	page = BufferGetPage(so->currPos.buf);
+	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+	/* allow next page be processed by parallel worker */
+	if (scan->parallel_scan)
+	{
+		if (ScanDirectionIsForward(dir))
+			_bt_parallel_release(scan, opaque->btpo_next);
+		else
+			_bt_parallel_release(scan, BufferGetBlockNumber(so->currPos.buf));
+	}
+
+	continuescan = true;		/* default assumption */
+	indnatts = IndexRelationGetNumberOfAttributes(scan->indexRelation);
+	minoff = P_FIRSTDATAKEY(opaque);
+	maxoff = PageGetMaxOffsetNumber(page);
+
+	/*
+	 * We note the buffer's block number so that we can release the pin later.
+	 * This allows us to re-read the buffer if it is needed again for hinting.
+	 */
+	so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
+
+	/*
+	 * We save the LSN of the page as we read it, so that we know whether it
+	 * safe to apply LP_DEAD hints to the page later.  This allows us to drop
+	 * the pin for MVCC scans, which allows vacuum to avoid blocking.
+	 */
+	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+
+	/*
+	 * we must save the page's right-link while scanning it; this tells us
+	 * where to step right to after we're done with these items.  There is no
+	 * corresponding need for the left-link, since splits always go right.
+	 */
+	so->currPos.nextPage = opaque->btpo_next;
+
+	/* initialize tuple workspace to empty */
+	so->currPos.nextTupleOffset = 0;
+
+	/*
+	 * Now that the current page has been made consistent, the macro should be
+	 * good.
+	 */
+	Assert(BTScanPosIsPinned(so->currPos));
+
+	if (ScanDirectionIsForward(indexdir))
+	{
+		/* load items[] in ascending order */
+		itemIndex = 0;
+
+		offnum = Max(offnum, minoff);
+
+		while (offnum <= maxoff)
+		{
+			ItemId		iid = PageGetItemId(page, offnum);
+			IndexTuple	itup;
+
+			/*
+			 * If the scan specifies not to return killed tuples, then we
+			 * treat a killed tuple as not passing the qual
+			 */
+			if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
+			{
+				offnum = OffsetNumberNext(offnum);
+				continue;
+			}
+
+			itup = (IndexTuple) PageGetItem(page, iid);
+
+			if (_bt_checkkeys(scan, itup, indnatts, dir, &continuescan))
+			{
+				/* tuple passes all scan key conditions, so remember it */
+				if (ScanDirectionIsBackward(dir))
+				{
+					_bt_saveitem(so, itemIndex, offnum, itup);
+					itemIndex++;
+				}
+				else if (prevItup != NULL)
+				{
+					/*
+					 * Save the current item and the previous, even if the
+					 * latter does not pass scan key conditions
+					 */
+					ItemPointerData tid = prevItup->t_tid;
+					OffsetNumber prevOffnum = ItemPointerGetOffsetNumber(&tid);
+
+					_bt_saveitem(so, itemIndex, prevOffnum, prevItup);
+					itemIndex++;
+
+					_bt_saveitem(so, itemIndex, offnum, itup);
+					itemIndex++;
+				}
+
+				if (itemIndex == 2)
+				{
+					Assert(itemIndex <= MaxIndexTuplesPerPage);
+					so->currPos.firstItem = 0;
+					/*
+					 * Actual itemIndex depends on in which direction do we
+					 * advance if this direction is different from indexdir
+					 */
+					so->currPos.itemIndex = ScanDirectionIsForward(dir) ? 0 : 1;
+					so->currPos.lastItem = 1;
+
+					/*
+					 * All of the closest items were found, so we can report
+					 * success
+					 */
+					return true;
+				}
+			}
+			/* When !continuescan, there can't be any more matches, so stop */
+			if (!continuescan)
+				break;
+
+			prevItup = itup;
+			offnum = OffsetNumberNext(offnum);
+		}
+
+		/*
+		 * We don't need to visit page to the right when the high key
+		 * indicates that no more matches will be found there.
+		 *
+		 * Checking the high key like this works out more often than you might
+		 * think.  Leaf page splits pick a split point between the two most
+		 * dissimilar tuples (this is weighed against the need to evenly share
+		 * free space).  Leaf pages with high key attribute values that can
+		 * only appear on non-pivot tuples on the right sibling page are
+		 * common.
+		 */
+		if (continuescan && !P_RIGHTMOST(opaque))
+		{
+			ItemId		iid = PageGetItemId(page, P_HIKEY);
+			IndexTuple	itup = (IndexTuple) PageGetItem(page, iid);
+			int			truncatt;
+
+			truncatt = BTreeTupleGetNAtts(itup, scan->indexRelation);
+			_bt_checkkeys(scan, itup, truncatt, dir, &continuescan);
+		}
+
+		if (!continuescan)
+			so->currPos.moreRight = false;
+
+		Assert(itemIndex <= MaxIndexTuplesPerPage);
+		so->currPos.firstItem = 0;
+		so->currPos.lastItem = itemIndex - 1;
+		so->currPos.itemIndex = 0;
+	}
+	else
+	{
+		/* load items[] in descending order */
+		itemIndex = MaxIndexTuplesPerPage;
+
+		offnum = Min(offnum, maxoff);
+
+		while (offnum >= minoff)
+		{
+			ItemId		iid = PageGetItemId(page, offnum);
+			IndexTuple	itup;
+			bool		tuple_alive;
+			bool		passes_quals;
+
+			/*
+			 * If the scan specifies not to return killed tuples, then we
+			 * treat a killed tuple as not passing the qual.  Most of the
+			 * time, it's a win to not bother examining the tuple's index
+			 * keys, but just skip to the next tuple (previous, actually,
+			 * since we're scanning backwards).  However, if this is the first
+			 * tuple on the page, we do check the index keys, to prevent
+			 * uselessly advancing to the page to the left.  This is similar
+			 * to the high key optimization used by forward scans.
+			 */
+			if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
+			{
+				Assert(offnum >= P_FIRSTDATAKEY(opaque));
+				if (offnum > P_FIRSTDATAKEY(opaque))
+				{
+					offnum = OffsetNumberPrev(offnum);
+					continue;
+				}
+
+				tuple_alive = false;
+			}
+			else
+				tuple_alive = true;
+
+			itup = (IndexTuple) PageGetItem(page, iid);
+
+			passes_quals = _bt_checkkeys(scan, itup, indnatts, dir,
+										 &continuescan);
+			if (passes_quals && tuple_alive)
+			{
+				/* tuple passes all scan key conditions, so remember it */
+				if (ScanDirectionIsForward(dir))
+				{
+					itemIndex--;
+					_bt_saveitem(so, itemIndex, offnum, itup);
+				}
+				else if (prevItup != NULL)
+				{
+					/*
+					 * Save the current item and the previous, even if the
+					 * latter does not pass scan key conditions
+					 */
+					ItemPointerData tid = prevItup->t_tid;
+					OffsetNumber prevOffnum = ItemPointerGetOffsetNumber(&tid);
+
+					itemIndex--;
+					_bt_saveitem(so, itemIndex, prevOffnum, prevItup);
+
+					itemIndex--;
+					_bt_saveitem(so, itemIndex, offnum, itup);
+				}
+
+				if (MaxIndexTuplesPerPage - itemIndex == 2)
+				{
+					Assert(itemIndex <= MaxIndexTuplesPerPage);
+					so->currPos.firstItem = MaxIndexTuplesPerPage - 2;
+					/*
+					 * Actual itemIndex depends on in which direction do we
+					 * advance if this direction is different from indexdir
+					 */
+					so->currPos.itemIndex = MaxIndexTuplesPerPage -
+						(ScanDirectionIsForward(dir) ? 2 : 1);
+					so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
+
+					/*
+					 * All of the closest items were found, so we can report
+					 * success
+					 */
+					return true;
+				}
+			}
+			if (!continuescan)
+			{
+				/* there can't be any more matches, so stop */
+				so->currPos.moreLeft = false;
+				break;
+			}
+
+			prevItup = itup;
+			offnum = OffsetNumberPrev(offnum);
+		}
+
+		Assert(itemIndex >= 0);
+		so->currPos.firstItem = itemIndex;
+		so->currPos.lastItem = MaxIndexTuplesPerPage - 1;
+		so->currPos.itemIndex = MaxIndexTuplesPerPage - 1;
+	}
+
+	/* Not all of the closest items were found */
+	return false;
+}
+
 /* Save an index item into so->currPos.items[itemIndex] */
 static void
 _bt_saveitem(BTScanOpaque so, int itemIndex,
@@ -2251,3 +2852,52 @@ _bt_initialize_more_data(BTScanOpaque so, ScanDirection dir)
 	so->numKilled = 0;			/* just paranoia */
 	so->markItemIndex = -1;		/* ditto */
 }
+
+/*
+ * _bt_update_skip_scankeys() -- set up a new values for the existing scankeys
+ * 								 based on the current index tuple
+ */
+static inline void
+_bt_update_skip_scankeys(IndexScanDesc scan, Relation indexRel)
+{
+	TupleDesc		itupdesc;
+	int				indnkeyatts, i;
+	BTScanOpaque 	so = (BTScanOpaque) scan->opaque;
+	ScanKey			scankeys = so->skipScanKey->scankeys;
+
+	itupdesc = RelationGetDescr(indexRel);
+	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRel);
+	for (i = 0; i < indnkeyatts; i++)
+	{
+		Datum datum;
+		bool null;
+		int flags;
+
+		datum = index_getattr(scan->xs_itup, i + 1, itupdesc, &null);
+		flags = (null ? SK_ISNULL : 0) |
+				(indexRel->rd_indoption[i] << SK_BT_INDOPTION_SHIFT);
+		scankeys[i].sk_flags = flags;
+		scankeys[i].sk_argument = datum;
+	}
+}
+
+/*
+ * _bt_scankey_within_page() -- check if the provided scankey could be found
+ * 								within a page, specified by the buffer.
+ */
+static inline bool
+_bt_scankey_within_page(IndexScanDesc scan, BTScanInsert key,
+						Buffer buf, ScanDirection dir)
+{
+	OffsetNumber low, high, compare_offset;
+	Page page = BufferGetPage(buf);
+	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+	int 		 compare_value = ScanDirectionIsForward(dir) ? 0 : 1;
+
+	low = P_FIRSTDATAKEY(opaque);
+	high = PageGetMaxOffsetNumber(page);
+	compare_offset = ScanDirectionIsForward(dir) ? high : low;
+
+	return _bt_compare(scan->indexRelation,
+					   key, page, compare_offset) > compare_value;
+}
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c
index 45472db147..dc151ecf09 100644
--- a/src/backend/access/spgist/spgutils.c
+++ b/src/backend/access/spgist/spgutils.c
@@ -64,6 +64,7 @@ spghandler(PG_FUNCTION_ARGS)
 	amroutine->ambulkdelete = spgbulkdelete;
 	amroutine->amvacuumcleanup = spgvacuumcleanup;
 	amroutine->amcanreturn = spgcanreturn;
+	amroutine->amskip = NULL;
 	amroutine->amcostestimate = spgcostestimate;
 	amroutine->amoptions = spgoptions;
 	amroutine->amproperty = spgproperty;
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 62fb3434a3..ad500de12b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -130,6 +130,7 @@ static void ExplainDummyGroup(const char *objtype, const char *labelname,
 static void ExplainXMLTag(const char *tagname, int flags, ExplainState *es);
 static void ExplainJSONLineEnding(ExplainState *es);
 static void ExplainYAMLLineStarting(ExplainState *es);
+static void ExplainIndexSkipScanKeys(ExplainState *es, int skipPrefixSize);
 static void escape_yaml(StringInfo buf, const char *str);
 
 
@@ -1041,6 +1042,22 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
 	return planstate_tree_walker(planstate, ExplainPreScanNode, rels_used);
 }
 
+/*
+ * ExplainIndexSkipScanKeys -
+ *	  Append information about index skip scan to es->str.
+ *
+ * Can be used to print the skip prefix size.
+ */
+static void
+ExplainIndexSkipScanKeys(ExplainState *es, int skipPrefixSize)
+{
+	if (skipPrefixSize > 0)
+	{
+		if (es->format != EXPLAIN_FORMAT_TEXT)
+			ExplainPropertyInteger("Distinct Prefix", NULL, skipPrefixSize, es);
+	}
+}
+
 /*
  * ExplainNode -
  *	  Appends a description of a plan tree to es->str
@@ -1363,6 +1380,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				IndexScan  *indexscan = (IndexScan *) plan;
 
+				ExplainIndexSkipScanKeys(es, indexscan->indexskipprefixsize);
+
 				ExplainIndexScanDetails(indexscan->indexid,
 										indexscan->indexorderdir,
 										es);
@@ -1373,6 +1392,8 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan;
 
+				ExplainIndexSkipScanKeys(es, indexonlyscan->indexskipprefixsize);
+
 				ExplainIndexScanDetails(indexonlyscan->indexid,
 										indexonlyscan->indexorderdir,
 										es);
@@ -1582,6 +1603,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	switch (nodeTag(plan))
 	{
 		case T_IndexScan:
+			if (((IndexScan *) plan)->indexskipprefixsize > 0)
+			{
+				ExplainPropertyBool("Skip scan mode", true, es);
+			}
 			show_scan_qual(((IndexScan *) plan)->indexqualorig,
 						   "Index Cond", planstate, ancestors, es);
 			if (((IndexScan *) plan)->indexqualorig)
@@ -1595,6 +1620,10 @@ ExplainNode(PlanState *planstate, List *ancestors,
 										   planstate, es);
 			break;
 		case T_IndexOnlyScan:
+			if (((IndexOnlyScan *) plan)->indexskipprefixsize > 0)
+			{
+				ExplainPropertyBool("Skip scan mode", true, es);
+			}
 			show_scan_qual(((IndexOnlyScan *) plan)->indexqual,
 						   "Index Cond", planstate, ancestors, es);
 			if (((IndexOnlyScan *) plan)->indexqual)
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 652a9afc75..21f169f5ea 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -65,6 +65,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	IndexScanDesc scandesc;
 	TupleTableSlot *slot;
 	ItemPointer tid;
+	IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) node->ss.ps.plan;
 
 	/*
 	 * extract necessary information from index scan node
@@ -72,7 +73,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 	estate = node->ss.ps.state;
 	direction = estate->es_direction;
 	/* flip direction if this is an overall backward scan */
-	if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
+	if (ScanDirectionIsBackward(indexonlyscan->indexorderdir))
 	{
 		if (ScanDirectionIsForward(direction))
 			direction = BackwardScanDirection;
@@ -115,6 +116,45 @@ IndexOnlyNext(IndexOnlyScanState *node)
 						 node->ioss_NumOrderByKeys);
 	}
 
+	/*
+	 * Check if we need to skip to the next key prefix, because we've been
+	 * asked to implement DISTINCT.
+	 */
+	if (node->ioss_SkipPrefixSize > 0)
+	{
+		bool startscan = false;
+
+		/*
+		 * When fetching a cursor in the direction opposite to a general scan
+		 * direction, the result must be what normal fetching should have
+		 * returned, but in reversed order. In other words, return the last or
+		 * first scanned tuple in a DISTINCT set, depending on a cursor
+		 * direction. Skip to that tuple before returning the first tuple.
+		 */
+		if (direction * indexonlyscan->indexorderdir < 0 &&
+			!node->ioss_FirstTupleEmitted)
+		{
+			if (index_getnext_tid(scandesc, direction))
+			{
+				node->ioss_FirstTupleEmitted = true;
+				startscan = true;
+			}
+		}
+
+		if (node->ioss_FirstTupleEmitted &&
+			!index_skip(scandesc, direction, indexonlyscan->indexorderdir,
+						startscan, node->ioss_SkipPrefixSize))
+		{
+			/* Reached end of index. At this point currPos is invalidated,
+			 * and we need to reset ioss_FirstTupleEmitted, since otherwise
+			 * after going backwards, reaching the end of index, and going
+			 * forward again we apply skip again. It would be incorrect and
+			 * lead to an extra skipped item. */
+			node->ioss_FirstTupleEmitted = false;
+			return ExecClearTuple(slot);
+		}
+	}
+
 	/*
 	 * OK, now that we have what we need, fetch the next tuple.
 	 */
@@ -250,6 +290,8 @@ IndexOnlyNext(IndexOnlyScanState *node)
 							  ItemPointerGetBlockNumber(tid),
 							  estate->es_snapshot);
 
+		node->ioss_FirstTupleEmitted = true;
+
 		return slot;
 	}
 
@@ -500,6 +542,8 @@ ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
 	indexstate->ss.ps.plan = (Plan *) node;
 	indexstate->ss.ps.state = estate;
 	indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
+	indexstate->ioss_SkipPrefixSize = node->indexskipprefixsize;
+	indexstate->ioss_FirstTupleEmitted = false;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index ac7aa81f67..6e649930c2 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -85,6 +85,7 @@ IndexNext(IndexScanState *node)
 	ScanDirection direction;
 	IndexScanDesc scandesc;
 	TupleTableSlot *slot;
+	IndexScan *indexscan = (IndexScan *) node->ss.ps.plan;
 
 	/*
 	 * extract necessary information from index scan node
@@ -92,7 +93,7 @@ IndexNext(IndexScanState *node)
 	estate = node->ss.ps.state;
 	direction = estate->es_direction;
 	/* flip direction if this is an overall backward scan */
-	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
+	if (ScanDirectionIsBackward(indexscan->indexorderdir))
 	{
 		if (ScanDirectionIsForward(direction))
 			direction = BackwardScanDirection;
@@ -116,6 +117,7 @@ IndexNext(IndexScanState *node)
 								   node->iss_NumOrderByKeys);
 
 		node->iss_ScanDesc = scandesc;
+		node->iss_ScanDesc->xs_want_itup = true;
 
 		/*
 		 * If no run-time keys to calculate or they are ready, go ahead and
@@ -127,6 +129,42 @@ IndexNext(IndexScanState *node)
 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
 	}
 
+	/*
+	 * Check if we need to skip to the next key prefix, because we've been
+	 * asked to implement DISTINCT.
+	 */
+	if (node->ioss_SkipPrefixSize > 0)
+	{
+		bool startscan = false;
+
+		/*
+		 * If advancing direction is different from index direction, we must
+		 * skip right away, but _bt_skip requires a starting point.
+		 */
+		if (direction * indexscan->indexorderdir < 0 &&
+			!node->ioss_FirstTupleEmitted)
+		{
+			if (index_getnext_slot(scandesc, direction, slot))
+			{
+				node->ioss_FirstTupleEmitted = true;
+				startscan = true;
+			}
+		}
+
+		if (node->ioss_FirstTupleEmitted &&
+			!index_skip(scandesc, direction, indexscan->indexorderdir,
+						startscan, node->ioss_SkipPrefixSize))
+		{
+			/* Reached end of index. At this point currPos is invalidated,
+			 * and we need to reset ioss_FirstTupleEmitted, since otherwise
+			 * after going backwards, reaching the end of index, and going
+			 * forward again we apply skip again. It would be incorrect and
+			 * lead to an extra skipped item. */
+			node->ioss_FirstTupleEmitted = false;
+			return ExecClearTuple(slot);
+		}
+	}
+
 	/*
 	 * ok, now that we have what we need, fetch the next tuple.
 	 */
@@ -149,6 +187,7 @@ IndexNext(IndexScanState *node)
 			}
 		}
 
+		node->ioss_FirstTupleEmitted = true;
 		return slot;
 	}
 
@@ -906,6 +945,8 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
 	indexstate->ss.ps.plan = (Plan *) node;
 	indexstate->ss.ps.state = estate;
 	indexstate->ss.ps.ExecProcNode = ExecIndexScan;
+	indexstate->ioss_SkipPrefixSize = node->indexskipprefixsize;
+	indexstate->ioss_FirstTupleEmitted = false;
 
 	/*
 	 * Miscellaneous initialization
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index a2617c7cfd..20495c9e52 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -490,6 +490,7 @@ _copyIndexScan(const IndexScan *from)
 	COPY_NODE_FIELD(indexorderbyorig);
 	COPY_NODE_FIELD(indexorderbyops);
 	COPY_SCALAR_FIELD(indexorderdir);
+	COPY_SCALAR_FIELD(indexskipprefixsize);
 
 	return newnode;
 }
@@ -515,6 +516,7 @@ _copyIndexOnlyScan(const IndexOnlyScan *from)
 	COPY_NODE_FIELD(indexorderby);
 	COPY_NODE_FIELD(indextlist);
 	COPY_SCALAR_FIELD(indexorderdir);
+	COPY_SCALAR_FIELD(indexskipprefixsize);
 
 	return newnode;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 9a4f3e8e4b..b09f8982ff 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -559,6 +559,7 @@ _outIndexScan(StringInfo str, const IndexScan *node)
 	WRITE_NODE_FIELD(indexorderbyorig);
 	WRITE_NODE_FIELD(indexorderbyops);
 	WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
+	WRITE_INT_FIELD(indexskipprefixsize);
 }
 
 static void
@@ -573,6 +574,7 @@ _outIndexOnlyScan(StringInfo str, const IndexOnlyScan *node)
 	WRITE_NODE_FIELD(indexorderby);
 	WRITE_NODE_FIELD(indextlist);
 	WRITE_ENUM_FIELD(indexorderdir, ScanDirection);
+	WRITE_INT_FIELD(indexskipprefixsize);
 }
 
 static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 764e3bb90c..0fc3c5ea68 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -1787,6 +1787,7 @@ _readIndexScan(void)
 	READ_NODE_FIELD(indexorderbyorig);
 	READ_NODE_FIELD(indexorderbyops);
 	READ_ENUM_FIELD(indexorderdir, ScanDirection);
+	READ_INT_FIELD(indexskipprefixsize);
 
 	READ_DONE();
 }
@@ -1806,6 +1807,7 @@ _readIndexOnlyScan(void)
 	READ_NODE_FIELD(indexorderby);
 	READ_NODE_FIELD(indextlist);
 	READ_ENUM_FIELD(indexorderdir, ScanDirection);
+	READ_INT_FIELD(indexskipprefixsize);
 
 	READ_DONE();
 }
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 2565dcf296..3d33160cde 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -124,6 +124,7 @@ int			max_parallel_workers_per_gather = 2;
 bool		enable_seqscan = true;
 bool		enable_indexscan = true;
 bool		enable_indexonlyscan = true;
+bool		enable_indexskipscan = true;
 bool		enable_bitmapscan = true;
 bool		enable_tidscan = true;
 bool		enable_sort = true;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index f2325694c5..1805bbc07d 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -175,12 +175,14 @@ static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
 								 Oid indexid, List *indexqual, List *indexqualorig,
 								 List *indexorderby, List *indexorderbyorig,
 								 List *indexorderbyops,
-								 ScanDirection indexscandir);
+								 ScanDirection indexscandir,
+								 int skipprefix);
 static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual,
 										 Index scanrelid, Oid indexid,
 										 List *indexqual, List *indexorderby,
 										 List *indextlist,
-										 ScanDirection indexscandir);
+										 ScanDirection indexscandir,
+										 int skipprefix);
 static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid,
 											  List *indexqual,
 											  List *indexqualorig);
@@ -2905,7 +2907,8 @@ create_indexscan_plan(PlannerInfo *root,
 												fixed_indexquals,
 												fixed_indexorderbys,
 												best_path->indexinfo->indextlist,
-												best_path->indexscandir);
+												best_path->indexscandir,
+												best_path->indexskipprefix);
 	else
 		scan_plan = (Scan *) make_indexscan(tlist,
 											qpqual,
@@ -2916,7 +2919,8 @@ create_indexscan_plan(PlannerInfo *root,
 											fixed_indexorderbys,
 											indexorderbys,
 											indexorderbyops,
-											best_path->indexscandir);
+											best_path->indexscandir,
+											best_path->indexskipprefix);
 
 	copy_generic_path_info(&scan_plan->plan, &best_path->path);
 
@@ -5179,7 +5183,8 @@ make_indexscan(List *qptlist,
 			   List *indexorderby,
 			   List *indexorderbyorig,
 			   List *indexorderbyops,
-			   ScanDirection indexscandir)
+			   ScanDirection indexscandir,
+			   int skipPrefixSize)
 {
 	IndexScan  *node = makeNode(IndexScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5196,6 +5201,7 @@ make_indexscan(List *qptlist,
 	node->indexorderbyorig = indexorderbyorig;
 	node->indexorderbyops = indexorderbyops;
 	node->indexorderdir = indexscandir;
+	node->indexskipprefixsize = skipPrefixSize;
 
 	return node;
 }
@@ -5208,7 +5214,8 @@ make_indexonlyscan(List *qptlist,
 				   List *indexqual,
 				   List *indexorderby,
 				   List *indextlist,
-				   ScanDirection indexscandir)
+				   ScanDirection indexscandir,
+				   int skipPrefixSize)
 {
 	IndexOnlyScan *node = makeNode(IndexOnlyScan);
 	Plan	   *plan = &node->scan.plan;
@@ -5223,6 +5230,7 @@ make_indexonlyscan(List *qptlist,
 	node->indexorderby = indexorderby;
 	node->indextlist = indextlist;
 	node->indexorderdir = indexscandir;
+	node->indexskipprefixsize = skipPrefixSize;
 
 	return node;
 }
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 5ee9ee6595..53930af364 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4834,6 +4834,82 @@ create_distinct_paths(PlannerInfo *root,
 												  path,
 												  list_length(root->distinct_pathkeys),
 												  numDistinctRows));
+
+				/* Consider index skip scan as well */
+				if (enable_indexskipscan &&
+					IsA(path, IndexPath) &&
+					((IndexPath *) path)->indexinfo->amcanskip &&
+					root->distinct_pathkeys != NIL)
+				{
+					ListCell   		*lc;
+					IndexOptInfo 	*index = NULL;
+					bool 			different_columns_order = false,
+									not_empty_qual = false;
+					int 			i = 0;
+					int 			distinctPrefixKeys;
+
+					Assert(path->pathtype == T_IndexOnlyScan ||
+						   path->pathtype == T_IndexScan);
+
+					index = ((IndexPath *) path)->indexinfo;
+					distinctPrefixKeys = list_length(root->query_uniquekeys);
+
+					/*
+					 * Normally we can think about distinctPrefixKeys as just a
+					 * number of distinct keys. But if lets say we have a
+					 * distinct key a, and the index contains b, a in exactly
+					 * this order. In such situation we need to use position of
+					 * a in the index as distinctPrefixKeys, otherwise skip
+					 * will happen only by the first column.
+					 */
+					foreach(lc, root->query_uniquekeys)
+					{
+						UniqueKey *uniquekey = (UniqueKey *) lfirst(lc);
+						EquivalenceMember *em =
+							lfirst_node(EquivalenceMember,
+										list_head(uniquekey->eq_clause->ec_members));
+						Var *var = (Var *) em->em_expr;
+
+						Assert(i < index->ncolumns);
+
+						for (i = 0; i < index->ncolumns; i++)
+						{
+							if (index->indexkeys[i] == var->varattno)
+							{
+								distinctPrefixKeys = Max(i + 1, distinctPrefixKeys);
+								break;
+							}
+						}
+					}
+
+					/*
+					 * XXX: In case of index scan quals evaluation happens
+					 * after ExecScanFetch, which means skip results could be
+					 * fitered out. Consider the following query:
+					 *
+					 * 		select distinct (a, b) a, b, c from t where  c < 100;
+					 *
+					 * Skip scan returns one tuple for one distinct set of (a, b)
+					 * with arbitrary one of c, so if the choosed c does not
+					 * match the qual and there is any c that matches the qual,
+					 * we miss that tuple.
+					 */
+					if (path->pathtype == T_IndexScan &&
+						parse->jointree != NULL &&
+						parse->jointree->quals != NULL &&
+						list_length((List*) parse->jointree->quals) != 0)
+							not_empty_qual = true;
+
+					if (!different_columns_order &&	!not_empty_qual)
+					{
+						add_path(distinct_rel, (Path *)
+								 create_skipscan_unique_path(root,
+															 distinct_rel,
+															 path,
+															 distinctPrefixKeys,
+															 numDistinctRows));
+					}
+				}
 			}
 		}
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index ac0b937895..6bde8a8647 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2916,6 +2916,46 @@ create_upper_unique_path(PlannerInfo *root,
 	return pathnode;
 }
 
+/*
+ * create_skipscan_unique_path
+ *	  Creates a pathnode the same as an existing IndexPath except based on
+ *	  skipping duplicate values.  This may or may not be cheaper than using
+ *	  create_upper_unique_path.
+ *
+ * The input path must be an IndexPath for an index that supports amskip.
+ */
+IndexPath *
+create_skipscan_unique_path(PlannerInfo *root,
+							RelOptInfo *rel,
+							Path *basepath,
+							int distinctPrefixKeys,
+							double numGroups)
+{
+	IndexPath *pathnode = makeNode(IndexPath);
+
+	Assert(IsA(basepath, IndexPath));
+
+	/* We don't want to modify basepath, so make a copy. */
+	memcpy(pathnode, basepath, sizeof(IndexPath));
+
+	/* The size of the prefix we'll use for skipping. */
+	Assert(pathnode->indexinfo->amcanskip);
+	Assert(distinctPrefixKeys > 0);
+	/*Assert(distinctPrefixKeys <= list_length(pathnode->path.pathkeys));*/
+	pathnode->indexskipprefix = distinctPrefixKeys;
+
+	/*
+	 * The cost to skip to each distinct value should be roughly the same as
+	 * the cost of finding the first key times the number of distinct values
+	 * we expect to find.
+	 */
+	pathnode->path.startup_cost = basepath->startup_cost;
+	pathnode->path.total_cost = basepath->startup_cost * numGroups;
+	pathnode->path.rows = numGroups;
+
+	return pathnode;
+}
+
 /*
  * create_agg_path
  *	  Creates a pathnode that represents performing aggregation/grouping
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 98e99481c6..7a6db36a57 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -269,6 +269,7 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			info->amoptionalkey = amroutine->amoptionalkey;
 			info->amsearcharray = amroutine->amsearcharray;
 			info->amsearchnulls = amroutine->amsearchnulls;
+			info->amcanskip = (amroutine->amskip != NULL);
 			info->amcanparallel = amroutine->amcanparallel;
 			info->amhasgettuple = (amroutine->amgettuple != NULL);
 			info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index fc463601ff..4f061f2e14 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -911,6 +911,15 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"enable_indexskipscan", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables the planner's use of index-skip-scan plans."),
+			NULL
+		},
+		&enable_indexskipscan,
+		true,
+		NULL, NULL, NULL
+	},
 	{
 		{"enable_bitmapscan", PGC_USERSET, QUERY_TUNING_METHOD,
 			gettext_noop("Enables the planner's use of bitmap-scan plans."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index cfad86c02a..21d1effe61 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -351,6 +351,7 @@
 #enable_hashjoin = on
 #enable_indexscan = on
 #enable_indexonlyscan = on
+#enable_indexskipscan = on
 #enable_material = on
 #enable_mergejoin = on
 #enable_nestloop = on
diff --git a/src/include/access/amapi.h b/src/include/access/amapi.h
index 6e3db06eed..f84791e358 100644
--- a/src/include/access/amapi.h
+++ b/src/include/access/amapi.h
@@ -130,6 +130,13 @@ typedef void (*amrescan_function) (IndexScanDesc scan,
 typedef bool (*amgettuple_function) (IndexScanDesc scan,
 									 ScanDirection direction);
 
+/* skip past duplicates in a given prefix */
+typedef bool (*amskip_function) (IndexScanDesc scan,
+								 ScanDirection dir,
+								 ScanDirection indexdir,
+								 bool start,
+								 int prefix);
+
 /* fetch all valid tuples */
 typedef int64 (*amgetbitmap_function) (IndexScanDesc scan,
 									   TIDBitmap *tbm);
@@ -225,6 +232,7 @@ typedef struct IndexAmRoutine
 	amendscan_function amendscan;
 	ammarkpos_function ammarkpos;	/* can be NULL */
 	amrestrpos_function amrestrpos; /* can be NULL */
+	amskip_function amskip;				/* can be NULL */
 
 	/* interface functions to support parallel index scans */
 	amestimateparallelscan_function amestimateparallelscan; /* can be NULL */
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 8c053be2ca..e5ec5b07c8 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -173,6 +173,8 @@ extern IndexBulkDeleteResult *index_bulk_delete(IndexVacuumInfo *info,
 extern IndexBulkDeleteResult *index_vacuum_cleanup(IndexVacuumInfo *info,
 												   IndexBulkDeleteResult *stats);
 extern bool index_can_return(Relation indexRelation, int attno);
+extern bool index_skip(IndexScanDesc scan, ScanDirection direction,
+					   ScanDirection indexdir, bool start, int prefix);
 extern RegProcedure index_getprocid(Relation irel, AttrNumber attnum,
 									uint16 procnum);
 extern FmgrInfo *index_getprocinfo(Relation irel, AttrNumber attnum,
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 83e0e6c28e..c775554f0b 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -663,6 +663,9 @@ typedef struct BTScanOpaqueData
 	 */
 	int			markItemIndex;	/* itemIndex, or -1 if not valid */
 
+	/* Work space for _bt_skip */
+	BTScanInsert	skipScanKey;	/* used to control skipping */
+
 	/* keep these last in struct for efficiency */
 	BTScanPosData currPos;		/* current position data */
 	BTScanPosData markPos;		/* marked position, if any */
@@ -777,6 +780,8 @@ extern OffsetNumber _bt_binsrch_insert(Relation rel, BTInsertState insertstate);
 extern int32 _bt_compare(Relation rel, BTScanInsert key, Page page, OffsetNumber offnum);
 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
+extern bool _bt_skip(IndexScanDesc scan, ScanDirection dir,
+					 ScanDirection indexdir, bool start, int prefix);
 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
 							   Snapshot snapshot);
 
@@ -801,6 +806,8 @@ extern void _bt_end_vacuum_callback(int code, Datum arg);
 extern Size BTreeShmemSize(void);
 extern void BTreeShmemInit(void);
 extern bytea *btoptions(Datum reloptions, bool validate);
+extern bool btskip(IndexScanDesc scan, ScanDirection dir,
+				   ScanDirection indexdir, bool start, int prefix);
 extern bool btproperty(Oid index_oid, int attno,
 					   IndexAMProperty prop, const char *propname,
 					   bool *res, bool *isnull);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 4ec78491f6..9fbb822653 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1377,6 +1377,8 @@ typedef struct IndexScanState
 	ExprContext *iss_RuntimeContext;
 	Relation	iss_RelationDesc;
 	struct IndexScanDescData *iss_ScanDesc;
+	int         ioss_SkipPrefixSize;
+	bool		ioss_FirstTupleEmitted;
 
 	/* These are needed for re-checking ORDER BY expr ordering */
 	pairingheap *iss_ReorderQueue;
@@ -1406,6 +1408,8 @@ typedef struct IndexScanState
  *		TableSlot		   slot for holding tuples fetched from the table
  *		VMBuffer		   buffer in use for visibility map testing, if any
  *		PscanLen		   size of parallel index-only scan descriptor
+ *		SkipPrefixSize	   number of keys for skip-based DISTINCT
+ *		FirstTupleEmitted  has the first tuple been emitted
  * ----------------
  */
 typedef struct IndexOnlyScanState
@@ -1424,6 +1428,8 @@ typedef struct IndexOnlyScanState
 	struct IndexScanDescData *ioss_ScanDesc;
 	TupleTableSlot *ioss_TableSlot;
 	Buffer		ioss_VMBuffer;
+	int         ioss_SkipPrefixSize;
+	bool		ioss_FirstTupleEmitted;
 	Size		ioss_PscanLen;
 } IndexOnlyScanState;
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index c1d6f33fc0..754f224817 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -839,6 +839,7 @@ struct IndexOptInfo
 	bool		amsearchnulls;	/* can AM search for NULL/NOT NULL entries? */
 	bool		amhasgettuple;	/* does AM have amgettuple interface? */
 	bool		amhasgetbitmap; /* does AM have amgetbitmap interface? */
+	bool		amcanskip;		/* can AM skip duplicate values? */
 	bool		amcanparallel;	/* does AM support parallel scan? */
 	/* Rather than include amapi.h here, we declare amcostestimate like this */
 	void		(*amcostestimate) ();	/* AM's cost estimator */
@@ -1189,6 +1190,9 @@ typedef struct Path
  * we need not recompute them when considering using the same index in a
  * bitmap index/heap scan (see BitmapHeapPath).  The costs of the IndexPath
  * itself represent the costs of an IndexScan or IndexOnlyScan plan type.
+ *
+ * 'indexskipprefix' represents the number of columns to consider for skip
+ * scans.
  *----------
  */
 typedef struct IndexPath
@@ -1201,6 +1205,7 @@ typedef struct IndexPath
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
 	Selectivity indexselectivity;
+	int			indexskipprefix;
 } IndexPath;
 
 /*
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 8e6594e355..04e871ae83 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -405,6 +405,7 @@ typedef struct IndexScan
 	List	   *indexorderbyorig;	/* the same in original form */
 	List	   *indexorderbyops;	/* OIDs of sort ops for ORDER BY exprs */
 	ScanDirection indexorderdir;	/* forward or backward or don't care */
+	int			indexskipprefixsize;	/* the size of the prefix for distinct scans */
 } IndexScan;
 
 /* ----------------
@@ -432,6 +433,7 @@ typedef struct IndexOnlyScan
 	List	   *indexorderby;	/* list of index ORDER BY exprs */
 	List	   *indextlist;		/* TargetEntry list describing index's cols */
 	ScanDirection indexorderdir;	/* forward or backward or don't care */
+	int			indexskipprefixsize;	/* the size of the prefix for distinct scans */
 } IndexOnlyScan;
 
 /* ----------------
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index b3d0b4f6fb..9abfdfb6bd 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -50,6 +50,7 @@ extern PGDLLIMPORT int max_parallel_workers_per_gather;
 extern PGDLLIMPORT bool enable_seqscan;
 extern PGDLLIMPORT bool enable_indexscan;
 extern PGDLLIMPORT bool enable_indexonlyscan;
+extern PGDLLIMPORT bool enable_indexskipscan;
 extern PGDLLIMPORT bool enable_bitmapscan;
 extern PGDLLIMPORT bool enable_tidscan;
 extern PGDLLIMPORT bool enable_sort;
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 374cac157b..56bb16d589 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -201,6 +201,11 @@ extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
 												 Path *subpath,
 												 int numCols,
 												 double numGroups);
+extern IndexPath *create_skipscan_unique_path(PlannerInfo *root,
+											  RelOptInfo *rel,
+											  Path *subpath,
+											  int numCols,
+											  double numGroups);
 extern AggPath *create_agg_path(PlannerInfo *root,
 								RelOptInfo *rel,
 								Path *subpath,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index c6d575a2f9..4f5c82f49d 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -19,6 +19,7 @@ CREATE INDEX tenk1_unique1 ON tenk1 USING btree(unique1 int4_ops);
 CREATE INDEX tenk1_unique2 ON tenk1 USING btree(unique2 int4_ops);
 CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
 CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
+CREATE INDEX tenk1_four ON tenk1 (four);
 CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
 CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
 CREATE INDEX tenk2_hundred ON tenk2 USING btree(hundred int4_ops);
diff --git a/src/test/regress/expected/select_distinct.out b/src/test/regress/expected/select_distinct.out
index f3696c6d1d..dcae34e9c0 100644
--- a/src/test/regress/expected/select_distinct.out
+++ b/src/test/regress/expected/select_distinct.out
@@ -244,3 +244,437 @@ SELECT null IS NOT DISTINCT FROM null as "yes";
  t
 (1 row)
 
+-- index only skip scan
+SELECT DISTINCT four FROM tenk1;
+ four 
+------
+    0
+    1
+    2
+    3
+(4 rows)
+
+SELECT DISTINCT four FROM tenk1 WHERE four = 1;
+ four 
+------
+    1
+(1 row)
+
+SELECT DISTINCT four FROM tenk1 ORDER BY four DESC;
+ four 
+------
+    3
+    2
+    1
+    0
+(4 rows)
+
+CREATE TABLE distinct_a (a int, b int, c int);
+INSERT INTO distinct_a (
+    SELECT five, hundred, 10 FROM
+    generate_series(1, 5) five,
+    generate_series(1, 10000) hundred
+);
+CREATE INDEX ON distinct_a (a, b);
+ANALYZE a;
+-- test index skip scan with a condition on a non unique field
+SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2;
+ a | b 
+---+---
+ 1 | 2
+ 2 | 2
+ 3 | 2
+ 4 | 2
+ 5 | 2
+(5 rows)
+
+-- test index skip scan backwards
+SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+-- test opposite scan/index directions inside a cursor
+-- forward/backward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b;
+FETCH FROM c;
+ a | b 
+---+---
+ 1 | 1
+(1 row)
+
+FETCH BACKWARD FROM c;
+ a | b 
+---+---
+(0 rows)
+
+FETCH 6 FROM c;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a | b 
+---+---
+ 5 | 1
+ 4 | 1
+ 3 | 1
+ 2 | 1
+ 1 | 1
+(5 rows)
+
+FETCH 6 FROM c;
+ a | b 
+---+---
+ 1 | 1
+ 2 | 1
+ 3 | 1
+ 4 | 1
+ 5 | 1
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a | b 
+---+---
+ 5 | 1
+ 4 | 1
+ 3 | 1
+ 2 | 1
+ 1 | 1
+(5 rows)
+
+END;
+-- backward/forward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC;
+FETCH FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+(1 row)
+
+FETCH BACKWARD FROM c;
+ a | b 
+---+---
+(0 rows)
+
+FETCH 6 FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a |   b   
+---+-------
+ 1 | 10000
+ 2 | 10000
+ 3 | 10000
+ 4 | 10000
+ 5 | 10000
+(5 rows)
+
+FETCH 6 FROM c;
+ a |   b   
+---+-------
+ 5 | 10000
+ 4 | 10000
+ 3 | 10000
+ 2 | 10000
+ 1 | 10000
+(5 rows)
+
+FETCH BACKWARD 6 FROM c;
+ a |   b   
+---+-------
+ 1 | 10000
+ 2 | 10000
+ 3 | 10000
+ 4 | 10000
+ 5 | 10000
+(5 rows)
+
+END;
+DROP TABLE distinct_a;
+-- test missing values and skipping from the end
+CREATE TABLE distinct_abc(a int, b int, c int);
+CREATE INDEX ON distinct_abc(a, b, c);
+INSERT INTO distinct_abc
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+                          QUERY PLAN                          
+--------------------------------------------------------------
+ Index Only Scan using distinct_abc_a_b_c_idx on distinct_abc
+   Skip scan mode: true
+   Index Cond: (c = 2)
+(3 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+FETCH ALL FROM c;
+ a | b | c 
+---+---+---
+ 1 | 1 | 2
+ 3 | 1 | 2
+(2 rows)
+
+FETCH BACKWARD ALL FROM c;
+ a | b | c 
+---+---+---
+ 3 | 1 | 2
+ 1 | 1 | 2
+(2 rows)
+
+END;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+                              QUERY PLAN                               
+-----------------------------------------------------------------------
+ Index Only Scan Backward using distinct_abc_a_b_c_idx on distinct_abc
+   Skip scan mode: true
+   Index Cond: (c = 2)
+(3 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+FETCH ALL FROM c;
+ a | b | c 
+---+---+---
+ 3 | 2 | 2
+ 1 | 2 | 2
+(2 rows)
+
+FETCH BACKWARD ALL FROM c;
+ a | b | c 
+---+---+---
+ 1 | 2 | 2
+ 3 | 2 | 2
+(2 rows)
+
+END;
+DROP TABLE distinct_abc;
+-- index skip scan
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 ORDER BY four;
+ four | ten 
+------+-----
+    0 |   0
+    1 |   9
+    2 |   0
+    3 |   1
+(4 rows)
+
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 WHERE four = 1 ORDER BY four;
+ four | ten 
+------+-----
+    1 |   9
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 ORDER BY four;
+              QUERY PLAN              
+--------------------------------------
+ Index Scan using tenk1_four on tenk1
+   Skip scan mode: true
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 WHERE four = 1 ORDER BY four;
+                    QUERY PLAN                     
+---------------------------------------------------
+ Result
+   ->  Unique
+         ->  Bitmap Heap Scan on tenk1
+               Recheck Cond: (four = 1)
+               ->  Bitmap Index Scan on tenk1_four
+                     Index Cond: (four = 1)
+(6 rows)
+
+-- check colums order
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+ four 
+------
+    0
+    2
+(2 rows)
+
+CREATE INDEX tenk1_four_ten on tenk1 (four, ten);
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+ four 
+------
+    0
+    2
+(2 rows)
+
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE four = 0;
+ four | ten 
+------+-----
+    0 |   0
+    0 |   2
+    0 |   4
+    0 |   6
+    0 |   8
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Index Only Scan using tenk1_four_ten on tenk1
+   Skip scan mode: true
+   Index Cond: (ten = 2)
+(3 rows)
+
+-- test uniq_distinct_pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE four = 0;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Index Only Scan using tenk1_four_ten on tenk1
+   Skip scan mode: true
+   Index Cond: (four = 0)
+(3 rows)
+
+DROP INDEX tenk1_four_ten;
+CREATE INDEX tenk1_ten_four on tenk1 (ten, four);
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+ four 
+------
+    0
+    2
+(2 rows)
+
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE ten = 2;
+ four | ten 
+------+-----
+    0 |   2
+    2 |   2
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Index Only Scan using tenk1_ten_four on tenk1
+   Skip scan mode: true
+   Index Cond: (ten = 2)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE ten = 2;
+                  QUERY PLAN                   
+-----------------------------------------------
+ Index Only Scan using tenk1_ten_four on tenk1
+   Skip scan mode: true
+   Index Cond: (ten = 2)
+(3 rows)
+
+DROP INDEX tenk1_ten_four;
+-- check projection case
+SELECT DISTINCT four, four FROM tenk1 WHERE ten = 2;
+ four | four 
+------+------
+    0 |    0
+    2 |    2
+(2 rows)
+
+SELECT DISTINCT four, 1 FROM tenk1 WHERE ten = 2;
+ four | ?column? 
+------+----------
+    2 |        1
+    0 |        1
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1;
+                QUERY PLAN                 
+-------------------------------------------
+ Index Only Scan using tenk1_four on tenk1
+   Skip scan mode: true
+(2 rows)
+
+-- test cursor forward/backward movements
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT DISTINCT four FROM tenk1;
+FETCH FROM c;
+ four 
+------
+    0
+(1 row)
+
+FETCH BACKWARD FROM c;
+ four 
+------
+(0 rows)
+
+FETCH 5 FROM c;
+ four 
+------
+    0
+    1
+    2
+    3
+(4 rows)
+
+FETCH BACKWARD 5 FROM c;
+ four 
+------
+    3
+    2
+    1
+    0
+(4 rows)
+
+FETCH 5 FROM c;
+ four 
+------
+    0
+    1
+    2
+    3
+(4 rows)
+
+FETCH BACKWARD 5 FROM c;
+ four 
+------
+    3
+    2
+    1
+    0
+(4 rows)
+
+END;
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index a1c90eb905..bd3b373515 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -78,6 +78,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_hashjoin                | on
  enable_indexonlyscan           | on
  enable_indexscan               | on
+ enable_indexskipscan           | on
  enable_material                | on
  enable_mergejoin               | on
  enable_nestloop                | on
@@ -89,7 +90,7 @@ select name, setting from pg_settings where name like 'enable%';
  enable_seqscan                 | on
  enable_sort                    | on
  enable_tidscan                 | on
-(17 rows)
+(18 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f96bebf410..a3be42a725 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -26,6 +26,8 @@ CREATE INDEX tenk1_hundred ON tenk1 USING btree(hundred int4_ops);
 
 CREATE INDEX tenk1_thous_tenthous ON tenk1 (thousand, tenthous);
 
+CREATE INDEX tenk1_four ON tenk1 (four);
+
 CREATE INDEX tenk2_unique1 ON tenk2 USING btree(unique1 int4_ops);
 
 CREATE INDEX tenk2_unique2 ON tenk2 USING btree(unique2 int4_ops);
diff --git a/src/test/regress/sql/select_distinct.sql b/src/test/regress/sql/select_distinct.sql
index a605e86449..22222592ee 100644
--- a/src/test/regress/sql/select_distinct.sql
+++ b/src/test/regress/sql/select_distinct.sql
@@ -73,3 +73,166 @@ SELECT 1 IS NOT DISTINCT FROM 2 as "no";
 SELECT 2 IS NOT DISTINCT FROM 2 as "yes";
 SELECT 2 IS NOT DISTINCT FROM null as "no";
 SELECT null IS NOT DISTINCT FROM null as "yes";
+
+-- index only skip scan
+SELECT DISTINCT four FROM tenk1;
+SELECT DISTINCT four FROM tenk1 WHERE four = 1;
+SELECT DISTINCT four FROM tenk1 ORDER BY four DESC;
+
+CREATE TABLE distinct_a (a int, b int, c int);
+INSERT INTO distinct_a (
+    SELECT five, hundred, 10 FROM
+    generate_series(1, 5) five,
+    generate_series(1, 10000) hundred
+);
+CREATE INDEX ON distinct_a (a, b);
+ANALYZE a;
+
+-- test index skip scan with a condition on a non unique field
+SELECT DISTINCT ON (a) a, b FROM distinct_a WHERE b = 2;
+
+-- test index skip scan backwards
+SELECT DISTINCT ON (a) a, b FROM distinct_a ORDER BY a DESC, b DESC;
+
+-- test opposite scan/index directions inside a cursor
+-- forward/backward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a, b;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+END;
+
+-- backward/forward
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b FROM distinct_a ORDER BY a DESC, b DESC;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+FETCH 6 FROM c;
+FETCH BACKWARD 6 FROM c;
+
+END;
+
+DROP TABLE distinct_a;
+
+-- test missing values and skipping from the end
+CREATE TABLE distinct_abc(a int, b int, c int);
+CREATE INDEX ON distinct_abc(a, b, c);
+INSERT INTO distinct_abc
+	VALUES (1, 1, 1),
+		   (1, 1, 2),
+		   (1, 2, 2),
+		   (1, 2, 3),
+		   (2, 2, 1),
+		   (2, 2, 3),
+		   (3, 1, 1),
+		   (3, 1, 2),
+		   (3, 2, 2),
+		   (3, 2, 3);
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2;
+
+FETCH ALL FROM c;
+FETCH BACKWARD ALL FROM c;
+
+END;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR
+SELECT DISTINCT ON (a) a,b,c FROM distinct_abc WHERE c = 2
+ORDER BY a DESC, b DESC;
+
+FETCH ALL FROM c;
+FETCH BACKWARD ALL FROM c;
+
+END;
+
+DROP TABLE distinct_abc;
+
+-- index skip scan
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 ORDER BY four;
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 WHERE four = 1 ORDER BY four;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 ORDER BY four;
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT ON (four) four, ten
+FROM tenk1 WHERE four = 1 ORDER BY four;
+
+-- check colums order
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+
+CREATE INDEX tenk1_four_ten on tenk1 (four, ten);
+
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE four = 0;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+
+-- test uniq_distinct_pathkeys
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE four = 0;
+
+DROP INDEX tenk1_four_ten;
+
+CREATE INDEX tenk1_ten_four on tenk1 (ten, four);
+
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE ten = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1 WHERE ten = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT on (four, ten) four, ten FROM tenk1 WHERE ten = 2;
+
+DROP INDEX tenk1_ten_four;
+
+-- check projection case
+SELECT DISTINCT four, four FROM tenk1 WHERE ten = 2;
+SELECT DISTINCT four, 1 FROM tenk1 WHERE ten = 2;
+
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT four FROM tenk1;
+
+-- test cursor forward/backward movements
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT DISTINCT four FROM tenk1;
+
+FETCH FROM c;
+FETCH BACKWARD FROM c;
+
+FETCH 5 FROM c;
+FETCH BACKWARD 5 FROM c;
+
+FETCH 5 FROM c;
+FETCH BACKWARD 5 FROM c;
+
+END;
-- 
2.21.0

Re: Index Skip Scan

Reply via email to