Re: [PATCH] Support SK_SEARCHNULL / SK_SEARCHNOTNULL for heap-only scans

Jacob Champion Wed, 19 Jul 2023 16:44:57 -0700

On Mon, Feb 27, 2023 at 12:24 AM Heikki Linnakangas <hlinn...@iki.fi> wrote:
> On 22/02/2023 15:03, Aleksander Alekseev wrote:
> > If memory serves I noticed that WHERE ... IS NULL queries don't even
> > hit HeapKeyTest() and I was curious where the check for NULLs is
> > actually made. As I understand, SeqNext() in nodeSeqscan.c simply
> > iterates over all the tuples it can find and pushes them to the parent
> > node. We could get a slightly better performance for certain queries
> > if SeqNext() did the check internally.
>
> Right, it might be faster to perform the NULL-checks before checking
> visibility, for example. Arbitrary quals cannot be evaluated before
> checking visibility, but NULL checks could be.


Hi Heikki,

There's quite a bit of work left to do, but I wanted to check if the
attached patch (0002, based on top of Aleks' 0001 from upthread) was
going in the direction you were thinking. This patch pushes down any
forced-null and not-null Vars as ScanKeys. It doesn't remove the
redundant quals after turning them into ScanKeys, so it's needlessly
inefficient, but there's still a decent speedup for some of the basic
benchmarks in 0003.

Plans look something like this:

# EXPLAIN SELECT * FROM t WHERE i IS NULL;
                         QUERY PLAN
------------------------------------------------------------
 Seq Scan on t  (cost=0.00..1393.00 rows=49530 width=4)
   Scan Cond: (i IS NULL)
   Filter: (i IS NULL)
(3 rows)

# EXPLAIN SELECT * FROM t WHERE i = 3;
                       QUERY PLAN
--------------------------------------------------------
 Seq Scan on t  (cost=0.00..1643.00 rows=1 width=4)
   Scan Cond: (i IS NOT NULL)
   Filter: (i = 3)
(3 rows)

The non-nullable case worries me a bit because so many things imply IS
NOT NULL. I think I need to do some sort of cost analysis using the
null_frac statistics -- it probably only makes sense to push an
implicit SK_SEARCHNOTNULL down to the AM layer if some fraction of
rows would actually be filtered out -- but I'm not really sure how to
choose a threshold.

It would also be neat if `COUNT(col)` could push down
SK_SEARCHNOTNULL, but I think that would require a new support
function to rewrite the plan for an aggregate.

Am I on the right track?

Thanks,
--Jacob

From 45ed1948b6aac4fc8a268a77211327786fd4315f Mon Sep 17 00:00:00 2001
From: Jacob Champion <jchamp...@timescale.com>
Date: Tue, 6 Jun 2023 15:57:19 -0700
Subject: [PATCH 3/3] WIP: naive benchmarks

Three benchmarks (bench1-3) for varying degrees of selectivity depending
on the table used. Two benchmarks (bench4-5) for 1.0 selectivity, on
both a single column and a larger number of columns.

$ psql -f ./init postgres
$ for i in 1 2 3 4; do for t in zero twenty fifty eighty hundred; do
    echo "= $i: $t ="
    pgbench -n -f ./bench$i -T5 -c1 -j1 -Dtable=$t postgres
done; done
$ pgbench -n -f ./bench5 -T5 -c1 -j1 postgres
---
 bench1 |  1 +
 bench2 |  1 +
 bench3 |  1 +
 bench4 |  1 +
 bench5 |  4 ++++
 init   | 37 +++++++++++++++++++++++++++++++++++++
 6 files changed, 45 insertions(+)
 create mode 100644 bench1
 create mode 100644 bench2
 create mode 100644 bench3
 create mode 100644 bench4
 create mode 100644 bench5
 create mode 100755 init

diff --git a/bench1 b/bench1
new file mode 100644
index 0000000000..9cb32d4fcb
--- /dev/null
+++ b/bench1
@@ -0,0 +1 @@
+SELECT COUNT(i) FROM :table;
diff --git a/bench2 b/bench2
new file mode 100644
index 0000000000..1377bae0f5
--- /dev/null
+++ b/bench2
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM :table WHERE i IS NOT NULL;
diff --git a/bench3 b/bench3
new file mode 100644
index 0000000000..524a140c0a
--- /dev/null
+++ b/bench3
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM :table WHERE i IS NULL;
diff --git a/bench4 b/bench4
new file mode 100644
index 0000000000..c8216537db
--- /dev/null
+++ b/bench4
@@ -0,0 +1 @@
+SELECT COUNT(*) FROM :table WHERE i > 0;
diff --git a/bench5 b/bench5
new file mode 100644
index 0000000000..90a77741f6
--- /dev/null
+++ b/bench5
@@ -0,0 +1,4 @@
+SELECT COUNT(*) FROM wide
+ WHERE i <> 0 AND j <> 0 AND k <> 0 AND l <> 0 AND m <> 0 AND n <> 0 AND o <> 0
+   AND p <> 0 AND q <> 0 AND r <> 0 AND s <> 0 AND t <> 0 AND u <> 0 AND v <> 0
+   AND w <> 0 AND x <> 0 AND y <> 0 AND z <> 0;
diff --git a/init b/init
new file mode 100755
index 0000000000..98c4a0acdb
--- /dev/null
+++ b/init
@@ -0,0 +1,37 @@
+-- Tables are named after the percentage of values that are non-NULL.
+DROP TABLE IF EXISTS zero;
+DROP TABLE IF EXISTS twenty;
+DROP TABLE IF EXISTS fifty;
+DROP TABLE IF EXISTS eighty;
+DROP TABLE IF EXISTS hundred;
+
+DROP TABLE IF EXISTS wide;
+
+\if :{?scale}
+\else
+  \set scale 1
+\endif
+
+CREATE TABLE zero AS
+  SELECT NULL::int AS i FROM generate_series(1, 100000 * :scale);
+
+CREATE TABLE twenty AS
+  SELECT CASE WHEN random() < 0.2 THEN i ELSE NULL END AS i
+    FROM generate_series(1, 100000 * :scale) i;
+
+CREATE TABLE fifty AS
+  SELECT CASE WHEN random() < 0.5 THEN i ELSE NULL END AS i
+    FROM generate_series(1, 100000 * :scale) i;
+
+CREATE TABLE eighty AS
+  SELECT CASE WHEN random() < 0.8 THEN i ELSE NULL END AS i
+    FROM generate_series(1, 100000 * :scale) i;
+
+CREATE TABLE hundred AS
+  SELECT i FROM generate_series(1, 100000 * :scale) i;
+
+CREATE TABLE wide AS
+  SELECT i, 2 AS j, 3 AS k, 4 AS l, 5 AS m, 6 AS n, 7 AS o, 8 AS p, 9 AS q,
+         10 AS r, 11 AS s, 12 AS t, 13 AS u, 14 AS v, 15 AS w, 16 AS x, 17 AS y,
+		 18 AS z
+    FROM generate_series(1, 100000 * :scale) i;
-- 
2.25.1

From 4d904883a68dc2025716948b816e43cb847afe72 Mon Sep 17 00:00:00 2001
From: Aleksander Alekseev <aleksan...@timescale.com>
Date: Mon, 13 Feb 2023 16:15:45 +0300
Subject: [PATCH 1/3] Support SK_SEARCHNULL / SK_SEARCHNOTNULL for heap-only
 scans

Previously it was not supported which could be of inconvenience for the
extension authors.

Author: Aleksander Alekseev
Reviewed-by: Andres Freund
Discussion: https://postgr.es/m/caj7c6tpkeh7uwen9orqp_dmr8uxifhxt8pecq01zw1hkptb...@mail.gmail.com
---
 src/include/access/skey.h                |  7 +--
 src/include/access/valid.h               | 41 ++++++++++-----
 src/test/regress/expected/heapscan.out   | 33 ++++++++++++
 src/test/regress/expected/test_setup.out | 13 +++++
 src/test/regress/parallel_schedule       |  2 +-
 src/test/regress/regress.c               | 67 ++++++++++++++++++++++++
 src/test/regress/sql/heapscan.sql        | 12 +++++
 src/test/regress/sql/test_setup.sql      | 16 ++++++
 8 files changed, 175 insertions(+), 16 deletions(-)
 create mode 100644 src/test/regress/expected/heapscan.out
 create mode 100644 src/test/regress/sql/heapscan.sql

diff --git a/src/include/access/skey.h b/src/include/access/skey.h
index fbdb23c5c7..81b0530277 100644
--- a/src/include/access/skey.h
+++ b/src/include/access/skey.h
@@ -46,9 +46,10 @@
  * and the sk_strategy, sk_subtype, sk_collation, and sk_func fields are
  * not used (unless set by the index AM).
  *
- * SK_SEARCHARRAY, SK_SEARCHNULL and SK_SEARCHNOTNULL are supported only
- * for index scans, not heap scans; and not all index AMs support them,
- * only those that set amsearcharray or amsearchnulls respectively.
+ * SK_SEARCHARRAY is supported only for index scans, not heap scans; and not all
+ * index AMs support it, only those that set amsearcharray. SK_SEARCHNULL and
+ * SK_SEARCHNOTNULL are supported for heap and index scans but similarly not all
+ * index AMs support them, only those that set amsearchnulls.
  *
  * A ScanKey can also represent an ordering operator invocation, that is
  * an ordering requirement "ORDER BY indexedcol op constant".  This looks
diff --git a/src/include/access/valid.h b/src/include/access/valid.h
index 85d476aab5..477ac75242 100644
--- a/src/include/access/valid.h
+++ b/src/include/access/valid.h
@@ -32,24 +32,41 @@ HeapKeyTest(HeapTuple tuple, TupleDesc tupdesc, int nkeys, ScanKey keys)
 
 	for (; cur_nkeys--; cur_key++)
 	{
-		Datum		atp;
 		bool		isnull;
-		Datum		test;
 
-		if (cur_key->sk_flags & SK_ISNULL)
-			return false;
+		if (cur_key->sk_flags & (SK_SEARCHNULL | SK_SEARCHNOTNULL))
+		{
+			/* special case: looking for NULL / NOT NULL values */
+			Assert(cur_key->sk_flags & SK_ISNULL);
 
-		atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
+			isnull = heap_attisnull(tuple, cur_key->sk_attno, tupdesc);
 
-		if (isnull)
-			return false;
+			if (isnull && (cur_key->sk_flags & SK_SEARCHNOTNULL))
+				return false;
 
-		test = FunctionCall2Coll(&cur_key->sk_func,
-								 cur_key->sk_collation,
-								 atp, cur_key->sk_argument);
+			if (!isnull && (cur_key->sk_flags & SK_SEARCHNULL))
+				return false;
+		}
+		else
+		{
+			Datum		atp;
+			Datum		test;
 
-		if (!DatumGetBool(test))
-			return false;
+			if (cur_key->sk_flags & SK_ISNULL)
+				return false;
+
+			atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
+
+			if (isnull)
+				return false;
+
+			test = FunctionCall2Coll(&cur_key->sk_func,
+									 cur_key->sk_collation,
+									 atp, cur_key->sk_argument);
+
+			if (!DatumGetBool(test))
+				return false;
+		}
 	}
 
 	return true;
diff --git a/src/test/regress/expected/heapscan.out b/src/test/regress/expected/heapscan.out
new file mode 100644
index 0000000000..055bf9bc62
--- /dev/null
+++ b/src/test/regress/expected/heapscan.out
@@ -0,0 +1,33 @@
+-- make sure that initially the table is empty
+SELECT * FROM phonebook;
+ id | name | phone 
+----+------+-------
+(0 rows)
+
+SELECT phonebook_find_first_phone(isnull => false);
+ phonebook_find_first_phone 
+----------------------------
+                         -1
+(1 row)
+
+SELECT phonebook_find_first_phone(isnull => true);
+ phonebook_find_first_phone 
+----------------------------
+                         -1
+(1 row)
+
+INSERT INTO phonebook (id, name, phone) VALUES
+(1, 'Alice', 123456),
+(2, 'Bob', NULL);
+SELECT phonebook_find_first_phone(isnull => false);
+ phonebook_find_first_phone 
+----------------------------
+                          1
+(1 row)
+
+SELECT phonebook_find_first_phone(isnull => true);
+ phonebook_find_first_phone 
+----------------------------
+                          2
+(1 row)
+
diff --git a/src/test/regress/expected/test_setup.out b/src/test/regress/expected/test_setup.out
index 5d9e6bf12b..70db759697 100644
--- a/src/test/regress/expected/test_setup.out
+++ b/src/test/regress/expected/test_setup.out
@@ -213,6 +213,19 @@ CREATE FUNCTION get_columns_length(oid[])
     RETURNS int
     AS :'regresslib'
     LANGUAGE C STRICT STABLE PARALLEL SAFE;
+--
+-- These table and function are used for testing the support of SK_SEARCHNULL
+-- and SK_SEARCHNOTNULL flags for heap-only scans.
+--
+CREATE TABLE phonebook(
+    id INT PRIMARY KEY NOT NULL,
+    name NAME NOT NULL,
+    phone INT /* nullable */
+);
+CREATE FUNCTION phonebook_find_first_phone(isnull bool)
+    RETURNS int
+    AS :'regresslib', 'phonebook_find_first_phone'
+    LANGUAGE C;
 -- Use hand-rolled hash functions and operator classes to get predictable
 -- result on different machines.  The hash function for int4 simply returns
 -- the sum of the values passed to it and the one for text returns the length
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df9d8503b..d0eb5433eb 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -119,7 +119,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr
 # The stats test resets stats, so nothing else needing stats access can be in
 # this group.
 # ----------
-test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats
+test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats heapscan
 
 # event_trigger depends on create_am and cannot run concurrently with
 # any test that runs DDL
diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c
index bcbc6d910f..c7f2eed699 100644
--- a/src/test/regress/regress.c
+++ b/src/test/regress/regress.c
@@ -20,6 +20,7 @@
 #include <signal.h>
 
 #include "access/detoast.h"
+#include "access/heapam.h"
 #include "access/htup_details.h"
 #include "access/transam.h"
 #include "access/xact.h"
@@ -45,6 +46,7 @@
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/snapmgr.h"
 #include "utils/typcache.h"
 
 #define EXPECT_TRUE(expr)	\
@@ -1262,3 +1264,68 @@ get_columns_length(PG_FUNCTION_ARGS)
 
 	PG_RETURN_INT32(column_offset);
 }
+
+/*
+ * Used for testing SK_SEARCHNULL/SK_SEARCHNOTNULL support for heap-only scans.
+ *
+ * Returns the primary key of the first row found in the "phonebook" table for
+ * which "phone" is NULL or NOT NULL, depending on the "isnull" argument.
+ *
+ * Returns -1 if nothing was found.
+ */
+PG_FUNCTION_INFO_V1(phonebook_find_first_phone);
+
+typedef enum Anum_phonebook
+{
+	Anum_phonebook_id = 1,
+	Anum_phonebook_name,
+	Anum_phonebook_phone,
+	_Anum_phonebook_max,
+}			Anum_phonebook;
+
+#define Natts_phonebook (_Anum_phonebook_max - 1)
+
+Datum
+phonebook_find_first_phone(PG_FUNCTION_ARGS)
+{
+	bool		isnull = PG_GETARG_BOOL(0);
+	int			flags = SK_ISNULL;
+	int32		found_id = -1;
+	Relation	rel;
+	HeapTuple	tup;
+	TableScanDesc scan;
+	Datum		tbl_oid_datum;
+	ScanKeyData scanKey;
+
+	flags |= isnull ? SK_SEARCHNULL : SK_SEARCHNOTNULL;
+	ScanKeyEntryInitialize(
+						   &scanKey,
+						   flags,
+						   Anum_phonebook_phone,
+						   InvalidStrategy, /* no strategy */
+						   InvalidOid,	/* no strategy subtype */
+						   InvalidOid,	/* no collation */
+						   InvalidOid,	/* no reg proc for this */
+						   (Datum) 0);	/* constant */
+
+	tbl_oid_datum = DirectFunctionCall1(to_regclass,
+										CStringGetTextDatum("phonebook"));
+
+	rel = table_open(DatumGetObjectId(tbl_oid_datum), AccessShareLock);
+	scan = table_beginscan(rel, GetTransactionSnapshot(), 1, &scanKey);
+
+	while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
+	{
+		Datum		values[Natts_phonebook];
+		bool		isnull[Natts_phonebook];
+
+		heap_deform_tuple(tup, RelationGetDescr(rel), values, isnull);
+		found_id = DatumGetInt32(values[Anum_phonebook_id - 1]);
+		break;
+	}
+
+	table_endscan(scan);
+	table_close(rel, AccessShareLock);
+
+	PG_RETURN_INT32(found_id);
+}
diff --git a/src/test/regress/sql/heapscan.sql b/src/test/regress/sql/heapscan.sql
new file mode 100644
index 0000000000..de6528f3dd
--- /dev/null
+++ b/src/test/regress/sql/heapscan.sql
@@ -0,0 +1,12 @@
+-- make sure that initially the table is empty
+SELECT * FROM phonebook;
+
+SELECT phonebook_find_first_phone(isnull => false);
+SELECT phonebook_find_first_phone(isnull => true);
+
+INSERT INTO phonebook (id, name, phone) VALUES
+(1, 'Alice', 123456),
+(2, 'Bob', NULL);
+
+SELECT phonebook_find_first_phone(isnull => false);
+SELECT phonebook_find_first_phone(isnull => true);
\ No newline at end of file
diff --git a/src/test/regress/sql/test_setup.sql b/src/test/regress/sql/test_setup.sql
index 1b2d434683..25b16e75d6 100644
--- a/src/test/regress/sql/test_setup.sql
+++ b/src/test/regress/sql/test_setup.sql
@@ -262,6 +262,22 @@ CREATE FUNCTION get_columns_length(oid[])
     AS :'regresslib'
     LANGUAGE C STRICT STABLE PARALLEL SAFE;
 
+--
+-- These table and function are used for testing the support of SK_SEARCHNULL
+-- and SK_SEARCHNOTNULL flags for heap-only scans.
+--
+
+CREATE TABLE phonebook(
+    id INT PRIMARY KEY NOT NULL,
+    name NAME NOT NULL,
+    phone INT /* nullable */
+);
+
+CREATE FUNCTION phonebook_find_first_phone(isnull bool)
+    RETURNS int
+    AS :'regresslib', 'phonebook_find_first_phone'
+    LANGUAGE C;
+
 -- Use hand-rolled hash functions and operator classes to get predictable
 -- result on different machines.  The hash function for int4 simply returns
 -- the sum of the values passed to it and the one for text returns the length
-- 
2.25.1

From 49e93bda7ddc25d23b3b2d66bb65c9b3db256ee0 Mon Sep 17 00:00:00 2001
From: Jacob Champion <jchamp...@timescale.com>
Date: Wed, 7 Jun 2023 16:42:17 -0700
Subject: [PATCH 2/3] WIP: create ScanKeys from derived null tests

...for pushdown to the TableAM layer.

During the scan fixup, we have to also adjust the new splan->keyexprs
list. Otherwise our Vars won't reference the correct RTEs.

TODO:
- quals made redundant with the ScanKeys need to be removed
- costing
---
 src/backend/commands/explain.c          | 12 +++++-
 src/backend/executor/nodeSeqscan.c      | 53 +++++++++++++++++++++++-
 src/backend/optimizer/plan/createplan.c | 55 ++++++++++++++++++++++++-
 src/backend/optimizer/plan/setrefs.c    |  3 ++
 src/include/nodes/execnodes.h           |  2 +
 src/include/nodes/plannodes.h           |  1 +
 6 files changed, 122 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 8570b14f62..622d0615bc 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1821,12 +1821,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (es->analyze)
 				show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
 			break;
+		case T_SeqScan:
+			if (((SeqScan *) plan)->keyexprs)
+				show_scan_qual(((SeqScan *) plan)->keyexprs, "Scan Cond",
+							   planstate, ancestors, es);
+			show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+			if (plan->qual)
+				show_instrumentation_count("Rows Removed by Filter", 1,
+										   planstate, es);
+			break;
 		case T_SampleScan:
 			show_tablesample(((SampleScan *) plan)->tablesample,
 							 planstate, ancestors, es);
-			/* fall through to print additional fields the same as SeqScan */
+			/* fall through to print additional fields the same as basic scans */
 			/* FALLTHROUGH */
-		case T_SeqScan:
 		case T_ValuesScan:
 		case T_CteScan:
 		case T_NamedTuplestoreScan:
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 4da0f28f7b..67ff9d1482 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -28,6 +28,7 @@
 #include "postgres.h"
 
 #include "access/relscan.h"
+#include "access/skey.h"
 #include "access/tableam.h"
 #include "executor/execdebug.h"
 #include "executor/nodeSeqscan.h"
@@ -70,7 +71,7 @@ SeqNext(SeqScanState *node)
 		 */
 		scandesc = table_beginscan(node->ss.ss_currentRelation,
 								   estate->es_snapshot,
-								   0, NULL);
+								   node->nkeys, node->scankeys);
 		node->ss.ss_currentScanDesc = scandesc;
 	}
 
@@ -91,6 +92,8 @@ SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
 	/*
 	 * Note that unlike IndexScan, SeqScan never use keys in heap_beginscan
 	 * (and this is very bad) - so, here we do not check are keys ok or not.
+	 *
+	 * TODO: so I guess this isn't true anymore.
 	 */
 	return true;
 }
@@ -114,6 +117,48 @@ ExecSeqScan(PlanState *pstate)
 					(ExecScanRecheckMtd) SeqRecheck);
 }
 
+static void
+create_scankeys(SeqScanState *scanstate, List *keyexprs)
+{
+	struct ScanKeyData *keys;
+	int			nkeys;
+	ListCell   *l;
+
+	nkeys = list_length(keyexprs);
+	keys = palloc(sizeof(struct ScanKeyData) * nkeys);
+
+	foreach(l, keyexprs)
+	{
+		NullTest   *n;
+		Var		   *var;
+		int			typeflag;
+		int			i = foreach_current_index(l);
+
+		/*
+		 * create_seqscan_plan() only puts NullTests of Vars into the SeqScan's
+		 * key clauses, so that's all we handle here.
+		 */
+		n = lfirst_node(NullTest, l);
+		var = (Var *) n->arg;
+		Assert(IsA(var, Var));
+
+		typeflag = (n->nulltesttype == IS_NULL) ? SK_SEARCHNULL
+												: SK_SEARCHNOTNULL;
+
+		Assert(i < nkeys);
+		ScanKeyEntryInitialize(&keys[i],
+							   SK_ISNULL | typeflag,
+							   var->varattno,
+							   InvalidStrategy,
+							   InvalidOid,
+							   InvalidOid,
+							   InvalidOid,
+							   (Datum) 0);
+	}
+
+	scanstate->nkeys = nkeys;
+	scanstate->scankeys = keys;
+}
 
 /* ----------------------------------------------------------------
  *		ExecInitSeqScan
@@ -171,6 +216,12 @@ ExecInitSeqScan(SeqScan *node, EState *estate, int eflags)
 	scanstate->ss.ps.qual =
 		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
 
+	/*
+	 * Populate scankeys, if necessary.
+	 */
+	if (node->keyexprs)
+		create_scankeys(scanstate, node->keyexprs);
+
 	return scanstate;
 }
 
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index af48109058..c7cfe59ea9 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -177,7 +177,8 @@ static void copy_generic_path_info(Plan *dest, Path *src);
 static void copy_plan_costsize(Plan *dest, Plan *src);
 static void label_sort_with_costsize(PlannerInfo *root, Sort *plan,
 									 double limit_tuples);
-static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid);
+static SeqScan *make_seqscan(List *qptlist, List *qpqual, List *scankeys,
+							 Index scanrelid);
 static SampleScan *make_samplescan(List *qptlist, List *qpqual, Index scanrelid,
 								   TableSampleClause *tsc);
 static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid,
@@ -2885,6 +2886,47 @@ create_limit_plan(PlannerInfo *root, LimitPath *best_path, int flags)
  *****************************************************************************/
 
 
+/*
+ * reconstruct_null_tests
+ *   Creates a list of NullTests, one for each Var in varset, which is a
+ *   multibitmapset of varno/varattno. Whole-row Vars are omitted.
+ */
+static List *
+reconstruct_null_tests(List *tests, NullTestType type, List *varset)
+{
+	ListCell   *lc;
+
+	foreach(lc, varset)
+	{
+		Bitmapset  *varattnos = lfirst_node(Bitmapset, lc);
+		int			i = -1;
+
+		while ((i = bms_next_member(varattnos, i)) >= 0)
+		{
+			AttrNumber	varattno = i + FirstLowInvalidHeapAttributeNumber;
+			Var		   *var;
+			NullTest   *n;
+
+			if (varattno == 0)
+				continue; /* skip whole-row vars */
+
+			var = makeNode(Var);
+			var->varno = foreach_current_index(lc);
+			var->varattno = varattno;
+
+			n = makeNode(NullTest);
+			n->arg = (Expr *) var;
+			n->nulltesttype = type;
+			n->location = -1;
+
+			tests = lappend(tests, n);
+		}
+	}
+
+	return tests;
+}
+
+
 /*
  * create_seqscan_plan
  *	 Returns a seqscan plan for the base relation scanned by 'best_path'
@@ -2896,6 +2938,7 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
 {
 	SeqScan    *scan_plan;
 	Index		scan_relid = best_path->parent->relid;
+	List	   *keyexprs = NIL;
 
 	/* it should be a base rel... */
 	Assert(scan_relid > 0);
@@ -2914,8 +2957,16 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path,
 			replace_nestloop_params(root, (Node *) scan_clauses);
 	}
 
+	keyexprs =
+		reconstruct_null_tests(keyexprs, IS_NULL,
+							   find_forced_null_vars((Node *) scan_clauses));
+	keyexprs =
+		reconstruct_null_tests(keyexprs, IS_NOT_NULL,
+							   find_nonnullable_vars((Node *) scan_clauses));
+
 	scan_plan = make_seqscan(tlist,
 							 scan_clauses,
+							 keyexprs,
 							 scan_relid);
 
 	copy_generic_path_info(&scan_plan->scan.plan, best_path);
@@ -5460,6 +5511,7 @@ bitmap_subplan_mark_shared(Plan *plan)
 static SeqScan *
 make_seqscan(List *qptlist,
 			 List *qpqual,
+			 List *keyexprs,
 			 Index scanrelid)
 {
 	SeqScan    *node = makeNode(SeqScan);
@@ -5470,6 +5522,7 @@ make_seqscan(List *qptlist,
 	plan->lefttree = NULL;
 	plan->righttree = NULL;
 	node->scan.scanrelid = scanrelid;
+	node->keyexprs = keyexprs;
 
 	return node;
 }
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 97fa561e4e..2b58b6c2b3 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -632,6 +632,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 				splan->scan.plan.qual =
 					fix_scan_list(root, splan->scan.plan.qual,
 								  rtoffset, NUM_EXEC_QUAL(plan));
+				splan->keyexprs =
+					fix_scan_list(root, splan->keyexprs,
+								  rtoffset, NUM_EXEC_QUAL(plan));
 			}
 			break;
 		case T_SampleScan:
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index cb714f4a19..d015a3a127 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1485,6 +1485,8 @@ typedef struct SeqScanState
 {
 	ScanState	ss;				/* its first field is NodeTag */
 	Size		pscan_len;		/* size of parallel heap scan descriptor */
+	struct ScanKeyData *scankeys;
+	int			nkeys;
 } SeqScanState;
 
 /* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 1b787fe031..b6dbf64d1e 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -394,6 +394,7 @@ typedef struct Scan
 typedef struct SeqScan
 {
 	Scan		scan;
+	List	   *keyexprs;		/* expressions to push down as ScanKeys */
 } SeqScan;
 
 /* ----------------
-- 
2.25.1

Re: [PATCH] Support SK_SEARCHNULL / SK_SEARCHNOTNULL for heap-only scans

Reply via email to