From a3d5879e868cbd4269747b07ecf1b3f16bcd2496 Mon Sep 17 00:00:00 2001
From: Mark Dilger <mark.dilger@enterprisedb.com>
Date: Wed, 16 Jun 2021 21:26:32 -0700
Subject: [PATCH v2] Fixing bug in logical replication

When replicating DML for a table with an index but no REPLICA
IDENTITY, the publisher was crashing with a segmentation fault, at
least under certain conditions.  Adding a fix for that along with a
simple test demonstrating the problem.
---
 src/backend/utils/cache/relcache.c            | 36 +++++++++++-
 .../subscription/t/021_no_replica_identity.pl | 57 +++++++++++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 src/test/subscription/t/021_no_replica_identity.pl

diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index fd05615e76..f57a3b1dc8 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -5266,8 +5266,42 @@ RelationGetIdentityKeyBitmap(Relation relation)
 	if (indexoidlist == NIL)
 		return NULL;
 
-	/* Add referenced attributes to idindexattrs */
+	/* Fall out if the replica identity index is invalid */
+	if (!OidIsValid(relation->rd_replidindex))
+		return NULL;
+
+	/* Look up the description for the replica identity index */
 	indexDesc = RelationIdGetRelation(relation->rd_replidindex);
+
+	/*
+	 * Fall out if the description is not for an index.  The historic snapshot
+	 * should have been setup before the relation was opened, but if the caller
+	 * managed to set relation->rd_replidindex before taking a snapshot, there
+	 * is a chance that the index was dropped in between those two actions.
+	 * (The replication infrastructure in core should *never* do this, but we
+	 * can't anticipate how third party code might reuse these functions.)  If
+	 * so, the index relation will likely be merely invalid, but in the
+	 * extremely unlikely case that the Oid tracked by rd_replidindex got
+	 * reassigned to something else, we could have just opened a relation of
+	 * type other than index.  Beware that this race condition does not require
+	 * the Oid range to wrap around while we're doing logical replication.  The
+	 * Oid for the index could already have been quite old and nearing the
+	 * point where it would get reused if available.  We perform a half-measure
+	 * to check that the relation opened really is an index, though we do
+	 * nothing to recheck that it is an index assocaited with our table
+	 * relation.  Doing more would require reopening pg_index and rescanning
+	 * which hardly seems worth the performance penalty for something which is
+	 * arguably user error anyway.
+	 */
+	if (!RelationIsValid(indexDesc))
+		return NULL;
+	if (!indexDesc->rd_index)
+	{
+		RelationClose(indexDesc);
+		return NULL;
+	}
+
+	/* Add referenced attributes to idindexattrs */
 	for (i = 0; i < indexDesc->rd_index->indnatts; i++)
 	{
 		int			attrnum = indexDesc->rd_index->indkey.values[i];
diff --git a/src/test/subscription/t/021_no_replica_identity.pl b/src/test/subscription/t/021_no_replica_identity.pl
new file mode 100644
index 0000000000..9a6c8f5f74
--- /dev/null
+++ b/src/test/subscription/t/021_no_replica_identity.pl
@@ -0,0 +1,57 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test of logical replication in the absence of a proper replica
+# identity index
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+my $cmd;
+
+my $node_publisher = get_new_node('publisher');
+$node_publisher->init(allows_streaming => 'logical');
+$node_publisher->start;
+
+my $node_subscriber = get_new_node('subscriber');
+$node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->start;
+
+$cmd = qq(
+CREATE TABLE tbl (i INTEGER);
+CREATE INDEX tbl_idx ON tbl(i);
+);
+
+$node_publisher->safe_psql('postgres', $cmd);
+$node_subscriber->safe_psql('postgres', $cmd);
+
+$cmd = qq(
+CREATE PUBLICATION pub FOR TABLE tbl);
+$node_publisher->safe_psql('postgres', $cmd);
+
+my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
+$cmd = qq(
+CREATE SUBSCRIPTION sub
+	CONNECTION '$publisher_connstr'
+	PUBLICATION pub);
+$node_subscriber->safe_psql('postgres', $cmd);
+
+$node_publisher->wait_for_catchup("sub");
+
+my $synced_query =
+  "SELECT count(1) = 0 FROM pg_subscription_rel WHERE srsubstate NOT IN ('s', 'r');";
+$node_subscriber->poll_query_until('postgres', $synced_query)
+  or die "Timed out while waiting for subscriber to synchronize data";
+
+$cmd = qq(INSERT INTO tbl (i) VALUES (1));
+$node_publisher->safe_psql('postgres', $cmd);
+
+$node_publisher->wait_for_catchup("sub");
+
+is($node_subscriber->safe_psql('postgres', q(SELECT i FROM tbl)),
+   1, "value replicated to subscriber without replica identity index");
+
+$node_subscriber->stop('smart');
+$node_publisher->stop('smart');
-- 
2.21.1 (Apple Git-122.3)

