From 0cb648eeeb064aac2f36b4c5de02c3c3e7770d2f Mon Sep 17 00:00:00 2001
From: Amit Kapila <akapila@postgresql.org>
Date: Thu, 20 May 2021 11:04:37 +0530
Subject: [PATCH v13] Fix deadlock for multiple replicating truncates of the
 same table.

While applying the truncate change, the logical apply worker acquires
RowExclusiveLock on the relation being truncated. This allowed truncate on
the relation at a time by two apply workers which lead to a deadlock. The
reason was that one of the workers after updating the pg_class tuple tries
to acquire SHARE lock on the relation and started to wait for the second
worker which has acquired RowExclusiveLock on the relation. And when the
second worker tries to update the pg_class tuple, it starts to wait for
the first worker which leads to a deadlock. Fix it by acquiring
AccessExclusiveLock on the relation before applying the truncate change as
we do for normal truncate operation.

Author: Peter Smith, test case by Haiying Tang
Reviewed-by: Dilip Kumar, Amit Kapila
Backpatch-through: 11
Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com
---
 src/backend/replication/logical/worker.c |  9 +++---
 src/test/subscription/t/010_truncate.pl  | 53 +++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index ff887ea..e25ad67 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1248,6 +1248,7 @@ apply_handle_truncate(StringInfo s)
 	List	   *relids = NIL;
 	List	   *relids_logged = NIL;
 	ListCell   *lc;
+	LOCKMODE    lockmode = AccessExclusiveLock;
 
 	ensure_transaction();
 
@@ -1258,14 +1259,14 @@ apply_handle_truncate(StringInfo s)
 		LogicalRepRelId relid = lfirst_oid(lc);
 		LogicalRepRelMapEntry *rel;
 
-		rel = logicalrep_rel_open(relid, RowExclusiveLock);
+		rel = logicalrep_rel_open(relid, lockmode);
 		if (!should_apply_changes_for_rel(rel))
 		{
 			/*
 			 * The relation can't become interesting in the middle of the
 			 * transaction so it's safe to unlock it.
 			 */
-			logicalrep_rel_close(rel, RowExclusiveLock);
+			logicalrep_rel_close(rel, lockmode);
 			continue;
 		}
 
@@ -1283,7 +1284,7 @@ apply_handle_truncate(StringInfo s)
 		{
 			ListCell   *child;
 			List	   *children = find_all_inheritors(rel->localreloid,
-													   RowExclusiveLock,
+													   lockmode,
 													   NULL);
 
 			foreach(child, children)
@@ -1303,7 +1304,7 @@ apply_handle_truncate(StringInfo s)
 				 */
 				if (RELATION_IS_OTHER_TEMP(childrel))
 				{
-					table_close(childrel, RowExclusiveLock);
+					table_close(childrel, lockmode);
 					continue;
 				}
 
diff --git a/src/test/subscription/t/010_truncate.pl b/src/test/subscription/t/010_truncate.pl
index be2c0bd..1f3719c 100644
--- a/src/test/subscription/t/010_truncate.pl
+++ b/src/test/subscription/t/010_truncate.pl
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use PostgresNode;
 use TestLib;
-use Test::More tests => 9;
+use Test::More tests => 12;
 
 # setup
 
@@ -13,6 +13,8 @@ $node_publisher->start;
 
 my $node_subscriber = get_new_node('subscriber');
 $node_subscriber->init(allows_streaming => 'logical');
+$node_subscriber->append_conf('postgresql.conf',
+	qq(max_logical_replication_workers = 6));
 $node_subscriber->start;
 
 my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
@@ -158,3 +160,52 @@ is($result, qq(0||), 'truncate of multiple tables some not published');
 $result = $node_subscriber->safe_psql('postgres',
 	"SELECT count(*), min(a), max(a) FROM tab2");
 is($result, qq(3|1|3), 'truncate of multiple tables some not published');
+
+# test that truncate works for logical replication when there are multiple
+# subscriptions for a single table
+
+$node_publisher->safe_psql('postgres',
+	"CREATE TABLE tab5 (a int)");
+
+$node_subscriber->safe_psql('postgres',
+	"CREATE TABLE tab5 (a int)");
+
+$node_publisher->safe_psql('postgres',
+	"CREATE PUBLICATION pub5 FOR TABLE tab5");
+$node_subscriber->safe_psql('postgres',
+	"CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5"
+);
+$node_subscriber->safe_psql('postgres',
+	"CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5"
+);
+
+# wait for initial data sync
+$node_subscriber->poll_query_until('postgres', $synced_query)
+  or die "Timed out while waiting for subscriber to synchronize data";
+
+# insert data to truncate
+
+$node_publisher->safe_psql('postgres',
+	"INSERT INTO tab5 VALUES (1), (2), (3)");
+
+$node_publisher->wait_for_catchup('sub5_1');
+$node_publisher->wait_for_catchup('sub5_2');
+
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT count(*), min(a), max(a) FROM tab5");
+is($result, qq(6|1|3), 'insert replicated for multiple subscriptions');
+
+$node_publisher->safe_psql('postgres', "TRUNCATE tab5");
+
+$node_publisher->wait_for_catchup('sub5_1');
+$node_publisher->wait_for_catchup('sub5_2');
+
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT count(*), min(a), max(a) FROM tab5");
+is($result, qq(0||),
+	'truncate replicated for multiple subscriptions');
+
+# check deadlocks
+$result = $node_subscriber->safe_psql('postgres',
+	"SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'");
+is($result, qq(0), 'no deadlocks detected');
-- 
1.8.3.1

