From afbcde937f09a47115fd7241b632999274f89b8e Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Tue, 27 Oct 2020 14:21:21 +0500
Subject: [PATCH v3] Disallow cancelation of synchronous commit

Currently we allow canceling the wait for a synchronous commit.
Some drivers cancel a query after a timeout. If the application then
retries an idempotent query, it will get confirmation that the data
was written, even though the commit might not have been replicated.
This can lead to split-brain in HA scenarios. To prevent this, we add
the synchronous_commit_cancelation setting; turning it off disallows
cancelation of the synchronous replication wait.
---
 src/backend/access/transam/xact.c |  1 +
 src/backend/replication/syncrep.c | 15 +++++++++++----
 src/backend/utils/misc/guc.c      |  9 +++++++++
 src/include/access/xact.h         |  3 +++
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 9cd0b7c11b..61d0c4555d 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -81,6 +81,7 @@ bool		DefaultXactDeferrable = false;
 bool		XactDeferrable;
 
 int			synchronous_commit = SYNCHRONOUS_COMMIT_ON;
+bool		synchronous_commit_cancelation = true;
 
 /*
  * CheckXidAlive is a xid value pointing to a possibly ongoing (sub)
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index 6e8c76537a..3c005a24ba 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -282,11 +282,18 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 		if (QueryCancelPending)
 		{
 			QueryCancelPending = false;
+			if (synchronous_commit_cancelation)
+			{
+				ereport(WARNING,
+						(errmsg("canceling wait for synchronous replication due to user request"),
+						 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
+				SyncRepCancelWait();
+				break;
+			}
+
 			ereport(WARNING,
-					(errmsg("canceling wait for synchronous replication due to user request"),
-					 errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
-			SyncRepCancelWait();
-			break;
+					(errmsg("canceling wait for synchronous replication due requested, but cancelation is not allowed"),
+					 errdetail("The COMMIT record has already flushed to WAL locally and might not have been replicated to the standby. We must wait here.")));
 		}
 
 		/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index dabcbb0736..b174e6d471 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1289,6 +1289,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"synchronous_commit_cancelation", PGC_USERSET, WAL_SETTINGS,
+			gettext_noop("Allow to cancel waiting for replication of transaction commited localy."),
+			NULL
+		},
+		&synchronous_commit_cancelation,
+		true, NULL, NULL, NULL
+	},
+
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs each checkpoint."),
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 7320de345c..9d9a6877d5 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -86,6 +86,9 @@ extern int	synchronous_commit;
 extern PGDLLIMPORT TransactionId CheckXidAlive;
 extern PGDLLIMPORT bool bsysscan;
 
+/* Allow canceling the wait for sync replication of a locally committed transaction */
+extern bool synchronous_commit_cancelation;
+
 /*
  * Miscellaneous flag bits to record events which occur on the top level
  * transaction. These flags are only persisted in MyXactFlags and are intended
-- 
2.24.3 (Apple Git-128)

