From 9e863ae54c88d4fef869e71591c7eeab77750b89 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Wed, 30 Oct 2024 09:11:30 +0100
Subject: Consider pipeline implicit transaction as a transaction block

When using pipeline with implicit transaction, a transaction will
start from the first command and be committed with the Sync message.
Functions like IsInTransactionBlock and PreventInTransactionBlock
already assimilate this implicit transaction as a transaction block.

However, this is not the case in CheckTransactionBlock. This function is
called for things like warning when set local is used outside of a
transaction block.

This patch changes the transaction state to an implicit transaction
block when a new command is processed while pipelining was detected.
This remove the need to check for XACT_FLAGS_PIPELINING in
IsInTransactionBlock and PreventInTransactionBlock since the transaction
state now reflects the ongoing transaction block.

This allows to avoid warning when `set local` is called within a
pipeline, and makes the detection of transaction block coherent with
what's done in IsInTransactionBlock and PreventInTransactionBlock.

A known limitation of this patch: the first command won't be detected as
part of a transaction block. The implicit block only starts at the
second command so the `set local` warnings will still happen if called
first.
---
 doc/src/sgml/libpq.sgml            | 60 ++++++++++++++++++++++++++++++
 doc/src/sgml/ref/psql-ref.sgml     | 17 +++++++--
 src/backend/access/transam/xact.c  | 13 -------
 src/backend/tcop/postgres.c        | 18 +++++++++
 src/test/regress/expected/psql.out | 16 +++++---
 src/test/regress/sql/psql.sql      |  7 ++--
 6 files changed, 105 insertions(+), 26 deletions(-)

diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml
index bfefb1289e8..5e34d9159b8 100644
--- a/doc/src/sgml/libpq.sgml
+++ b/doc/src/sgml/libpq.sgml
@@ -6092,6 +6092,66 @@ UPDATE mytable SET x = x + 1 WHERE id = 42;
     multiple transactions (see <xref linkend="libpq-pipeline-errors"/>).
    </para>
   </sect2>
+
+  <sect2 id="libpq-pipeline-transaction-block">
+   <title>Pipeline and Implicit Transaction Block</title>
+
+   <para>
+    If no explicit transaction block is provided, statements executed in pipeline mode
+    are executed in an implicit transaction block which is committed at the next
+    synchronization point. If the pipeline is aborted due to a query error,
+    all changes done since the last synchronization point are rollbacked.
+<programlisting>
+-- \startpipeline, \endpipeline, \syncpipeline, \bind and \g are psql specific meta-commands
+\startpipeline
+UPDATE mytable SET x = 3 WHERE id = 42 \bind \g
+-- The synchronization point will commit the implicit transaction block
+\syncpipeline
+-- Starts of a new implicit transaction block
+UPDATE mytable SET x = 4 WHERE id = 42 \bind \g
+-- This will generate a syntax error
+UPDATE mytable \bind \g
+-- The pipeline is aborted due to the previous error, all changes
+-- done after \syncpipeline will be rollbacked
+-- Result at the end of the pipeline: x=3
+\endpipeline
+</programlisting>
+   </para>
+
+   <para>
+    From the server, the implicit transaction block is only detected when the first command
+    ends without a synchronisation point, starting the pipeline. This leads to a different
+    behavior between the initial command and the rest. Commands that need to run inside
+    a transaction block, like <literal>LOCK</literal>, will only work if executed after
+    the first command.
+<programlisting>
+-- LOCK is the first command of the pipeline and won't be seen as part of an implicit
+-- transaction block and will fail.
+\startpipeline
+LOCK mytable \bind \g
+SELECT * FROM mytable \bind \g
+\endpipeline
+
+-- LOCK is the second command of the pipeline and will succeed as it will detected as
+-- part of an implicit transaction block
+\startpipeline
+SELECT 1 \bind \g
+LOCK mytable \bind \g
+SELECT * FROM mytable \bind \g
+\endpipeline
+
+-- LOCK is the first command after a synchronisation point and will fail as it's
+-- not detected as part of an implicit transaction block
+\startpipeline
+SELECT 1 \bind \g
+\syncpipeline
+LOCK mytable \bind \g
+SELECT * FROM mytable \bind \g
+\endpipeline
+</programlisting>
+   </para>
+  </sect2>
+
  </sect1>
 
  <!-- keep this not-too-apropos sect1 ID for stability of doc URLs -->
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index 13dadc72615..44af95ebd63 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -3597,15 +3597,24 @@ testdb=&gt; <userinput>\setenv LESS -imx4F</userinput>
         Example:
 <programlisting>
 \startpipeline
-SELECT pg_current_xact_id() \bind \g
+-- Pipe a Parse, Bind and Execute of "SELECT 1"
+SELECT 1 \bind \g
+-- Pipe a Parse of "SELECT $1", storing the result in the prepared statement 'stmt1'
 SELECT $1 \parse stmt1
-SELECT $1, $2 \parse stmt2
+-- Pipe a Bind and Execute of prepared statement 'stmt1' with parameter 1
 \bind_named stmt1 1 \g
+-- Pipe a Parse, Bind and Execute of SELECT pg_current_xact_id()
+SELECT pg_current_xact_id() \bind \g
+-- Pipe a synchronisation point. This will commit the implicit transaction
+-- block started by the pipeline
 \syncpipeline
-\bind_named stmt2 1, 2 \g
+-- Pipe a Parse, Bind and Execute of SELECT pg_current_xact_id()
+-- This will display a different xid as a new transaction block
+-- was started after \syncpipeline
 SELECT pg_current_xact_id() \bind \g
+-- Pipe a Close of the prepared statement stmt1
 \close stmt1
-\close stmt2
+-- End the pipeline, sending all piped commands to the server and process the results
 \endpipeline
 </programlisting></para>
 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b7ebcc2a557..1eccb78ddc4 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -3659,16 +3659,6 @@ PreventInTransactionBlock(bool isTopLevel, const char *stmtType)
 				 errmsg("%s cannot run inside a subtransaction",
 						stmtType)));
 
-	/*
-	 * inside a pipeline that has started an implicit transaction?
-	 */
-	if (MyXactFlags & XACT_FLAGS_PIPELINING)
-		ereport(ERROR,
-				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
-		/* translator: %s represents an SQL statement name */
-				 errmsg("%s cannot be executed within a pipeline",
-						stmtType)));
-
 	/*
 	 * inside a function call?
 	 */
@@ -3780,9 +3770,6 @@ IsInTransactionBlock(bool isTopLevel)
 	if (IsSubTransaction())
 		return true;
 
-	if (MyXactFlags & XACT_FLAGS_PIPELINING)
-		return true;
-
 	if (!isTopLevel)
 		return true;
 
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 184b8301687..bd424767176 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -2780,6 +2780,17 @@ start_xact_command(void)
 
 		xact_started = true;
 	}
+	else if (MyXactFlags & XACT_FLAGS_PIPELINING)
+	{
+		/*
+		 * When the first Execute message is completed, following commands
+		 * will be done in an implicit transaction block created via
+		 * pipelining. The transaction state needs to be updated to an
+		 * implicit block if we're not already in a transaction block (like
+		 * one started by an explicit BEGIN).
+		 */
+		BeginImplicitTransactionBlock();
+	}
 
 	/*
 	 * Start statement timeout if necessary.  Note that this'll intentionally
@@ -4991,6 +5002,13 @@ PostgresMain(const char *dbname, const char *username)
 
 			case PqMsg_Sync:
 				pq_getmsgend(&input_message);
+
+				/*
+				 * If pipelining was used, we may be in an implicit
+				 * transaction block. Close it before calling
+				 * finish_xact_command.
+				 */
+				EndImplicitTransactionBlock();
 				finish_xact_command();
 				valgrind_report_error_query("SYNC message");
 				send_ready_for_query = true;
diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out
index 8bd9b26a257..addb3d7dcc4 100644
--- a/src/test/regress/expected/psql.out
+++ b/src/test/regress/expected/psql.out
@@ -7027,7 +7027,8 @@ SELECT COUNT(*) FROM psql_pipeline \bind \g
 -- pipelining and transaction block behaviour
 -- set local will issue a warning when modifying a GUC outside of a transaction block
 -- The change will still be valid as a pipeline runs within an implicit transaction block
--- Sending a sync will commit the implicit transaction block.
+-- Sending a sync will commit the implicit transaction block. The first command after a sync
+-- won't be seen as belonging to a pipeline.
 \startpipeline
 SET LOCAL statement_timeout='1h' \bind \g
 SHOW statement_timeout \bind \g
@@ -7042,7 +7043,6 @@ WARNING:  SET LOCAL can only be used in transaction blocks
  1h
 (1 row)
 
-WARNING:  SET LOCAL can only be used in transaction blocks
  statement_timeout 
 -------------------
  0
@@ -7064,7 +7064,7 @@ SELECT $1 \bind 2 \g
  1
 (1 row)
 
-ERROR:  REINDEX CONCURRENTLY cannot be executed within a pipeline
+ERROR:  REINDEX CONCURRENTLY cannot run inside a transaction block
 -- Reindex concurrently will work if it's the first command of a pipeline
 \startpipeline
 REINDEX TABLE CONCURRENTLY psql_pipeline \bind \g
@@ -7083,13 +7083,13 @@ ROLLBACK TO SAVEPOINT a \bind \g
 SELECT $1 \bind 2 \g
 \endpipeline
 ERROR:  SAVEPOINT can only be used in transaction blocks
--- Lock command will fail as pipeline is not seen as a transaction block
+-- Lock command will fail as first pipeline command is not seen as a transaction block
 \startpipeline
 LOCK psql_pipeline \bind \g
 SELECT $1 \bind 2 \g
 \endpipeline
 ERROR:  LOCK TABLE can only be used in transaction blocks
--- Lock command will fail in the middle of the pipeline as it's not seen as a transaction block
+-- Lock command will succeed after the first command as pipeline will be seen as an implicit transaction block
 \startpipeline
 SELECT $1 \bind 1 \g
 LOCK psql_pipeline \bind \g
@@ -7100,4 +7100,8 @@ SELECT $1 \bind 2 \g
  1
 (1 row)
 
-ERROR:  LOCK TABLE can only be used in transaction blocks
+ ?column? 
+----------
+ 2
+(1 row)
+
diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql
index ba613a6d435..fcb6d2b30b8 100644
--- a/src/test/regress/sql/psql.sql
+++ b/src/test/regress/sql/psql.sql
@@ -2046,7 +2046,8 @@ SELECT COUNT(*) FROM psql_pipeline \bind \g
 
 -- set local will issue a warning when modifying a GUC outside of a transaction block
 -- The change will still be valid as a pipeline runs within an implicit transaction block
--- Sending a sync will commit the implicit transaction block.
+-- Sending a sync will commit the implicit transaction block. The first command after a sync
+-- won't be seen as belonging to a pipeline.
 \startpipeline
 SET LOCAL statement_timeout='1h' \bind \g
 SHOW statement_timeout \bind \g
@@ -2077,13 +2078,13 @@ ROLLBACK TO SAVEPOINT a \bind \g
 SELECT $1 \bind 2 \g
 \endpipeline
 
--- Lock command will fail as pipeline is not seen as a transaction block
+-- Lock command will fail as first pipeline command is not seen as a transaction block
 \startpipeline
 LOCK psql_pipeline \bind \g
 SELECT $1 \bind 2 \g
 \endpipeline
 
--- Lock command will fail in the middle of the pipeline as it's not seen as a transaction block
+-- Lock command will succeed after the first command as pipeline will be seen as an implicit transaction block
 \startpipeline
 SELECT $1 \bind 1 \g
 LOCK psql_pipeline \bind \g
-- 
2.39.5 (Apple Git-154)

