This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit e685db063e88bd813293fd5afdbf79f10282711c
Author: dreamedcheng <[email protected]>
AuthorDate: Wed Jul 6 10:38:29 2022 +0800

    Fix idle_in_transaction_session_timeout wrongly taking effect on QE (#13547)
    
    When a query spawns a write gang and multiple read gangs on a segment
    and idle_in_transaction_session_timeout is set to a non-zero value,
    the write gang may finish its execution while a read gang is still in
    progress. In that case the idle write gang is terminated early by the
    timeout, which may cause unexpected errors.
    
    In this fix, idle_in_transaction_session_timeout is disabled on QE.
    
    Co-authored-by: wuchengwen <[email protected]>
---
 src/backend/tcop/postgres.c                        |  4 +-
 src/backend/utils/time/sharedsnapshot.c            |  2 +
 ...te_gang_idle_in_transaction_session_timeout.out | 65 ++++++++++++++++++++++
 src/test/isolation2/isolation2_schedule            |  2 +
 ...te_gang_idle_in_transaction_session_timeout.sql | 32 +++++++++++
 5 files changed, 103 insertions(+), 2 deletions(-)

diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 91b6fbc3e8..05f0abd401 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -5421,7 +5421,7 @@ PostgresMain(int argc, char *argv[],
                                
pgstat_report_activity(STATE_IDLEINTRANSACTION_ABORTED, NULL);
 
                                /* Start the idle-in-transaction timer */
-                               if (IdleInTransactionSessionTimeout > 0)
+                               if (IdleInTransactionSessionTimeout > 0 && 
Gp_role != GP_ROLE_EXECUTE)
                                {
                                        idle_in_transaction_timeout_enabled = 
true;
                                        
enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
@@ -5435,7 +5435,7 @@ PostgresMain(int argc, char *argv[],
                                pgstat_report_activity(STATE_IDLEINTRANSACTION, 
NULL);
 
                                /* Start the idle-in-transaction timer */
-                               if (IdleInTransactionSessionTimeout > 0)
+                               if (IdleInTransactionSessionTimeout > 0 && 
Gp_role != GP_ROLE_EXECUTE)
                                {
                                        idle_in_transaction_timeout_enabled = 
true;
                                        
enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
diff --git a/src/backend/utils/time/sharedsnapshot.c 
b/src/backend/utils/time/sharedsnapshot.c
index 0e55f9f9f2..061d9a6d18 100644
--- a/src/backend/utils/time/sharedsnapshot.c
+++ b/src/backend/utils/time/sharedsnapshot.c
@@ -159,6 +159,7 @@
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
+#include "utils/faultinjector.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
@@ -670,6 +671,7 @@ readSharedLocalSnapshot_forCursor(Snapshot snapshot, 
DtxContext distributedTrans
        Assert(SharedLocalSnapshotSlot != NULL);
        Assert(snapshot->xip != NULL);
 
+       SIMPLE_FAULT_INJECTOR("before_read_shared_snapshot_for_cursor");
 
        if (dumpHtab == NULL)
        {
diff --git 
a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out
 
b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out
new file mode 100644
index 0000000000..694c253fab
--- /dev/null
+++ 
b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out
@@ -0,0 +1,65 @@
+-- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
+-- this test guard that.
+-- In this test, session 2 uses a cursor, which will spawn a write gang
+-- and a read gang. And we set idle_in_transaction_session_timeout
+-- to 1s, when FETCH is executed, the read gang will suspend 1.5s because
+-- of the fault injection. However, without the fix, the write gang will be
+-- terminated 1s later when FETCH is issued due to the timeout of
+-- idle_in_transaction_session_timeout. So when the reader is going to read the
+-- shared snapshot, ERROR will be raised.
+
+1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
+CREATE
+1: INSERT INTO t_idle_trx_timeout VALUES (2),(3);
+INSERT 2
+1: SELECT gp_segment_id, * FROM t_idle_trx_timeout;
+ gp_segment_id | a 
+---------------+---
+ 0             | 2 
+ 0             | 3 
+(2 rows)
+
+1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'suspend', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+ gp_inject_fault_infinite 
+--------------------------
+ Success:                 
+(1 row)
+2: SET idle_in_transaction_session_timeout = 1000;
+SET
+1&: SELECT 
gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, 
dbid) FROM gp_segment_configuration where content =0 AND role = 'p';  <waiting 
...>
+2: BEGIN;
+BEGIN
+2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
+DECLARE
+2&: FETCH cur;  <waiting ...>
+1<:  <... completed>
+ gp_wait_until_triggered_fault 
+-------------------------------
+ Success:                      
+(1 row)
+1: SELECT pg_sleep(1.5);
+ pg_sleep 
+----------
+          
+(1 row)
+1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'reset', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+ gp_inject_fault_infinite 
+--------------------------
+ Success:                 
+(1 row)
+2<:  <... completed>
+ a 
+---
+ 2 
+(1 row)
+2: FETCH cur;
+ a 
+---
+ 3 
+(1 row)
+2: END;
+END
+
+1: DROP TABLE t_idle_trx_timeout;
+DROP
+
diff --git a/src/test/isolation2/isolation2_schedule 
b/src/test/isolation2/isolation2_schedule
index cb5b234466..d6847bd2f1 100644
--- a/src/test/isolation2/isolation2_schedule
+++ b/src/test/isolation2/isolation2_schedule
@@ -215,6 +215,8 @@ test: vacuum_progress_row
 test: vacuum_progress_column
 test: enable_autovacuum
 test: idle_gang_cleaner
+# test idle_in_transaction_session_timeout
+test: write_gang_idle_in_transaction_session_timeout
 
 test: segwalrep/die_commit_pending_replication
 
diff --git 
a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql 
b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql
new file mode 100644
index 0000000000..666fc13b90
--- /dev/null
+++ b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql
@@ -0,0 +1,32 @@
+-- GUC idle_in_transaction_session_timeout MUST not take effect on QE,
+-- this test guard that.
+-- In this test, session 2 uses a cursor, which will spawn a write gang
+-- and a read gang. And we set idle_in_transaction_session_timeout
+-- to 1s, when FETCH is executed, the read gang will suspend 1.5s because
+-- of the fault injection. However, without the fix, the write gang will be
+-- terminated 1s later when FETCH is issued due to the timeout of
+-- idle_in_transaction_session_timeout. So when the reader is going to read the
+-- shared snapshot, ERROR will be raised.
+
+1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a);
+1: INSERT INTO t_idle_trx_timeout VALUES (2),(3);
+1: SELECT gp_segment_id, * FROM t_idle_trx_timeout;
+
+1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'suspend', dbid)
+    FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+2: SET idle_in_transaction_session_timeout = 1000;
+1&: SELECT 
gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid)
+     FROM gp_segment_configuration where content =0 AND role = 'p';
+2: BEGIN;
+2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout;
+2&: FETCH cur;
+1<:
+1: SELECT pg_sleep(1.5);
+1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 
'reset', dbid) 
+    FROM gp_segment_configuration WHERE content = 0 AND role = 'p';
+2<:
+2: FETCH cur;
+2: END;
+
+1: DROP TABLE t_idle_trx_timeout;
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to