This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit e685db063e88bd813293fd5afdbf79f10282711c Author: dreamedcheng <[email protected]> AuthorDate: Wed Jul 6 10:38:29 2022 +0800 Fix idle_in_transaction_session_timeout taking effect on QE (#13547) When a query spawns a write gang and multiple read gangs on a segment and idle_in_transaction_session_timeout is set to a non-zero value, the write gang may finish its execution while a read gang is still in progress. The write gang can then be terminated early by the timeout, which may cause unexpected errors in the read gang (for example, when it reads the shared snapshot). This fix disables idle_in_transaction_session_timeout on QE. Co-authored-by: wuchengwen <[email protected]> --- src/backend/tcop/postgres.c | 4 +- src/backend/utils/time/sharedsnapshot.c | 2 + ...te_gang_idle_in_transaction_session_timeout.out | 65 ++++++++++++++++++++++ src/test/isolation2/isolation2_schedule | 2 + ...te_gang_idle_in_transaction_session_timeout.sql | 32 +++++++++++ 5 files changed, 103 insertions(+), 2 deletions(-) diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 91b6fbc3e8..05f0abd401 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -5421,7 +5421,7 @@ PostgresMain(int argc, char *argv[], pgstat_report_activity(STATE_IDLEINTRANSACTION_ABORTED, NULL); /* Start the idle-in-transaction timer */ - if (IdleInTransactionSessionTimeout > 0) + if (IdleInTransactionSessionTimeout > 0 && Gp_role != GP_ROLE_EXECUTE) { idle_in_transaction_timeout_enabled = true; enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, @@ -5435,7 +5435,7 @@ PostgresMain(int argc, char *argv[], pgstat_report_activity(STATE_IDLEINTRANSACTION, NULL); /* Start the idle-in-transaction timer */ - if (IdleInTransactionSessionTimeout > 0) + if (IdleInTransactionSessionTimeout > 0 && Gp_role != GP_ROLE_EXECUTE) { idle_in_transaction_timeout_enabled = true; enable_timeout_after(IDLE_IN_TRANSACTION_SESSION_TIMEOUT, diff --git a/src/backend/utils/time/sharedsnapshot.c b/src/backend/utils/time/sharedsnapshot.c index 
0e55f9f9f2..061d9a6d18 100644 --- a/src/backend/utils/time/sharedsnapshot.c +++ b/src/backend/utils/time/sharedsnapshot.c @@ -159,6 +159,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/faultinjector.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/resowner.h" @@ -670,6 +671,7 @@ readSharedLocalSnapshot_forCursor(Snapshot snapshot, DtxContext distributedTrans Assert(SharedLocalSnapshotSlot != NULL); Assert(snapshot->xip != NULL); + SIMPLE_FAULT_INJECTOR("before_read_shared_snapshot_for_cursor"); if (dumpHtab == NULL) { diff --git a/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out new file mode 100644 index 0000000000..694c253fab --- /dev/null +++ b/src/test/isolation2/expected/write_gang_idle_in_transaction_session_timeout.out @@ -0,0 +1,65 @@ +-- GUC idle_in_transaction_session_timeout MUST not take effect on QE, +-- this test guard that. +-- In this test, session 2 uses a cursor, which will spawn a write gang +-- and a read gang. And we set idle_in_transaction_session_timeout +-- to 1s, when FETCH is executed, the read gang will suspend 1.5s because +-- of the fault injection. However, without the fix, the write gang will be +-- terminated 1s later when FETCH is issued due to the timeout of +-- idle_in_transaction_session_timeout. So when the reader is going to read the +-- shared snapshot, ERROR will be raised. 
+ +1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); +CREATE +1: INSERT INTO t_idle_trx_timeout VALUES (2),(3); +INSERT 2 +1: SELECT gp_segment_id, * FROM t_idle_trx_timeout; + gp_segment_id | a +---------------+--- + 0 | 2 + 0 | 3 +(2 rows) + +1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +2: SET idle_in_transaction_session_timeout = 1000; +SET +1&: SELECT gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid) FROM gp_segment_configuration where content =0 AND role = 'p'; <waiting ...> +2: BEGIN; +BEGIN +2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; +DECLARE +2&: FETCH cur; <waiting ...> +1<: <... completed> + gp_wait_until_triggered_fault +------------------------------- + Success: +(1 row) +1: SELECT pg_sleep(1.5); + pg_sleep +---------- + +(1 row) +1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'reset', dbid) FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; + gp_inject_fault_infinite +-------------------------- + Success: +(1 row) +2<: <... 
completed> + a +--- + 2 +(1 row) +2: FETCH cur; + a +--- + 3 +(1 row) +2: END; +END + +1: DROP TABLE t_idle_trx_timeout; +DROP + diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index cb5b234466..d6847bd2f1 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -215,6 +215,8 @@ test: vacuum_progress_row test: vacuum_progress_column test: enable_autovacuum test: idle_gang_cleaner +# test idle_in_transaction_session_timeout +test: write_gang_idle_in_transaction_session_timeout test: segwalrep/die_commit_pending_replication diff --git a/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql new file mode 100644 index 0000000000..666fc13b90 --- /dev/null +++ b/src/test/isolation2/sql/write_gang_idle_in_transaction_session_timeout.sql @@ -0,0 +1,32 @@ +-- GUC idle_in_transaction_session_timeout MUST not take effect on QE, +-- this test guard that. +-- In this test, session 2 uses a cursor, which will spawn a write gang +-- and a read gang. And we set idle_in_transaction_session_timeout +-- to 1s, when FETCH is executed, the read gang will suspend 1.5s because +-- of the fault injection. However, without the fix, the write gang will be +-- terminated 1s later when FETCH is issued due to the timeout of +-- idle_in_transaction_session_timeout. So when the reader is going to read the +-- shared snapshot, ERROR will be raised. 
+ +1: CREATE TABLE t_idle_trx_timeout (a int) DISTRIBUTED BY(a); +1: INSERT INTO t_idle_trx_timeout VALUES (2),(3); +1: SELECT gp_segment_id, * FROM t_idle_trx_timeout; + +1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'suspend', dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; +2: SET idle_in_transaction_session_timeout = 1000; +1&: SELECT gp_wait_until_triggered_fault('before_read_shared_snapshot_for_cursor', 1, dbid) + FROM gp_segment_configuration where content =0 AND role = 'p'; +2: BEGIN; +2: DECLARE cur CURSOR FOR SELECT * FROM t_idle_trx_timeout; +2&: FETCH cur; +1<: +1: SELECT pg_sleep(1.5); +1: SELECT gp_inject_fault_infinite('before_read_shared_snapshot_for_cursor', 'reset', dbid) + FROM gp_segment_configuration WHERE content = 0 AND role = 'p'; +2<: +2: FETCH cur; +2: END; + +1: DROP TABLE t_idle_trx_timeout; + --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
