This is an automated email from the ASF dual-hosted git repository.
gfphoenix78 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new e51dd0806a5 Fix: the reader's retry logic for finding the writer proc
entry should be synchronized with the creation timeout retry logic of
cdbgang_createGang_async, so that slow gang creation (due to platform,
container, network, or other reasons) does not cause the reader to
prematurely consider it an abnormal termination.
e51dd0806a5 is described below
commit e51dd0806a5cc9f10d2a0951c9c53a6e20cfdff1
Author: zhaoxi <[email protected]>
AuthorDate: Fri Nov 21 21:09:57 2025 +0800
Fix: the reader's retry logic for finding the writer proc entry should be
synchronized with the creation timeout retry logic of cdbgang_createGang_async,
so that slow gang creation (due to platform, container, network, or other
reasons) does not cause the reader to prematurely consider it an abnormal
termination. Example of the premature failure:
2025-11-20 11:48:27.925475
CST,"gpadmin","regression",p14056,th-1958096896,"172.18.0.2","40060",2025-11-20
11:48:27 CST,0,con33,,seg0,,,,sx1,"WARNING","58M01","reader could not find
writer proc entry","lock [0,1260] AccessShareLock 0. Probably because writer
gang is gone somehow. Maybe try rerunning.",,,,,,0,,"lock.c",963,"Stack trace:
1 0xaaaab4db9f14 postgres errstart + 0x494
2 0xaaaab4b9b064 postgres LockAcquireExtended + 0x76c
3 0xaaaab4b97d98 postgres LockRelationOid + 0x3c
4 0xaaaab44a6e30 postgres relation_open + 0x60
5 0xaaaab45a04e8 postgres table_open + 0x1c
6 0xaaaab4d7f3e8 postgres <symbol not found> + 0xb4d7f3e8
7 0xaaaab4d7fdcc postgres <symbol not found> + 0xb4d7fdcc
8 0xaaaab4d7fc5c postgres SearchCatCache1 + 0x2c
9 0xaaaab4da0258 postgres SearchSysCache1 + 0xb4
10 0xaaaab4dd6f48 postgres InitializeSessionUserId + 0x98
11 0xaaaab4dda874 postgres InitPostgres + 0x504
12 0xaaaab4bc93bc postgres PostgresMain + 0x390
13 0xaaaab4ac90b8 postgres <symbol not found> + 0xb4ac90b8
14 0xaaaab4ac8918 postgres <symbol not found> + 0xb4ac8918
15 0xaaaab4ac3114 postgres <symbol not found> + 0xb4ac3114
16 0xaaaab4ac2804 postgres PostmasterMain + 0x1668
17 0xaaaab4936b50 postgres <symbol not found> + 0xb4936b50
18 0xffff8b4f1724 libc.so.6 __libc_start_main + 0xf0
19 0xaaaab448327c postgres <symbol not found> + 0xb448327c
---
src/backend/storage/lmgr/lock.c | 23 +++++++++++++++++++----
1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index cbc873e0b70..faf69b1581f 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -939,11 +939,26 @@ LockAcquireExtended(const LOCKTAG *locktag,
{
/* Find the guy who should manage our locks */
volatile PGPROC * proc =
FindProcByGpSessionId(gp_session_id);
- int count = 0;
- while(proc==NULL && count <
find_writer_proc_retry_time)
+ TimestampTz current_time;
+ TimestampTz start_time;
+ long elapsed_secs;
+ int elapsed_usecs;
+ start_time = GetCurrentTimestamp();
+
+ while (proc == NULL)
{
+ /*
+ * The creation timeout retry logic of
cdbgang_createGang_async
+ * should be synchronized with the
reader to avoid slow creation
+ * due to platform, container, network
and other reasons,
+ * which would cause the reader to
prematurely consider it an abnormal termination.
+ */
+ current_time = GetCurrentTimestamp();
+ TimestampDifference(start_time,
current_time, &elapsed_secs, &elapsed_usecs);
+ if (elapsed_secs >=
gp_segment_connect_timeout / 2)
+ break;
+
pg_usleep( /* microseconds */ 2000);
- count++;
CHECK_FOR_INTERRUPTS();
/*
* The reason for using
pg_memory_barrier() is to ensure that
@@ -954,7 +969,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
}
if (proc != NULL)
{
- elog(DEBUG1,"Found writer proc entry.
My Pid %d, his pid %d", MyProc-> pid, proc->pid);
+ elog(DEBUG1, "Found writer proc entry.
My Pid %d, his pid %d", MyProc-> pid, proc->pid);
lockHolderProcPtr = (PGPROC*) proc;
}
else
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]