From b48c3d80bafe6a72314427d20dcfdd080c1d2542 Mon Sep 17 00:00:00 2001
From: "duankunren.dkr" <duankunren.dkr@alibaba-inc.com>
Date: Thu, 13 Nov 2025 23:23:35 +0800
Subject: [PATCH] autovacuum cost-based delay improvements

---
 src/backend/commands/vacuum.c             | 54 +++++++++++++++
 src/backend/postmaster/autovacuum.c       | 83 +++++++++++++++++++++++
 src/backend/storage/ipc/procarray.c       | 14 ++++
 src/backend/utils/misc/guc_parameters.dat |  8 +++
 src/include/postmaster/autovacuum.h       |  5 ++
 src/include/storage/procarray.h           |  1 +
 src/include/utils/wait_event.h            | 12 ++++
 7 files changed, 177 insertions(+)

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e785dd55ce5..fa3106ace40 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -117,6 +117,13 @@ pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
 pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
 int			VacuumCostBalanceLocal = 0;
 
+typedef enum AutoVacuumMode
+{
+	AUTOVACUUM_MODE_NORMAL = 0, /* leave the computed cost delay as is */
+	AUTOVACUUM_MODE_FAST,		/* low IO load: skip the cost delay */
+	AUTOVACUUM_MODE_SLOW		/* high IO load: lengthen the cost delay */
+} AutoVacuumMode;
+
 /* non-export function prototypes */
 static List *expand_vacuum_rel(VacuumRelation *vrel,
 							   MemoryContext vac_context, int options);
@@ -130,6 +137,7 @@ static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
 static bool vac_tid_reaped(ItemPointer itemptr, void *state);
+static AutoVacuumMode get_autovacuum_mode_by_io_load(void);
 
 /*
  * GUC check function to ensure GUC value specified is within the allowable
@@ -2425,6 +2433,9 @@ vacuum_delay_point(bool is_analyze)
 {
 	double		msec = 0;
 
+	/* flag for autovacuum mode */
+	AutoVacuumMode mode = AUTOVACUUM_MODE_NORMAL;
+
 	/* Always check for interrupts */
 	CHECK_FOR_INTERRUPTS();
 
@@ -2461,11 +2472,22 @@ vacuum_delay_point(bool is_analyze)
 	else if (VacuumCostBalance >= vacuum_cost_limit)
 		msec = vacuum_cost_delay * VacuumCostBalance / vacuum_cost_limit;
 
+	if (debug_autovacuum_adaptive_cost_delay && AmAutoVacuumWorkerProcess())
+		{
+			mode = get_autovacuum_mode_by_io_load();
+			if (mode == AUTOVACUUM_MODE_FAST)
+				msec = 0;
+		}
+
 	/* Nap if appropriate */
 	if (msec > 0)
 	{
 		instr_time	delay_start;
 
+		if (debug_autovacuum_adaptive_cost_delay && 
+			mode == AUTOVACUUM_MODE_SLOW)
+			msec = msec + AUTOVACUUM_EXTRA_DELAY;
+
 		if (msec > vacuum_cost_delay * 4)
 			msec = vacuum_cost_delay * 4;
 
@@ -2681,3 +2703,35 @@ vac_tid_reaped(ItemPointer itemptr, void *state)
 
 	return TidStoreIsMember(dead_items, itemptr);
 }
+
+
+/*
+ * get_autovacuum_mode_by_io_load
+ *		Pick an autovacuum mode from the current IO wait counter.
+ *
+ * The counter from AutoVacuumGetIOWaitStats() is compared against two
+ * thresholds derived from the proc count reported by GetNumProcs():
+ *	- above 80% of the procs (threshold is at least 3): AUTOVACUUM_MODE_SLOW
+ *	- below 10% of the procs (threshold is at least 1): AUTOVACUUM_MODE_FAST
+ *	- anything in between: AUTOVACUUM_MODE_NORMAL
+ *
+ * Note the counter is weighted by backend type, not a plain process count.
+ */
+static AutoVacuumMode
+get_autovacuum_mode_by_io_load(void)
+{
+	int			nprocs = GetNumProcs();
+	uint32		slow_above = Max((nprocs * 8) / 10, 3);
+	uint32		fast_below = Max((nprocs * 1) / 10, 1);
+	uint32		io_waits = AutoVacuumGetIOWaitStats();
+
+	/* Heavy IO wait elsewhere: back off. */
+	if (io_waits > slow_above)
+		return AUTOVACUUM_MODE_SLOW;
+
+	/* Hardly any IO wait: no need to throttle. */
+	if (io_waits < fast_below)
+		return AUTOVACUUM_MODE_FAST;
+
+	return AUTOVACUUM_MODE_NORMAL;
+}
\ No newline at end of file
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 1c55e9fa9d3..c344ad4ad82 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -135,6 +135,7 @@ int			autovacuum_vac_cost_limit;
 
 int			Log_autovacuum_min_duration = 600000;
 int			Log_autoanalyze_min_duration = 600000;
+int			debug_autovacuum_adaptive_cost_delay = false;
 
 /* the minimum allowed time between two awakenings of the launcher */
 #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
@@ -300,6 +301,7 @@ typedef struct
 	WorkerInfo	av_startingWorker;
 	AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
 	pg_atomic_uint32 av_nworkersForBalance;
+	pg_atomic_uint32 av_io_wait_count;
 } AutoVacuumShmemStruct;
 
 static AutoVacuumShmemStruct *AutoVacuumShmem;
@@ -372,6 +374,7 @@ static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
 static void avl_sigusr2_handler(SIGNAL_ARGS);
 static bool av_worker_available(void);
 static void check_av_worker_gucs(void);
+static void io_wait_count_update(bool increment);
 
 
 
@@ -3553,6 +3556,7 @@ AutoVacuumShmemInit(void)
 		}
 
 		pg_atomic_init_u32(&AutoVacuumShmem->av_nworkersForBalance, 0);
+		pg_atomic_init_u32(&AutoVacuumShmem->av_io_wait_count, 0);
 
 	}
 	else
@@ -3616,3 +3620,82 @@ check_av_worker_gucs(void)
 				 errdetail("The server will only start up to \"autovacuum_worker_slots\" (%d) autovacuum workers at a given time.",
 						   autovacuum_worker_slots)));
 }
+
+/*
+ * Count the start (increment) or end (!increment) of IO wait wait_event_info.
+ * The events listed below, which can be reported before shared memory is
+ * initialized, are skipped, and so is any call while AutoVacuumShmem is NULL.
+ */
+void
+AutoVacuumUpdateIOWaitStats(uint32 wait_event_info, bool increment)
+{
+	switch (wait_event_info)
+	{
+		case WAIT_EVENT_CONTROL_FILE_READ:
+		case WAIT_EVENT_DSM_ALLOCATE:
+		case WAIT_EVENT_DSM_FILL_ZERO_WRITE:
+		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ:
+		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC:
+		case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE:
+		case WAIT_EVENT_LOCK_FILE_CREATE_READ:
+		case WAIT_EVENT_LOCK_FILE_CREATE_SYNC:
+		case WAIT_EVENT_LOCK_FILE_CREATE_WRITE:
+		case WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ:
+		case WAIT_EVENT_SNAPBUILD_SYNC:
+		case WAIT_EVENT_WAL_READ:
+			break;
+		default:
+			if (AutoVacuumShmem != NULL)
+				io_wait_count_update(increment);
+			break;
+	}
+}
+
+uint32 
+AutoVacuumGetIOWaitStats(void)
+{								/* returns the shared IO wait counter */
+	return pg_atomic_read_u32(&AutoVacuumShmem->av_io_wait_count);
+}
+
+/*
+ * io_wait_count_update
+ *		Adjust the shared IO wait counter for the current process.
+ *
+ * increment: true adds this process's weight, false subtracts it again.
+ *
+ * The weight depends on MyBackendType:
+ *	3 - background writer, checkpointer, standalone backend and regular
+ *		backend; their IO waits are counted with a higher weight;
+ *	0 - autovacuum worker (not counted at all), because autovacuum is the
+ *		one adapting to the IO load of the other processes;
+ *	1 - every other backend type.
+ *
+ * NOTE(review): the subtraction is unconditional, so the unsigned counter
+ * wraps around if an end is counted without its start -- confirm that the
+ * callers always pair the two.
+ */
+static void
+io_wait_count_update(bool increment)
+{
+	int32		weight;
+
+	switch (MyBackendType)
+	{
+		case B_BG_WRITER:
+		case B_CHECKPOINTER:
+		case B_STANDALONE_BACKEND:
+		case B_BACKEND:
+			weight = 3;
+			break;
+		case B_AUTOVAC_WORKER:
+			return;
+		default:
+			weight = 1;
+			break;
+	}
+
+	if (increment)
+		pg_atomic_fetch_add_u32(&AutoVacuumShmem->av_io_wait_count, weight);
+	else
+		pg_atomic_fetch_sub_u32(&AutoVacuumShmem->av_io_wait_count, weight);
+}
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 200f72c6e25..bf0e6a99798 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -5227,3 +5227,17 @@ KnownAssignedXidsReset(void)
 
 	LWLockRelease(ProcArrayLock);
 }
+
+/*
+ * GetNumProcs -- return procArray->numProcs, read under shared ProcArrayLock.
+ */
+int
+GetNumProcs(void)
+{
+	int			nprocs;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+	nprocs = procArray->numProcs;
+	LWLockRelease(ProcArrayLock);
+	return nprocs;
+}
\ No newline at end of file
diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat
index 1128167c025..38890ce3eb9 100644
--- a/src/backend/utils/misc/guc_parameters.dat
+++ b/src/backend/utils/misc/guc_parameters.dat
@@ -591,6 +591,14 @@
   boot_val => 'DEFAULT_ASSERT_ENABLED',
 },
 
+{ name => 'debug_autovacuum_adaptive_cost_delay', type => 'bool', context => 'PGC_USERSET', group => 'DEVELOPER_OPTIONS',
+  short_desc => 'Enables/Disables the autovacuum adaptive adjustment of cost-based delay.',
+  long_desc => 'This can be useful for testing the effect of adaptive adjustment of cost-based delay in autovacuum.',
+  flags => 'GUC_NOT_IN_SAMPLE | GUC_EXPLAIN',
+  variable => 'debug_autovacuum_adaptive_cost_delay',
+  boot_val => 'false',
+},
+
 { name => 'debug_copy_parse_plan_trees', type => 'bool', context => 'PGC_SUSET', group => 'DEVELOPER_OPTIONS',
   short_desc => 'Set this to force all parse and plan trees to be passed through copyObject(), to facilitate catching errors and omissions in copyObject().',
   flags => 'GUC_NOT_IN_SAMPLE',
diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h
index 023ac6d5fa8..8a0afc5f427 100644
--- a/src/include/postmaster/autovacuum.h
+++ b/src/include/postmaster/autovacuum.h
@@ -16,6 +16,8 @@
 
 #include "storage/block.h"
 
+#define  AUTOVACUUM_EXTRA_DELAY 1	/* added to msec in vacuum_delay_point() in slow mode */
+
 /*
  * Other processes can request specific work from autovacuum, identified by
  * AutoVacuumWorkItem elements.
@@ -43,6 +45,7 @@ extern PGDLLIMPORT int autovacuum_freeze_max_age;
 extern PGDLLIMPORT int autovacuum_multixact_freeze_max_age;
 extern PGDLLIMPORT double autovacuum_vac_cost_delay;
 extern PGDLLIMPORT int autovacuum_vac_cost_limit;
+extern PGDLLIMPORT int debug_autovacuum_adaptive_cost_delay;
 
 /* autovacuum launcher PID, only valid when worker is shutting down */
 extern PGDLLIMPORT int AutovacuumLauncherPid;
@@ -69,4 +72,6 @@ extern bool AutoVacuumRequestWork(AutoVacuumWorkItemType type,
 extern Size AutoVacuumShmemSize(void);
 extern void AutoVacuumShmemInit(void);
 
+extern void AutoVacuumUpdateIOWaitStats(uint32 wait_event_info, bool increment);
+extern uint32 AutoVacuumGetIOWaitStats(void);
 #endif							/* AUTOVACUUM_H */
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 2f4ae06c279..9383740a555 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -99,5 +99,6 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin,
 
 extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
 											TransactionId *catalog_xmin);
+extern int GetNumProcs(void);
 
 #endif							/* PROCARRAY_H */
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index f5815b4994a..94ac08b7510 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -12,6 +12,7 @@
 
 /* enums for wait events */
 #include "utils/wait_event_types.h"
+#include "postmaster/autovacuum.h"
 
 extern const char *pgstat_get_wait_event(uint32 wait_event_info);
 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
@@ -73,6 +74,10 @@ pgstat_report_wait_start(uint32 wait_event_info)
 	 * four-bytes, updates are atomic.
 	 */
 	*(volatile uint32 *) my_wait_event_info = wait_event_info;
+	/* for adaptive autovacuum delay based on io wait */
+	if (debug_autovacuum_adaptive_cost_delay && 
+		(wait_event_info & 0xFF000000) == PG_WAIT_IO)
+		AutoVacuumUpdateIOWaitStats(wait_event_info, true);
 }
 
 /* ----------
@@ -84,8 +89,15 @@ pgstat_report_wait_start(uint32 wait_event_info)
 static inline void
 pgstat_report_wait_end(void)
 {
+	uint32		wait_event_info = *(volatile uint32 *) my_wait_event_info;
+
 	/* see pgstat_report_wait_start() */
 	*(volatile uint32 *) my_wait_event_info = 0;
+
+	/* the saved event says whether an IO wait ended; if so, uncount it */
+	if (debug_autovacuum_adaptive_cost_delay && 
+		(wait_event_info & 0xFF000000) == PG_WAIT_IO)
+		AutoVacuumUpdateIOWaitStats(wait_event_info, false);
 }
 
 
-- 
2.32.0.3.g01195cf9f

