diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index a9d9f31..50b7b1d 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -137,6 +137,7 @@
 #define PARALLEL_VACUUM_KEY_SHARED			1
 #define PARALLEL_VACUUM_KEY_DEAD_TUPLES		2
 #define PARALLEL_VACUUM_KEY_QUERY_TEXT		3
+#define PARALLEL_VACUUM_KEY_COST_DELAY		4
 
 /*
  * PARALLEL_VACUUM_DISABLE_LEADER_PARTICIPATION disables the leader's
@@ -257,6 +258,21 @@ typedef struct LVSharedIndStats
 #define GetIndexBulkDeleteResult(s) \
 	((IndexBulkDeleteResult *)((char *)(s) + sizeof(LVSharedIndStats)))
 
+typedef struct LVDelayStats
+{
+	double	time;
+	int		hit;
+	int		miss;
+	int		dirty;
+} LVDelayStats;
+
+typedef struct LVCostDelay
+{
+	pg_atomic_uint32	nslot;
+	LVDelayStats 		stats[FLEXIBLE_ARRAY_MEMBER];
+} LVCostDelay;
+#define SizeOfLVCostDelay (offsetof(LVCostDelay, stats) + sizeof(LVDelayStats))
+
 /* Struct for parallel lazy vacuum */
 typedef struct LVParallelState
 {
@@ -265,6 +281,8 @@ typedef struct LVParallelState
 	/* Shared information among parallel vacuum workers */
 	LVShared		*lvshared;
 
+	/* Shared cost delay. */
+	LVCostDelay		*lvcostdelay;
 	/*
 	 * Always true except for a debugging case where
 	 * PARALLEL_VACUUM_DISABLE_LEADER_PARTICIPATION are defined.
@@ -757,6 +775,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		parallel_workers = compute_parallel_workers(Irel, nindexes,
 													params->nworkers);
 
+	VacuumCostTotalDelay = 0;
 	if (parallel_workers > 0)
 	{
 		/*
@@ -1732,6 +1751,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 					tups_vacuumed, num_tuples,
 					vacrelstats->scanned_pages, nblocks),
 			 errdetail_internal("%s", buf.data)));
+	elog(WARNING, "VacuumCostTotalDelay=%lf", VacuumCostTotalDelay);
 	pfree(buf.data);
 }
 
@@ -1967,6 +1987,10 @@ lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel,
 										int nindexes, IndexBulkDeleteResult **stats,
 										LVParallelState *lps)
 {
+	int		i;
+	int		io;
+	double	costdelay;
+
 	Assert(!IsParallelWorker());
 	Assert(ParallelVacuumIsActive(lps));
 	Assert(nindexes > 0);
@@ -1994,6 +2018,9 @@ lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel,
 								 lps->pcxt->nworkers_launched),
 						lps->pcxt->nworkers_launched, lps->pcxt->nworkers)));
 
+	costdelay = VacuumCostTotalDelay;
+	VacuumCostTotalDelay = 0;
+	_nhit=_nmiss=_ndirty=0;
 	/*
 	 * Join as parallel workers. The leader process alone does that in case where
 	 * no workers launched.
@@ -2011,6 +2038,30 @@ lazy_parallel_vacuum_or_cleanup_indexes(LVRelStats *vacrelstats, Relation *Irel,
 	/* Continue to use the shared balance value */
 	VacuumCostBalance = pg_atomic_read_u32(&(lps->lvshared->cost_balance));
 
+	io = _nhit * VacuumCostPageHit + _nmiss * VacuumCostPageMiss + _ndirty * VacuumCostPageDirty;
+	elog(WARNING, "worker 0 delay=%lf total io=%d hit=%d miss=%d dirty=%d",
+				  VacuumCostTotalDelay, io, _nhit, _nmiss, _ndirty);
+	/*
+	 * Collect all the delay from workers and add to total delay. The delay from leader
+	 * is already included in VacuumCostTotalDelay.
+	 */
+	for (i = 0; i < lps->pcxt->nworkers_launched; i++)
+	{
+		VacuumCostTotalDelay += lps->lvcostdelay->stats[i].time;
+		_nhit += lps->lvcostdelay->stats[i].hit;
+		_nmiss += lps->lvcostdelay->stats[i].miss;
+		_ndirty += lps->lvcostdelay->stats[i].dirty;
+		io = lps->lvcostdelay->stats[i].hit * VacuumCostPageHit +
+			 lps->lvcostdelay->stats[i].miss * VacuumCostPageMiss +
+			 lps->lvcostdelay->stats[i].dirty * VacuumCostPageDirty;
+		elog(WARNING, "worker %d delay=%lf total io=%d hit=%d miss=%d dirty=%d",
+					  i+1, lps->lvcostdelay->stats[i].time, io,
+					  lps->lvcostdelay->stats[i].hit,
+					  lps->lvcostdelay->stats[i].miss,
+					  lps->lvcostdelay->stats[i].dirty);	
+	}
+	VacuumCostTotalDelay += costdelay;
+
 	/*
 	 * We need to reinitialize the parallel context as no more index vacuuming and
 	 * index cleanup will be performed after that.
@@ -2968,10 +3019,12 @@ begin_parallel_vacuum(LVRelStats *vacrelstats, Oid relid, BlockNumber nblocks,
 	ParallelContext *pcxt;
 	LVShared		*shared;
 	LVDeadTuples	*dead_tuples;
+	LVCostDelay		*costdelay;
 	long	maxtuples;
 	char	*sharedquery;
 	Size	est_shared;
 	Size	est_deadtuples;
+	Size	est_costdelay;
 	int		querylen;
 	int		i;
 
@@ -3043,6 +3096,14 @@ begin_parallel_vacuum(LVRelStats *vacrelstats, Oid relid, BlockNumber nblocks,
 	sharedquery[querylen] = '\0';
 	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
 
+	/* Vacuum cost balance. */
+	est_costdelay = MAXALIGN(add_size(SizeOfLVCostDelay,
+								   mul_size(sizeof(LVDelayStats), nrequested)));
+	costdelay = (LVCostDelay *) shm_toc_allocate(pcxt->toc, est_costdelay);
+	pg_atomic_init_u32(&(costdelay->nslot), 0);
+	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_COST_DELAY, costdelay);
+	lps->lvcostdelay = costdelay;
+
 	return lps;
 }
 
@@ -3171,8 +3232,10 @@ heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	Relation	*indrels;
 	LVShared	*lvshared;
 	LVDeadTuples	*dead_tuples;
+	LVCostDelay		*costdelay;
 	int			nindexes;
 	char		*sharedquery;
+	int			slot;
 	IndexBulkDeleteResult **stats;
 
 	lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
@@ -3207,6 +3270,11 @@ heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 												  PARALLEL_VACUUM_KEY_DEAD_TUPLES,
 												  false);
 
+	costdelay = (LVCostDelay *) shm_toc_lookup(toc,
+												   PARALLEL_VACUUM_KEY_COST_DELAY,
+												   false);
+	slot = pg_atomic_fetch_add_u32(&(costdelay->nslot), 1);
+
 	/* Set cost-based vacuum delay */
 	VacuumCostActive = (VacuumCostDelay > 0);
 	VacuumCostBalance = 0;
@@ -3214,6 +3282,7 @@ heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	VacuumPageMiss = 0;
 	VacuumPageDirty = 0;
 	VacuumSharedCostBalance = &(lvshared->cost_balance);
+	_nhit = _nmiss = _ndirty = 0;
 
 	stats = (IndexBulkDeleteResult **)
 		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
@@ -3225,6 +3294,11 @@ heap_parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	vacuum_or_cleanup_indexes_worker(indrels, nindexes, stats, lvshared,
 									 dead_tuples);
 
+	/* update the total delay in the shared location. */
+	costdelay->stats[slot].time = VacuumCostTotalDelay;
+	costdelay->stats[slot].hit = _nhit;
+	costdelay->stats[slot].miss = _nmiss;
+	costdelay->stats[slot].dirty = _ndirty;
 	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
 	table_close(onerel, ShareUpdateExclusiveLock);
 }
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 905d173..bb07a5a 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -412,6 +412,7 @@ vacuum(List *relations, VacuumParams *params,
 		VacuumPageHit = 0;
 		VacuumPageMiss = 0;
 		VacuumPageDirty = 0;
+		_nhit = _nmiss = _ndirty = 0;
 		VacuumSharedCostBalance = NULL;
 
 		/*
@@ -2046,6 +2047,8 @@ vacuum_delay_point(void)
 			/* update balance values for workers */
 			AutoVacuumUpdateDelay();
 
+			VacuumCostTotalDelay += msec;
+
 			/* Might have gotten an interrupt while sleeping */
 			CHECK_FOR_INTERRUPTS();
 		}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7ad1073..1ba71c8 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -770,7 +770,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			VacuumPageHit++;
 
 			if (VacuumCostActive)
+			{
 				VacuumCostBalance += VacuumCostPageHit;
+				_nhit++;
+			}
 
 			TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
 											  smgr->smgr_rnode.node.spcNode,
@@ -959,7 +962,10 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 	VacuumPageMiss++;
 	if (VacuumCostActive)
+	{
 		VacuumCostBalance += VacuumCostPageMiss;
+		_nmiss++;
+	}
 
 	TRACE_POSTGRESQL_BUFFER_READ_DONE(forkNum, blockNum,
 									  smgr->smgr_rnode.node.spcNode,
@@ -1500,7 +1506,10 @@ MarkBufferDirty(Buffer buffer)
 		VacuumPageDirty++;
 		pgBufferUsage.shared_blks_dirtied++;
 		if (VacuumCostActive)
+		{
 			VacuumCostBalance += VacuumCostPageDirty;
+			_ndirty++;
+		}
 	}
 }
 
@@ -3556,7 +3565,10 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 			VacuumPageDirty++;
 			pgBufferUsage.shared_blks_dirtied++;
 			if (VacuumCostActive)
+			{
 				VacuumCostBalance += VacuumCostPageDirty;
+				_ndirty++;
+			}
 		}
 	}
 }
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 3bf96de..de214f3 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -139,11 +139,15 @@ int			VacuumCostPageMiss = 10;
 int			VacuumCostPageDirty = 20;
 int			VacuumCostLimit = 200;
 double		VacuumCostDelay = 0;
-
+double		VacuumCostTotalDelay = 0;
 int			VacuumPageHit = 0;
 int			VacuumPageMiss = 0;
 int			VacuumPageDirty = 0;
 
+int	_nhit = 0;
+int _nmiss = 0;
+int _ndirty = 0;
+
 int			VacuumCostBalance = 0;	/* working state for vacuum */
 bool		VacuumCostActive = false;
 
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index bc6e03f..8d95b6e 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -251,6 +251,10 @@ extern int	VacuumCostPageMiss;
 extern int	VacuumCostPageDirty;
 extern int	VacuumCostLimit;
 extern double VacuumCostDelay;
+extern double VacuumCostTotalDelay;
+extern int	_nhit;
+extern int _nmiss;
+extern int _ndirty;
 
 extern int	VacuumPageHit;
 extern int	VacuumPageMiss;
