From 715bb7dec39f74c87df5b06a0e3b2138ca110fff Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 17 Dec 2019 14:24:26 +0900
Subject: [PATCH v36 2/3] Add parallel option to VACUUM command

This change adds a PARALLEL option to the VACUUM command that enables
us to perform index vacuuming and index cleanup with background
workers. Each index is processed by one vacuum process, so parallel
vacuum can be used only when the table has at least two indexes, and
the parallel degree cannot be larger than the number of indexes that
the table has.

The parallel degree is either specified by the user or determined based
on the number of indexes that the table has, and is further limited by
max_parallel_maintenance_workers. The table size and index sizes don't
affect it.
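
For example, the following command (with an arbitrary table name tbl)
runs the index vacuuming and index cleanup phases with up to two
parallel workers:

    VACUUM (PARALLEL 2) tbl;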
---
 doc/src/sgml/config.sgml              |   14 +-
 doc/src/sgml/ref/vacuum.sgml          |   45 +
 src/backend/access/heap/vacuumlazy.c  | 1227 ++++++++++++++++++++++---
 src/backend/access/transam/parallel.c |   16 +-
 src/backend/commands/vacuum.c         |  126 ++-
 src/backend/executor/execParallel.c   |    2 +-
 src/backend/postmaster/autovacuum.c   |    2 +
 src/bin/psql/tab-complete.c           |    2 +-
 src/include/access/heapam.h           |    3 +
 src/include/access/parallel.h         |    5 +-
 src/include/commands/vacuum.h         |   10 +
 src/test/regress/expected/vacuum.out  |   26 +
 src/test/regress/sql/vacuum.sql       |   25 +
 13 files changed, 1380 insertions(+), 123 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 5d1c90282f..74756277b7 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2308,13 +2308,13 @@ include_dir 'conf.d'
        <listitem>
         <para>
          Sets the maximum number of parallel workers that can be
-         started by a single utility command.  Currently, the only
-         parallel utility command that supports the use of parallel
-         workers is <command>CREATE INDEX</command>, and only when
-         building a B-tree index.  Parallel workers are taken from the
-         pool of processes established by <xref
-         linkend="guc-max-worker-processes"/>, limited by <xref
-         linkend="guc-max-parallel-workers"/>.  Note that the requested
+         started by a single utility command.  Currently, the parallel
+         utility commands that support the use of parallel workers are
+         <command>CREATE INDEX</command> (only when building a B-tree index)
+         and <command>VACUUM</command> without the <literal>FULL</literal>
+         option.  Parallel workers are taken from the pool of processes
+         established by <xref linkend="guc-max-worker-processes"/>, limited
+         by <xref linkend="guc-max-parallel-workers"/>.  Note that the requested
          number of workers may not actually be available at run time.
          If this occurs, the utility operation will run with fewer
          workers than expected.  The default value is 2.  Setting this
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index f9b0fb8794..9fee083233 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -34,6 +34,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     SKIP_LOCKED [ <replaceable class="parameter">boolean</replaceable> ]
     INDEX_CLEANUP [ <replaceable class="parameter">boolean</replaceable> ]
     TRUNCATE [ <replaceable class="parameter">boolean</replaceable> ]
+    PARALLEL [ <replaceable class="parameter">integer</replaceable> ]
 
 <phrase>and <replaceable class="parameter">table_and_columns</replaceable> is:</phrase>
 
@@ -223,6 +224,32 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>PARALLEL</literal></term>
+    <listitem>
+     <para>
+      Perform the index vacuuming and index cleanup phases of
+      <command>VACUUM</command> in parallel using
+      <replaceable class="parameter">integer</replaceable> background workers
+      (for details of each vacuum phase, please refer to
+      <xref linkend="vacuum-phases"/>).  If the parallel degree
+      <replaceable class="parameter">integer</replaceable> is omitted,
+      then <command>VACUUM</command> decides the number of workers based
+      on the number of indexes on the relation that support parallel vacuum
+      operation, which is further limited by
+      <xref linkend="guc-max-parallel-maintenance-workers"/>.  Please note
+      that it is not guaranteed that the number of parallel workers specified
+      in <replaceable class="parameter">integer</replaceable> will be used
+      during execution.  It is possible for a vacuum to run with fewer workers
+      than specified, or even with no workers at all.  Only one worker can
+      be used per index, so parallel workers are launched only when there
+      are at least <literal>2</literal> indexes in the table.  Workers for
+      vacuum are launched before the start of each phase and exit at the end
+      of the phase.  These behaviors might change in a future release.  This
+      option can't be used with the <literal>FULL</literal> option.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="parameter">boolean</replaceable></term>
     <listitem>
@@ -237,6 +264,18 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><replaceable class="parameter">integer</replaceable></term>
+    <listitem>
+     <para>
+      Specifies a positive integer value passed to the selected option.
+      The <replaceable class="parameter">integer</replaceable> value can
+      also be omitted, in which case the value is decided by the command
+      based on the option used.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><replaceable class="parameter">table_name</replaceable></term>
     <listitem>
@@ -316,6 +355,12 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     more than a plain <command>VACUUM</command> would.
    </para>
 
+   <para>
+     The <option>PARALLEL</option> option is used only for vacuum purposes.
+     Even if this option is specified together with the
+     <option>ANALYZE</option> option, it does not affect <option>ANALYZE</option>.
+   </para>
+
    <para>
     <command>VACUUM</command> causes a substantial increase in I/O traffic,
     which might cause poor performance for other active sessions.  Therefore,
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index ab09d8408c..6f90a4dc40 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -22,6 +22,20 @@
  * of index scans performed.  So we don't use maintenance_work_mem memory for
  * the TID array, just enough to hold as many heap tuples as fit on one page.
  *
+ * Lazy vacuum supports parallel execution with parallel worker processes.  In
+ * a parallel lazy vacuum, we perform both index vacuuming and index cleanup
+ * with parallel worker processes.  Each index is processed by one vacuum
+ * process.  At the beginning of a lazy vacuum (at lazy_scan_heap) we prepare
+ * the parallel context and initialize the DSM segment that contains shared
+ * information as well as the memory space for storing dead tuples.  When
+ * starting either index vacuuming or index cleanup, we launch parallel worker
+ * processes.  Once all indexes are processed, the parallel worker processes
+ * exit.  The leader process then re-initializes the parallel context so that
+ * it can use the same DSM for multiple passes of index vacuum and for
+ * performing index cleanup.  Updating the index statistics requires writing
+ * to a system catalog, and since writes are not allowed during parallel mode,
+ * we update the index statistics after exiting from parallel mode.
  *
  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -36,25 +50,30 @@
 
 #include <math.h>
 
+#include "access/amapi.h"
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/heapam_xlog.h"
 #include "access/htup_details.h"
 #include "access/multixact.h"
+#include "access/parallel.h"
 #include "access/transam.h"
 #include "access/visibilitymap.h"
+#include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/storage.h"
 #include "commands/dbcommands.h"
 #include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
+#include "optimizer/paths.h"
 #include "pgstat.h"
 #include "portability/instr_time.h"
 #include "postmaster/autovacuum.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/lmgr.h"
+#include "tcop/tcopprot.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_rusage.h"
@@ -110,6 +129,142 @@
  */
 #define PREFETCH_SIZE			((BlockNumber) 32)
 
+/*
+ * DSM keys for parallel lazy vacuum.  Unlike other parallel execution code,
+ * we don't need to worry about DSM keys conflicting with plan_node_id here,
+ * so we can use small integers.
+ */
+#define PARALLEL_VACUUM_KEY_SHARED			1
+#define PARALLEL_VACUUM_KEY_DEAD_TUPLES		2
+#define PARALLEL_VACUUM_KEY_QUERY_TEXT		3
+
+/*
+ * Macro to check if we are in a parallel lazy vacuum.  If true, we are
+ * in parallel mode and have prepared the DSM segment.
+ */
+#define ParallelVacuumIsActive(lps) (((LVParallelState *) (lps)) != NULL)
+
+/*
+ * LVDeadTuples stores the dead tuple TIDs collected during the heap scan.
+ * This is allocated in the DSM segment in parallel mode and in local memory
+ * in non-parallel mode.
+ */
+typedef struct LVDeadTuples
+{
+	int			max_tuples;		/* # slots allocated in array */
+	int			num_tuples;		/* current # of entries */
+	/* List of TIDs of tuples we intend to delete */
+	/* NB: this list is ordered by TID address */
+	ItemPointerData itemptrs[FLEXIBLE_ARRAY_MEMBER];	/* array of
+														 * ItemPointerData */
+} LVDeadTuples;
+
+#define SizeOfLVDeadTuples (offsetof(LVDeadTuples, itemptrs) + sizeof(ItemPointerData))
+
+/*
+ * Shared information among parallel vacuum workers.  This is allocated in
+ * the DSM segment.
+ */
+typedef struct LVShared
+{
+	/*
+	 * Target table relid and log level.  These fields are not modified during
+	 * the lazy vacuum.
+	 */
+	Oid			relid;
+	int			elevel;
+
+	/*
+	 * An indication for vacuum workers to perform either index vacuuming or
+	 * index cleanup.  first_time is true only if for_cleanup is true and
+	 * bulk-deletion has not been performed yet.
+	 */
+	bool		for_cleanup;
+	bool		first_time;
+
+	/*
+	 * Fields for both index vacuum and cleanup.
+	 *
+	 * reltuples is the total number of input heap tuples.  We set it to the
+	 * old live tuples in the index vacuuming case and to the new live tuples
+	 * in the index cleanup case.
+	 *
+	 * estimated_count is true if reltuples is an estimated value.
+	 */
+	double		reltuples;
+	bool		estimated_count;
+
+	/*
+	 * In a single-process lazy vacuum we could consume more memory during
+	 * index vacuuming or cleanup apart from the memory for heap scanning.  In
+	 * parallel index vacuuming, since individual vacuum workers can consume
+	 * memory equal to maintenance_work_mem, the new maintenance_work_mem for
+	 * each worker is set such that the parallel operation doesn't consume
+	 * more memory than a single-process lazy vacuum.
+	 */
+	int			maintenance_work_mem_worker;
+
+	/*
+	 * Shared vacuum cost balance.  During parallel index vacuuming,
+	 * VacuumSharedCostBalance points to this value and it accumulates the
+	 * balance of all parallel vacuum workers.
+	 */
+	pg_atomic_uint32 cost_balance;
+
+	/*
+	 * Number of active parallel workers.  This is used for computing the
+	 * minimum threshold of the vacuum cost balance before a worker sleeps
+	 * for the cost-based delay.
+	 */
+	pg_atomic_uint32 active_nworkers;
+
+	/*
+	 * Variables to control parallel index vacuuming.  We have a bitmap to
+	 * indicate which indexes have stats in shared memory.  A set bit in the
+	 * map indicates that the particular index supports parallel vacuum.
+	 */
+	pg_atomic_uint32 idx;		/* counter for vacuuming and clean up */
+	pg_atomic_uint32 nprocessed;	/* # of indexes done during parallel
+									 * execution */
+	uint32		offset;			/* sizeof header incl. bitmap */
+	bits8		bitmap[FLEXIBLE_ARRAY_MEMBER];	/* bit map of NULLs */
+
+	/* Shared index statistics data follows at end of struct */
+} LVShared;
+
+#define SizeOfLVShared (offsetof(LVShared, bitmap) + sizeof(bits8))
+#define GetSharedIndStats(s) \
+	((LVSharedIndStats *)((char *)(s) + ((LVShared *)(s))->offset))
+#define IndStatsIsNull(s, i) \
+	(!(((LVShared *)(s))->bitmap[(i) >> 3] & (1 << ((i) & 0x07))))
+
+/*
+ * Struct for index bulk-deletion statistics used for parallel lazy
+ * vacuum.  This is allocated in the DSM segment.
+ */
+typedef struct LVSharedIndStats
+{
+	bool		updated;		/* are the stats updated? */
+	IndexBulkDeleteResult stats;
+} LVSharedIndStats;
+
+/* Struct for maintaining a parallel vacuum state. */
+typedef struct LVParallelState
+{
+	ParallelContext *pcxt;
+
+	/* Shared information among parallel vacuum workers */
+	LVShared   *lvshared;
+
+	/*
+	 * The number of indexes that support parallel index bulk-deletion,
+	 * parallel index cleanup, and parallel index conditional cleanup,
+	 * respectively.
+	 */
+	int			nindexes_parallel_bulkdel;
+	int			nindexes_parallel_cleanup;
+	int			nindexes_parallel_condcleanup;
+} LVParallelState;
+
 typedef struct LVRelStats
 {
 	/* useindex = true means two-pass strategy; false means one-pass */
@@ -128,11 +283,7 @@ typedef struct LVRelStats
 	BlockNumber pages_removed;
 	double		tuples_deleted;
 	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
-	/* List of TIDs of tuples we intend to delete */
-	/* NB: this list is ordered by TID address */
-	int			num_dead_tuples;	/* current # of entries */
-	int			max_dead_tuples;	/* # slots allocated in array */
-	ItemPointer dead_tuples;	/* array of ItemPointerData */
+	LVDeadTuples *dead_tuples;
 	int			num_index_scans;
 	TransactionId latestRemovedXid;
 	bool		lock_waiter_detected;
@@ -148,19 +299,17 @@ static MultiXactId MultiXactCutoff;
 
 static BufferAccessStrategy vac_strategy;
 
-
 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, VacuumParams *params,
 						   LVRelStats *vacrelstats, Relation *Irel, int nindexes,
 						   bool aggressive);
 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
-static void lazy_vacuum_index(Relation indrel,
-							  IndexBulkDeleteResult **stats,
-							  LVRelStats *vacrelstats);
+static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
+							  LVDeadTuples *dead_tuples, double reltuples);
 static void lazy_cleanup_index(Relation indrel,
-							   IndexBulkDeleteResult *stats,
-							   LVRelStats *vacrelstats);
+							   IndexBulkDeleteResult **stats,
+							   double reltuples, bool estimated_count);
 static int	lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 							 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
 static bool should_attempt_truncation(VacuumParams *params,
@@ -169,12 +318,42 @@ static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
 static BlockNumber count_nondeletable_pages(Relation onerel,
 											LVRelStats *vacrelstats);
 static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
-static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
+static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
 								   ItemPointer itemptr);
 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 static int	vac_cmp_itemptr(const void *left, const void *right);
 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
 									 TransactionId *visibility_cutoff_xid, bool *all_frozen);
+static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+										 LVRelStats *vacrelstats, LVParallelState *lps,
+										 int nindexes);
+static void parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
+								  LVShared *lvshared, LVDeadTuples *dead_tuples,
+								  int nindexes);
+static void vacuum_indexes_leader(Relation *Irel, int nindexes, IndexBulkDeleteResult **stats,
+								  LVRelStats *vacrelstats, LVParallelState *lps);
+static void vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
+							 LVShared *lvshared, LVSharedIndStats *shared_indstats,
+							 LVDeadTuples *dead_tuples);
+static void lazy_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+								LVRelStats *vacrelstats, LVParallelState *lps,
+								int nindexes);
+static void lazy_cleanup_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+								 LVRelStats *vacrelstats, LVParallelState *lps,
+								 int nindexes);
+static long compute_max_dead_tuples(BlockNumber relblocks, bool useindex);
+static int	compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested);
+static void prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
+									 int nindexes);
+static void update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
+									int nindexes);
+static LVParallelState *begin_parallel_vacuum(Oid relid, Relation *Irel,
+											  LVRelStats *vacrelstats, BlockNumber nblocks,
+											  int nindexes, int nrequested);
+static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
+								LVParallelState *lps, int nindexes);
+static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
+static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
 
 
 /*
@@ -488,6 +667,18 @@ vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
  *		dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
  *		to reclaim dead line pointers.
  *
+ *		If the table has at least two indexes and parallel lazy vacuum is
+ *		requested, we execute both index vacuuming and index cleanup with
+ *		parallel workers.  In parallel lazy vacuum, we enter parallel mode and
+ *		then create both the parallel context and the DSM segment before
+ *		starting the heap scan so that we can record dead tuples to the DSM
+ *		segment.  All parallel workers are launched at the beginning of index
+ *		vacuuming and index cleanup, and they exit once done with all indexes.
+ *		At the end of this function we exit from parallel mode.  Index
+ *		bulk-deletion results are stored in the DSM segment, and we update the
+ *		index statistics as a whole after exiting from parallel mode, since
+ *		writes are not allowed during parallel mode.
+ *
  *		If there are no indexes then we can reclaim line pointers on the fly;
  *		dead line pointers need only be retained until all index pointers that
  *		reference them have been killed.
@@ -496,6 +687,8 @@ static void
 lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			   Relation *Irel, int nindexes, bool aggressive)
 {
+	LVParallelState *lps = NULL;
+	LVDeadTuples *dead_tuples;
 	BlockNumber nblocks,
 				blkno;
 	HeapTupleData tuple;
@@ -553,13 +746,28 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	vacrelstats->nonempty_pages = 0;
 	vacrelstats->latestRemovedXid = InvalidTransactionId;
 
-	lazy_space_alloc(vacrelstats, nblocks);
+	/*
+	 * Try to initialize the parallel vacuum if requested
+	 */
+	if (params->nworkers >= 0 && vacrelstats->useindex)
+		lps = begin_parallel_vacuum(RelationGetRelid(onerel), Irel,
+									vacrelstats, nblocks, nindexes,
+									params->nworkers);
+
+	/*
+	 * Allocate the space for dead tuples if parallel vacuum is not
+	 * initialized.
+	 */
+	if (!ParallelVacuumIsActive(lps))
+		lazy_space_alloc(vacrelstats, nblocks);
+
+	dead_tuples = vacrelstats->dead_tuples;
 	frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
 
 	/* Report that we're scanning the heap, advertising total # of blocks */
 	initprog_val[0] = PROGRESS_VACUUM_PHASE_SCAN_HEAP;
 	initprog_val[1] = nblocks;
-	initprog_val[2] = vacrelstats->max_dead_tuples;
+	initprog_val[2] = dead_tuples->max_tuples;
 	pgstat_progress_update_multi_param(3, initprog_index, initprog_val);
 
 	/*
@@ -737,8 +945,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		 * If we are close to overrunning the available space for dead-tuple
 		 * TIDs, pause and do a cycle of vacuuming before we tackle this page.
 		 */
-		if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
-			vacrelstats->num_dead_tuples > 0)
+		if ((dead_tuples->max_tuples - dead_tuples->num_tuples) < MaxHeapTuplesPerPage &&
+			dead_tuples->num_tuples > 0)
 		{
 			const int	hvp_index[] = {
 				PROGRESS_VACUUM_PHASE,
@@ -766,10 +974,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 										 PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
 
 			/* Remove index entries */
-			for (i = 0; i < nindexes; i++)
-				lazy_vacuum_index(Irel[i],
-								  &indstats[i],
-								  vacrelstats);
+			lazy_vacuum_indexes(Irel, indstats, vacrelstats, lps, nindexes);
 
 			/*
 			 * Report that we are now vacuuming the heap.  We also increase
@@ -789,7 +994,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 * not to reset latestRemovedXid since we want that value to be
 			 * valid.
 			 */
-			vacrelstats->num_dead_tuples = 0;
+			dead_tuples->num_tuples = 0;
 			vacrelstats->num_index_scans++;
 
 			/*
@@ -985,7 +1190,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		has_dead_tuples = false;
 		nfrozen = 0;
 		hastup = false;
-		prev_dead_count = vacrelstats->num_dead_tuples;
+		prev_dead_count = dead_tuples->num_tuples;
 		maxoff = PageGetMaxOffsetNumber(page);
 
 		/*
@@ -1024,7 +1229,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 */
 			if (ItemIdIsDead(itemid))
 			{
-				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
+				lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
 				all_visible = false;
 				continue;
 			}
@@ -1170,7 +1375,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 
 			if (tupgone)
 			{
-				lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
+				lazy_record_dead_tuple(dead_tuples, &(tuple.t_self));
 				HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
 													   &vacrelstats->latestRemovedXid);
 				tups_vacuumed += 1;
@@ -1240,7 +1445,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		 * doing a second scan. Also we don't do that but forget dead tuples
 		 * when index cleanup is disabled.
 		 */
-		if (!vacrelstats->useindex && vacrelstats->num_dead_tuples > 0)
+		if (!vacrelstats->useindex && dead_tuples->num_tuples > 0)
 		{
 			if (nindexes == 0)
 			{
@@ -1269,7 +1474,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 * not to reset latestRemovedXid since we want that value to be
 			 * valid.
 			 */
-			vacrelstats->num_dead_tuples = 0;
+			dead_tuples->num_tuples = 0;
 
 			/*
 			 * Periodically do incremental FSM vacuuming to make newly-freed
@@ -1384,7 +1589,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		 * page, so remember its free space as-is.  (This path will always be
 		 * taken if there are no indexes.)
 		 */
-		if (vacrelstats->num_dead_tuples == prev_dead_count)
+		if (dead_tuples->num_tuples == prev_dead_count)
 			RecordPageWithFreeSpace(onerel, blkno, freespace);
 	}
 
@@ -1418,7 +1623,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 
 	/* If any tuples need to be deleted, perform final vacuum cycle */
 	/* XXX put a threshold on min number of tuples here? */
-	if (vacrelstats->num_dead_tuples > 0)
+	if (dead_tuples->num_tuples > 0)
 	{
 		const int	hvp_index[] = {
 			PROGRESS_VACUUM_PHASE,
@@ -1434,10 +1639,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 									 PROGRESS_VACUUM_PHASE_VACUUM_INDEX);
 
 		/* Remove index entries */
-		for (i = 0; i < nindexes; i++)
-			lazy_vacuum_index(Irel[i],
-							  &indstats[i],
-							  vacrelstats);
+		lazy_vacuum_indexes(Irel, indstats, vacrelstats, lps, nindexes);
 
 		/* Report that we are now vacuuming the heap */
 		hvp_val[0] = PROGRESS_VACUUM_PHASE_VACUUM_HEAP;
@@ -1461,12 +1663,19 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 								 PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
 
-	/* Do post-vacuum cleanup and statistics update for each index */
+	/* Do post-vacuum cleanup */
 	if (vacrelstats->useindex)
-	{
-		for (i = 0; i < nindexes; i++)
-			lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
-	}
+		lazy_cleanup_indexes(Irel, indstats, vacrelstats, lps, nindexes);
+
+	/*
+	 * End parallel mode before updating index statistics as we cannot write
+	 * during parallel mode.
+	 */
+	if (ParallelVacuumIsActive(lps))
+		end_parallel_vacuum(Irel, indstats, lps, nindexes);
+
+	/* Update index statistics */
+	update_index_statistics(Irel, indstats, nindexes);
 
 	/* If no indexes, make log report that lazy_vacuum_heap would've made */
 	if (vacuumed_pages)
@@ -1532,7 +1741,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 	npages = 0;
 
 	tupindex = 0;
-	while (tupindex < vacrelstats->num_dead_tuples)
+	while (tupindex < vacrelstats->dead_tuples->num_tuples)
 	{
 		BlockNumber tblk;
 		Buffer		buf;
@@ -1541,7 +1750,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 
 		vacuum_delay_point();
 
-		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
+		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
 								 vac_strategy);
 		if (!ConditionalLockBufferForCleanup(buf))
@@ -1589,6 +1798,7 @@ static int
 lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 				 int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer)
 {
+	LVDeadTuples *dead_tuples = vacrelstats->dead_tuples;
 	Page		page = BufferGetPage(buffer);
 	OffsetNumber unused[MaxOffsetNumber];
 	int			uncnt = 0;
@@ -1599,16 +1809,16 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 
 	START_CRIT_SECTION();
 
-	for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
+	for (; tupindex < dead_tuples->num_tuples; tupindex++)
 	{
 		BlockNumber tblk;
 		OffsetNumber toff;
 		ItemId		itemid;
 
-		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
+		tblk = ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
 		if (tblk != blkno)
 			break;				/* past end of tuples for this block */
-		toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
+		toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
 		itemid = PageGetItemId(page, toff);
 		ItemIdSetUnused(itemid);
 		unused[uncnt++] = toff;
@@ -1729,19 +1939,380 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup)
 	return false;
 }
 
+/*
+ * Perform index vacuuming or index cleanup with parallel workers.  This
+ * function must be used by the parallel vacuum leader process. The caller
+ * must set lps->lvshared->for_cleanup to indicate whether to perform vacuum
+ * or cleanup.
+ */
+static void
+lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+							 LVRelStats *vacrelstats, LVParallelState *lps,
+							 int nindexes)
+{
+	int			nworkers;
+
+	Assert(!IsParallelWorker());
+	Assert(ParallelVacuumIsActive(lps));
+	Assert(nindexes > 0);
+
+	/* Determine the number of parallel workers to launch */
+	if (lps->lvshared->for_cleanup)
+	{
+		if (lps->lvshared->first_time)
+			nworkers = lps->nindexes_parallel_cleanup +
+				lps->nindexes_parallel_condcleanup - 1;
+		else
+			nworkers = lps->nindexes_parallel_cleanup - 1;
+
+	}
+	else
+		nworkers = lps->nindexes_parallel_bulkdel - 1;
+
+	/* Cap by the number of workers computed at the beginning of parallel lazy vacuum */
+	nworkers = Min(nworkers, lps->pcxt->nworkers);
+
+	/* Setup the shared cost-based vacuum delay and launch workers */
+	if (nworkers > 0)
+	{
+		if (vacrelstats->num_index_scans > 0)
+		{
+			/* Reset the processing counts */
+			pg_atomic_write_u32(&(lps->lvshared->idx), 0);
+			pg_atomic_write_u32(&(lps->lvshared->nprocessed), 0);
+
+			/* Reinitialize the parallel context to relaunch parallel workers */
+			ReinitializeParallelDSM(lps->pcxt, nworkers);
+		}
+
+		/* Enable shared cost balance */
+		VacuumSharedCostBalance = &(lps->lvshared->cost_balance);
+		VacuumActiveNWorkers = &(lps->lvshared->active_nworkers);
+
+		/*
+		 * Set up shared cost balance and the number of active workers for
+		 * vacuum delay.
+		 */
+		pg_atomic_write_u32(VacuumSharedCostBalance, VacuumCostBalance);
+		pg_atomic_write_u32(VacuumActiveNWorkers, 0);
+
+		LaunchParallelWorkers(lps->pcxt);
+
+		if (lps->pcxt->nworkers_launched > 0)
+		{
+			/*
+			 * Reset the local value so that we compute cost balance during
+			 * parallel index vacuuming.
+			 */
+			VacuumCostBalance = 0;
+			VacuumCostBalanceLocal = 0;
+		}
+		else
+		{
+			/*
+			 * Disable shared cost balance if we are not able to launch
+			 * workers.
+			 */
+			VacuumSharedCostBalance = NULL;
+			VacuumActiveNWorkers = NULL;
+		}
+
+		if (lps->lvshared->for_cleanup)
+			ereport(elevel,
+					(errmsg(ngettext("launched %d parallel vacuum worker for index cleanup (planned: %d)",
+									 "launched %d parallel vacuum workers for index cleanup (planned: %d)",
+									 lps->pcxt->nworkers_launched),
+							lps->pcxt->nworkers_launched, nworkers)));
+		else
+			ereport(elevel,
+					(errmsg(ngettext("launched %d parallel vacuum worker for index vacuuming (planned: %d)",
+									 "launched %d parallel vacuum workers for index vacuuming (planned: %d)",
+									 lps->pcxt->nworkers_launched),
+							lps->pcxt->nworkers_launched, nworkers)));
+	}
+
+	/* Process the indexes that can be processed only by the leader process */
+	vacuum_indexes_leader(Irel, nindexes, stats, vacrelstats, lps);
+
+	/*
+	 * Join as a parallel worker.  The leader process alone processes all the
+	 * indexes in the case where no workers are launched.
+	 */
+	parallel_vacuum_index(Irel, stats, lps->lvshared,
+						  vacrelstats->dead_tuples, nindexes);
+
+	/* Wait for all vacuum workers to finish */
+	WaitForParallelWorkersToFinish(lps->pcxt);
+
+	/* Carry the shared balance value to heap scan */
+	if (VacuumSharedCostBalance)
+		VacuumCostBalance = pg_atomic_read_u32(VacuumSharedCostBalance);
+
+	if (nworkers > 0)
+	{
+		/* Disable shared cost balance */
+		VacuumSharedCostBalance = NULL;
+		VacuumActiveNWorkers = NULL;
+	}
+}
+
+/*
+ * Index vacuum/cleanup routine used by the leader process and parallel
+ * vacuum worker processes to process the indexes in parallel.
+ */
+static void
+parallel_vacuum_index(Relation *Irel, IndexBulkDeleteResult **stats,
+					  LVShared *lvshared, LVDeadTuples *dead_tuples,
+					  int nindexes)
+{
+	/*
+	 * Increment the active worker count if we are able to launch any worker.
+	 */
+	if (VacuumActiveNWorkers)
+		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
+
+	/* Loop until all indexes are vacuumed */
+	for (;;)
+	{
+		int			idx;
+		LVSharedIndStats *shared_indstats;
+
+		/* Get an index number to process */
+		idx = pg_atomic_fetch_add_u32(&(lvshared->idx), 1);
+
+		/* Done for all indexes? */
+		if (idx >= nindexes)
+			break;
+
+		/* Skip processing indexes that don't support parallel operation */
+		if (get_indstats(lvshared, idx) == NULL ||
+			skip_parallel_vacuum_index(Irel[idx], lvshared))
+			continue;
+
+		/* Increment the processing count */
+		pg_atomic_add_fetch_u32(&(lvshared->nprocessed), 1);
+
+		/* Get the index statistics of this index from DSM */
+		shared_indstats = get_indstats(lvshared, idx);
+
+		/*
+		 * This must exist in DSM as we reach here only for indexes that
+		 * support the parallel operation.
+		 */
+		Assert(shared_indstats);
+
+		/* Do vacuum or cleanup one index */
+		vacuum_one_index(Irel[idx], &(stats[idx]), lvshared, shared_indstats,
+						 dead_tuples);
+	}
+
+	/*
+	 * We have completed the index vacuum so decrement the active worker
+	 * count.
+	 */
+	if (VacuumActiveNWorkers)
+		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
+}
+
+/*
+ * Vacuum or cleanup indexes that can be processed only by the leader process
+ * because these indexes don't support parallel operation for that phase.
+ * Therefore this function must be called only by the leader process.
+ */
+static void
+vacuum_indexes_leader(Relation *Irel, int nindexes, IndexBulkDeleteResult **stats,
+					  LVRelStats *vacrelstats, LVParallelState *lps)
+{
+	int			i;
+
+	Assert(!IsParallelWorker());
+
+	/*
+	 * Increment the active worker count if we are able to launch any worker.
+	 */
+	if (VacuumActiveNWorkers)
+		pg_atomic_add_fetch_u32(VacuumActiveNWorkers, 1);
+
+	for (i = 0; i < nindexes; i++)
+	{
+		bool		leader_only = (get_indstats(lps->lvshared, i) == NULL ||
+								   skip_parallel_vacuum_index(Irel[i], lps->lvshared));
+
+		/* Skip the indexes that can be processed by parallel workers */
+		if (!leader_only)
+			continue;
+
+		vacuum_one_index(Irel[i], &(stats[i]), lps->lvshared,
+						 get_indstats(lps->lvshared, i),
+						 vacrelstats->dead_tuples);
+	}
+
+	/*
+	 * We have completed the index vacuum so decrement the active worker
+	 * count.
+	 */
+	if (VacuumActiveNWorkers)
+		pg_atomic_sub_fetch_u32(VacuumActiveNWorkers, 1);
+}
+
+/*
+ * Vacuum or cleanup one index, either by the leader process or by one of the
+ * worker processes.  After processing the index, this function copies the index
+ * statistics returned from ambulkdelete and amvacuumcleanup to the DSM
+ * segment.
+ */
+static void
+vacuum_one_index(Relation indrel, IndexBulkDeleteResult **stats,
+				 LVShared *lvshared, LVSharedIndStats *shared_indstats,
+				 LVDeadTuples *dead_tuples)
+{
+	IndexBulkDeleteResult *bulkdelete_res = NULL;
+
+	if (shared_indstats)
+	{
+		/* Get the space for IndexBulkDeleteResult */
+		bulkdelete_res = &(shared_indstats->stats);
+
+		/*
+		 * Update the pointer to the corresponding bulk-deletion result if
+		 * someone has already updated it.
+		 */
+		if (shared_indstats->updated && *stats == NULL)
+			*stats = bulkdelete_res;
+	}
+
+	/* Do vacuum or cleanup one index */
+	if (lvshared->for_cleanup)
+		lazy_cleanup_index(indrel, stats, lvshared->reltuples,
+						   lvshared->estimated_count);
+	else
+		lazy_vacuum_index(indrel, stats, dead_tuples,
+						  lvshared->reltuples);
+
+	/*
+	 * Copy the index bulk-deletion result returned from ambulkdelete and
+	 * amvacuumcleanup to the DSM segment if it's the first time we get it
+	 * from them, because they allocate it locally and it's possible that the
+	 * index will be vacuumed by a different vacuum process the next time.
+	 * Copying the result normally happens only after the first index
+	 * vacuuming.  From the second time onwards, we pass the result on the
+	 * DSM segment so that they update it there directly.
+	 *
+	 * Since all vacuum workers write the bulk-deletion result at different
+	 * slots we can write them without locking.
+	 */
+	if (shared_indstats && !shared_indstats->updated && *stats != NULL)
+	{
+		memcpy(bulkdelete_res, *stats, sizeof(IndexBulkDeleteResult));
+		shared_indstats->updated = true;
+
+		/*
+		 * Now that stats[idx] points to the DSM segment, we no longer need
+		 * the locally allocated result.
+		 */
+		pfree(*stats);
+		*stats = bulkdelete_res;
+	}
+}
+
+/*
+ * Vacuum indexes.  We process the indexes serially unless we are doing
+ * parallel vacuum.
+ */
+static void
+lazy_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+					LVRelStats *vacrelstats, LVParallelState *lps,
+					int nindexes)
+{
+	int			idx;
+
+	Assert(!IsParallelWorker());
+	Assert(nindexes > 0);
+
+	/* Perform index vacuuming with parallel workers for parallel vacuum. */
+	if (ParallelVacuumIsActive(lps))
+	{
+		/* Tell parallel workers to do index vacuuming */
+		lps->lvshared->for_cleanup = false;
+		lps->lvshared->first_time = false;
+
+		/*
+		 * We can only provide an approximate value of num_heap_tuples in
+		 * vacuum cases.
+		 */
+		lps->lvshared->reltuples = vacrelstats->old_live_tuples;
+		lps->lvshared->estimated_count = true;
+
+		lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
+	}
+	else
+	{
+		for (idx = 0; idx < nindexes; idx++)
+			lazy_vacuum_index(Irel[idx], &stats[idx], vacrelstats->dead_tuples,
+							  vacrelstats->old_live_tuples);
+	}
+}
+
+/*
+ * Cleanup indexes.  We process the indexes serially unless we are doing
+ * parallel vacuum.
+ */
+static void
+lazy_cleanup_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
+					 LVRelStats *vacrelstats, LVParallelState *lps,
+					 int nindexes)
+{
+	int			idx;
+
+	Assert(!IsParallelWorker());
+	Assert(nindexes > 0);
+
+	/*
+	 * If parallel vacuum is active we perform index cleanup with parallel
+	 * workers.
+	 */
+	if (ParallelVacuumIsActive(lps))
+	{
+		/* Tell parallel workers to do index cleanup */
+		lps->lvshared->for_cleanup = true;
+		lps->lvshared->first_time =
+			(vacrelstats->num_index_scans == 0);
+
+		/*
+		 * Now we can provide a better estimate of total number of surviving
+		 * tuples (we assume indexes are more interested in that than in the
+		 * number of nominally live tuples).
+		 */
+		lps->lvshared->reltuples = vacrelstats->new_rel_tuples;
+		lps->lvshared->estimated_count =
+			(vacrelstats->tupcount_pages < vacrelstats->rel_pages);
+
+		lazy_parallel_vacuum_indexes(Irel, stats, vacrelstats, lps, nindexes);
+	}
+	else
+	{
+		for (idx = 0; idx < nindexes; idx++)
+			lazy_cleanup_index(Irel[idx], &stats[idx],
+							   vacrelstats->new_rel_tuples,
+							   vacrelstats->tupcount_pages < vacrelstats->rel_pages);
+	}
+}
 
 /*
  *	lazy_vacuum_index() -- vacuum one index relation.
  *
  *		Delete all the index entries pointing to tuples listed in
- *		vacrelstats->dead_tuples, and update running statistics.
+ *		dead_tuples, and update running statistics.
+ *
+ *		reltuples is the number of heap tuples to be passed to the
+ *		bulkdelete callback.
  */
 static void
-lazy_vacuum_index(Relation indrel,
-				  IndexBulkDeleteResult **stats,
-				  LVRelStats *vacrelstats)
+lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
+				  LVDeadTuples *dead_tuples, double reltuples)
 {
 	IndexVacuumInfo ivinfo;
+	const char *msg;
 	PGRUsage	ru0;
 
 	pg_rusage_init(&ru0);
@@ -1751,30 +2322,38 @@ lazy_vacuum_index(Relation indrel,
 	ivinfo.report_progress = false;
 	ivinfo.estimated_count = true;
 	ivinfo.message_level = elevel;
-	/* We can only provide an approximate value of num_heap_tuples here */
-	ivinfo.num_heap_tuples = vacrelstats->old_live_tuples;
+	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vac_strategy;
 
 	/* Do bulk deletion */
 	*stats = index_bulk_delete(&ivinfo, *stats,
-							   lazy_tid_reaped, (void *) vacrelstats);
+							   lazy_tid_reaped, (void *) dead_tuples);
+
+	if (IsParallelWorker())
+		msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker");
+	else
+		msg = gettext_noop("scanned index \"%s\" to remove %d row versions");
 
 	ereport(elevel,
-			(errmsg("scanned index \"%s\" to remove %d row versions",
+			(errmsg(msg,
 					RelationGetRelationName(indrel),
-					vacrelstats->num_dead_tuples),
+					dead_tuples->num_tuples),
 			 errdetail_internal("%s", pg_rusage_show(&ru0))));
 }
 
 /*
  *	lazy_cleanup_index() -- do post-vacuum cleanup for one index relation.
+ *
+ *		reltuples is the number of heap tuples, and estimated_count is true
+ *		if reltuples is an estimated value.
  */
 static void
 lazy_cleanup_index(Relation indrel,
-				   IndexBulkDeleteResult *stats,
-				   LVRelStats *vacrelstats)
+				   IndexBulkDeleteResult **stats,
+				   double reltuples, bool estimated_count)
 {
 	IndexVacuumInfo ivinfo;
+	const char *msg;
 	PGRUsage	ru0;
 
 	pg_rusage_init(&ru0);
@@ -1782,49 +2361,33 @@ lazy_cleanup_index(Relation indrel,
 	ivinfo.index = indrel;
 	ivinfo.analyze_only = false;
 	ivinfo.report_progress = false;
-	ivinfo.estimated_count = (vacrelstats->tupcount_pages < vacrelstats->rel_pages);
+	ivinfo.estimated_count = estimated_count;
 	ivinfo.message_level = elevel;
 
-	/*
-	 * Now we can provide a better estimate of total number of surviving
-	 * tuples (we assume indexes are more interested in that than in the
-	 * number of nominally live tuples).
-	 */
-	ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
+	ivinfo.num_heap_tuples = reltuples;
 	ivinfo.strategy = vac_strategy;
 
-	stats = index_vacuum_cleanup(&ivinfo, stats);
+	*stats = index_vacuum_cleanup(&ivinfo, *stats);
 
-	if (!stats)
+	if (!(*stats))
 		return;
 
-	/*
-	 * Now update statistics in pg_class, but only if the index says the count
-	 * is accurate.
-	 */
-	if (!stats->estimated_count)
-		vac_update_relstats(indrel,
-							stats->num_pages,
-							stats->num_index_tuples,
-							0,
-							false,
-							InvalidTransactionId,
-							InvalidMultiXactId,
-							false);
+	if (IsParallelWorker())
+		msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages as reported by parallel vacuum worker");
+	else
+		msg = gettext_noop("index \"%s\" now contains %.0f row versions in %u pages");
 
 	ereport(elevel,
-			(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
+			(errmsg(msg,
 					RelationGetRelationName(indrel),
-					stats->num_index_tuples,
-					stats->num_pages),
+					(*stats)->num_index_tuples,
+					(*stats)->num_pages),
 			 errdetail("%.0f index row versions were removed.\n"
 					   "%u index pages have been deleted, %u are currently reusable.\n"
 					   "%s.",
-					   stats->tuples_removed,
-					   stats->pages_deleted, stats->pages_free,
+					   (*stats)->tuples_removed,
+					   (*stats)->pages_deleted, (*stats)->pages_free,
 					   pg_rusage_show(&ru0))));
-
-	pfree(stats);
 }
 
 /*
@@ -2132,19 +2695,17 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 }
 
 /*
- * lazy_space_alloc - space allocation decisions for lazy vacuum
- *
- * See the comments at the head of this file for rationale.
+ * Return the maximum number of dead tuples we can record.
  */
-static void
-lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
+static long
+compute_max_dead_tuples(BlockNumber relblocks, bool useindex)
 {
 	long		maxtuples;
 	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
 	autovacuum_work_mem != -1 ?
 	autovacuum_work_mem : maintenance_work_mem;
 
-	if (vacrelstats->useindex)
+	if (useindex)
 	{
 		maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
 		maxtuples = Min(maxtuples, INT_MAX);
@@ -2158,34 +2719,49 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
 		maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
 	}
 	else
-	{
 		maxtuples = MaxHeapTuplesPerPage;
-	}
 
-	vacrelstats->num_dead_tuples = 0;
-	vacrelstats->max_dead_tuples = (int) maxtuples;
-	vacrelstats->dead_tuples = (ItemPointer)
-		palloc(maxtuples * sizeof(ItemPointerData));
+	return maxtuples;
+}
+
+/*
+ * lazy_space_alloc - space allocation decisions for lazy vacuum
+ *
+ * See the comments at the head of this file for rationale.
+ */
+static void
+lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
+{
+	LVDeadTuples *dead_tuples = NULL;
+	long		maxtuples;
+
+	maxtuples = compute_max_dead_tuples(relblocks, vacrelstats->useindex);
+
+	dead_tuples = (LVDeadTuples *)
+		palloc(SizeOfLVDeadTuples + maxtuples * sizeof(ItemPointerData));
+	dead_tuples->num_tuples = 0;
+	dead_tuples->max_tuples = (int) maxtuples;
+
+	vacrelstats->dead_tuples = dead_tuples;
 }
 
 /*
  * lazy_record_dead_tuple - remember one deletable tuple
  */
 static void
-lazy_record_dead_tuple(LVRelStats *vacrelstats,
-					   ItemPointer itemptr)
+lazy_record_dead_tuple(LVDeadTuples *dead_tuples, ItemPointer itemptr)
 {
 	/*
 	 * The array shouldn't overflow under normal behavior, but perhaps it
 	 * could if we are given a really small maintenance_work_mem. In that
 	 * case, just forget the last few tuples (we'll get 'em next time).
 	 */
-	if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
+	if (dead_tuples->num_tuples < dead_tuples->max_tuples)
 	{
-		vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
-		vacrelstats->num_dead_tuples++;
+		dead_tuples->itemptrs[dead_tuples->num_tuples] = *itemptr;
+		dead_tuples->num_tuples++;
 		pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES,
-									 vacrelstats->num_dead_tuples);
+									 dead_tuples->num_tuples);
 	}
 }
 
@@ -2199,12 +2775,12 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats,
 static bool
 lazy_tid_reaped(ItemPointer itemptr, void *state)
 {
-	LVRelStats *vacrelstats = (LVRelStats *) state;
+	LVDeadTuples *dead_tuples = (LVDeadTuples *) state;
 	ItemPointer res;
 
 	res = (ItemPointer) bsearch((void *) itemptr,
-								(void *) vacrelstats->dead_tuples,
-								vacrelstats->num_dead_tuples,
+								(void *) dead_tuples->itemptrs,
+								dead_tuples->num_tuples,
 								sizeof(ItemPointerData),
 								vac_cmp_itemptr);
 
@@ -2352,3 +2928,448 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 
 	return all_visible;
 }
+
+/*
+ * Compute the number of parallel worker processes to request.  Both index
+ * vacuuming and index cleanup can be executed with parallel workers.  The
+ * relation sizes of the table and indexes don't affect the parallel degree
+ * for now.  nrequested is the number of parallel workers that the user
+ * requested.  If nrequested is 0, we compute the parallel degree based on
+ * the number of indexes that support parallel index vacuuming.
+ */
+static int
+compute_parallel_vacuum_workers(Relation *Irel, int nindexes, int nrequested)
+{
+	int			nindexes_parallel = 0;
+	int			nindexes_parallel_bulkdel = 0;
+	int			nindexes_parallel_cleanup = 0;
+	int			parallel_workers;
+	int			i;
+
+	/*
+	 * We don't allow performing parallel operations in standalone backends or
+	 * when parallelism is disabled.
+	 */
+	if (!IsUnderPostmaster || max_parallel_maintenance_workers == 0)
+		return 0;
+
+	/*
+	 * Compute the number of indexes that can participate in parallel index
+	 * vacuuming.
+	 */
+	for (i = 0; i < nindexes; i++)
+	{
+		uint8		vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
+
+		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
+			nindexes_parallel_bulkdel++;
+		if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0) ||
+			((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
+			nindexes_parallel_cleanup++;
+	}
+
+	nindexes_parallel = Max(nindexes_parallel_bulkdel,
+							nindexes_parallel_cleanup);
+
+	/* No index supports parallel index vacuuming */
+	if (nindexes_parallel == 0)
+		return 0;
+
+	/* The leader process takes one index */
+	nindexes_parallel--;
+
+	/* Compute the parallel degree */
+	parallel_workers = (nrequested > 0) ?
+		Min(nrequested, nindexes_parallel) : nindexes_parallel;
+
+	/* Cap by max_parallel_maintenance_workers */
+	parallel_workers = Min(parallel_workers, max_parallel_maintenance_workers);
+
+	return parallel_workers;
+}
+
+/*
+ * Initialize variables for shared index statistics, set NULL bitmap and the
+ * size of stats for each index.  Since we currently don't support parallel
+ * vacuum for autovacuum, we don't need to care about autovacuum_work_mem.
+ */
+static void
+prepare_index_statistics(LVShared *lvshared, bool *can_parallel_vacuum,
+						 int nindexes)
+{
+	int			i;
+
+	Assert(!IsAutoVacuumWorkerProcess());
+
+	/* Set NULL for all indexes */
+	memset(lvshared->bitmap, 0x00, BITMAPLEN(nindexes));
+
+	for (i = 0; i < nindexes; i++)
+	{
+		if (!can_parallel_vacuum[i])
+			continue;
+
+		/* Set NOT NULL as this index does support parallelism */
+		lvshared->bitmap[i >> 3] |= 1 << (i & 0x07);
+	}
+}
+
+/*
+ * Update index statistics in pg_class if the statistics are accurate.
+ */
+static void
+update_index_statistics(Relation *Irel, IndexBulkDeleteResult **stats,
+						int nindexes)
+{
+	int			i;
+
+	Assert(!IsInParallelMode());
+
+	for (i = 0; i < nindexes; i++)
+	{
+		if (stats[i] == NULL || stats[i]->estimated_count)
+			continue;
+
+		/* Update index statistics */
+		vac_update_relstats(Irel[i],
+							stats[i]->num_pages,
+							stats[i]->num_index_tuples,
+							0,
+							false,
+							InvalidTransactionId,
+							InvalidMultiXactId,
+							false);
+		pfree(stats[i]);
+	}
+}
+
+/*
+ * This function prepares and returns the parallel vacuum state if we can
+ * launch at least one worker.  It is responsible for entering parallel mode,
+ * creating a parallel context, and then initializing the DSM segment.
+ */
+static LVParallelState *
+begin_parallel_vacuum(Oid relid, Relation *Irel, LVRelStats *vacrelstats,
+					  BlockNumber nblocks, int nindexes, int nrequested)
+{
+	LVParallelState *lps = NULL;
+	ParallelContext *pcxt;
+	LVShared   *shared;
+	LVDeadTuples *dead_tuples;
+	bool	   *can_parallel_vacuum;
+	long		maxtuples;
+	char	   *sharedquery;
+	Size		est_shared;
+	Size		est_deadtuples;
+	int			nindexes_mwm = 0;
+	int			parallel_workers = 0;
+	int			querylen;
+	int			i;
+
+	/*
+	 * A parallel vacuum must have been requested and there must be indexes
+	 * on the relation.
+	 */
+	Assert(nrequested >= 0);
+	Assert(nindexes > 0);
+
+	/*
+	 * Compute the number of parallel vacuum workers to launch
+	 */
+	parallel_workers = compute_parallel_vacuum_workers(Irel, nindexes,
+													   nrequested);
+
+	/* Can't perform vacuum in parallel */
+	if (parallel_workers <= 0)
+		return lps;
+
+	lps = (LVParallelState *) palloc0(sizeof(LVParallelState));
+	can_parallel_vacuum = (bool *) palloc0(sizeof(bool) * nindexes);
+
+	EnterParallelMode();
+	pcxt = CreateParallelContext("postgres", "parallel_vacuum_main",
+								 parallel_workers);
+	lps->pcxt = pcxt;
+	Assert(pcxt->nworkers > 0);
+
+	/* Estimate size for shared information -- PARALLEL_VACUUM_KEY_SHARED */
+	est_shared = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
+	for (i = 0; i < nindexes; i++)
+	{
+		uint8		vacoptions = Irel[i]->rd_indam->amparallelvacuumoptions;
+
+		/*
+		 * The cleanup option should be either disabled, always performed in
+		 * parallel, or conditionally performed in parallel.
+		 */
+		Assert(((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) ||
+			   ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0));
+		Assert(vacoptions <= VACUUM_OPTION_MAX_VALID_VALUE);
+
+		if (Irel[i]->rd_indam->amusemaintenanceworkmem)
+			nindexes_mwm++;
+
+		/* Skip indexes that don't participate in parallel index vacuum */
+		if (vacoptions == VACUUM_OPTION_NO_PARALLEL ||
+			RelationGetNumberOfBlocks(Irel[i]) < min_parallel_index_scan_size)
+			continue;
+
+		/*
+		 * Remember indexes that can participate in parallel index vacuum and
+		 * use this information for index statistics initialization in the DSM
+		 * segment, because the index size can get bigger during vacuum.
+		 */
+		can_parallel_vacuum[i] = true;
+
+		est_shared = add_size(est_shared, sizeof(LVSharedIndStats));
+
+		/*
+		 * Remember the number of indexes that support parallel operation for
+		 * each phase.
+		 */
+		if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) != 0)
+			lps->nindexes_parallel_bulkdel++;
+		if ((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) != 0)
+			lps->nindexes_parallel_cleanup++;
+		if ((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0)
+			lps->nindexes_parallel_condcleanup++;
+	}
+	shm_toc_estimate_chunk(&pcxt->estimator, est_shared);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+	/* Estimate size for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_TUPLES */
+	maxtuples = compute_max_dead_tuples(nblocks, true);
+	est_deadtuples = MAXALIGN(add_size(SizeOfLVDeadTuples,
+									   mul_size(sizeof(ItemPointerData), maxtuples)));
+	shm_toc_estimate_chunk(&pcxt->estimator, est_deadtuples);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+	/* Finally, estimate PARALLEL_VACUUM_KEY_QUERY_TEXT space */
+	querylen = strlen(debug_query_string);
+	shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1);
+	shm_toc_estimate_keys(&pcxt->estimator, 1);
+
+	InitializeParallelDSM(pcxt);
+
+	/* Prepare shared information */
+	shared = (LVShared *) shm_toc_allocate(pcxt->toc, est_shared);
+	MemSet(shared, 0, est_shared);
+	shared->relid = relid;
+	shared->elevel = elevel;
+	shared->maintenance_work_mem_worker =
+		(nindexes_mwm > 0) ?
+		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
+		maintenance_work_mem;
+
+	/*
+	 * Take care of alignment here, because the shared memory size was
+	 * estimated using MAXALIGN above.
+	 */
+	shared->offset = MAXALIGN(add_size(SizeOfLVShared, BITMAPLEN(nindexes)));
+	prepare_index_statistics(shared, can_parallel_vacuum, nindexes);
+	pg_atomic_init_u32(&(shared->idx), 0);
+	pg_atomic_init_u32(&(shared->nprocessed), 0);
+	pg_atomic_init_u32(&(shared->cost_balance), 0);
+	pg_atomic_init_u32(&(shared->active_nworkers), 0);
+
+	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
+	lps->lvshared = shared;
+
+	/* Prepare the dead tuple space */
+	dead_tuples = (LVDeadTuples *) shm_toc_allocate(pcxt->toc, est_deadtuples);
+	dead_tuples->max_tuples = maxtuples;
+	dead_tuples->num_tuples = 0;
+	MemSet(dead_tuples->itemptrs, 0, sizeof(ItemPointerData) * maxtuples);
+	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_TUPLES, dead_tuples);
+	vacrelstats->dead_tuples = dead_tuples;
+
+	/* Store query string for workers */
+	sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1);
+	memcpy(sharedquery, debug_query_string, querylen + 1);
+	sharedquery[querylen] = '\0';
+	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, sharedquery);
+
+	return lps;
+}
+
+/*
+ * Destroy the parallel context, and end parallel mode.
+ *
+ * Since writes are not allowed during parallel mode, we copy the updated
+ * index statistics from DSM into local memory and then later use them to
+ * update the index statistics.  One might think that we can exit from
+ * parallel mode, update the index statistics and then destroy parallel
+ * context, but that won't be safe (see ExitParallelMode).
+ */
+static void
+end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats,
+					LVParallelState *lps, int nindexes)
+{
+	int			i;
+
+	Assert(!IsParallelWorker());
+
+	/* copy the updated statistics */
+	for (i = 0; i < nindexes; i++)
+	{
+		LVSharedIndStats *indstats = get_indstats(lps->lvshared, i);
+
+		/*
+		 * Skip unused slot.  The statistics of this index are already stored
+		 * in local memory.
+		 */
+		if (indstats == NULL)
+			continue;
+
+		if (indstats->updated)
+		{
+			stats[i] = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+			memcpy(stats[i],
+				   &(indstats->stats),
+				   sizeof(IndexBulkDeleteResult));
+		}
+		else
+			stats[i] = NULL;
+	}
+
+	DestroyParallelContext(lps->pcxt);
+	ExitParallelMode();
+
+	/* Deactivate parallel vacuum */
+	pfree(lps);
+	lps = NULL;
+}
+
+/* Return the Nth index statistics or NULL */
+static LVSharedIndStats *
+get_indstats(LVShared *lvshared, int n)
+{
+	int			i;
+	char	   *p;
+
+	if (IndStatsIsNull(lvshared, n))
+		return NULL;
+
+	p = (char *) GetSharedIndStats(lvshared);
+	for (i = 0; i < n; i++)
+	{
+		if (IndStatsIsNull(lvshared, i))
+			continue;
+
+		p += sizeof(LVSharedIndStats);
+	}
+
+	return (LVSharedIndStats *) p;
+}
+
+/*
+ * Return true if we should skip the given index, i.e., if it does not
+ * support the current phase of parallel index vacuum or parallel index
+ * cleanup.
+ */
+static bool
+skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared)
+{
+	uint8		vacoptions = indrel->rd_indam->amparallelvacuumoptions;
+
+	/* first_time must be true only if for_cleanup is true */
+	Assert(lvshared->for_cleanup || !lvshared->first_time);
+
+	if (lvshared->for_cleanup)
+	{
+		/* Skip, if the index does not support parallel cleanup */
+		if (((vacoptions & VACUUM_OPTION_PARALLEL_CLEANUP) == 0) &&
+			((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) == 0))
+			return true;
+
+		/*
+		 * Skip, if the index supports parallel cleanup conditionally, but we
+		 * have already processed the index (for bulkdelete).  See the
+		 * comments for option VACUUM_OPTION_PARALLEL_COND_CLEANUP to know
+		 * when indexes support parallel cleanup conditionally.
+		 */
+		if (!lvshared->first_time &&
+			((vacoptions & VACUUM_OPTION_PARALLEL_COND_CLEANUP) != 0))
+			return true;
+	}
+	else if ((vacoptions & VACUUM_OPTION_PARALLEL_BULKDEL) == 0)
+	{
+		/* Skip if the index does not support parallel bulk deletion */
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Perform work within a launched parallel process.
+ *
+ * Since parallel vacuum workers perform only index vacuum or index cleanup,
+ * we don't need to report the progress information.
+ */
+void
+parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
+{
+	Relation	onerel;
+	Relation   *indrels;
+	LVShared   *lvshared;
+	LVDeadTuples *dead_tuples;
+	int			nindexes;
+	char	   *sharedquery;
+	IndexBulkDeleteResult **stats;
+
+	lvshared = (LVShared *) shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_SHARED,
+										   false);
+	elevel = lvshared->elevel;
+
+	ereport(DEBUG1,
+			(errmsg("starting parallel lazy vacuum worker for %s",
+					lvshared->for_cleanup ? "cleanup" : "vacuuming")));
+
+	/* Set debug_query_string for individual workers */
+	sharedquery = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_QUERY_TEXT, false);
+	debug_query_string = sharedquery;
+	pgstat_report_activity(STATE_RUNNING, debug_query_string);
+
+	/*
+	 * Open table.  The lock mode is the same as that of the leader process.
+	 * It's okay because the lock mode does not conflict among the parallel
+	 * workers.
+	 */
+	onerel = table_open(lvshared->relid, ShareUpdateExclusiveLock);
+
+	/*
+	 * Open all indexes.  indrels are sorted in order by OID, which should
+	 * match the order in which the leader opened them.
+	 */
+	vac_open_indexes(onerel, RowExclusiveLock, &nindexes, &indrels);
+	Assert(nindexes > 0);
+
+	/* Set dead tuple space */
+	dead_tuples = (LVDeadTuples *) shm_toc_lookup(toc,
+												  PARALLEL_VACUUM_KEY_DEAD_TUPLES,
+												  false);
+
+	/* Set cost-based vacuum delay */
+	VacuumCostActive = (VacuumCostDelay > 0);
+	VacuumCostBalance = 0;
+	VacuumPageHit = 0;
+	VacuumPageMiss = 0;
+	VacuumPageDirty = 0;
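+
+	/*
+	 * Point at the counters stored in the DSM segment so that this worker
+	 * and the leader throttle against one shared cost balance (see
+	 * compute_parallel_delay).
+	 */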
+	VacuumSharedCostBalance = &(lvshared->cost_balance);
+	VacuumActiveNWorkers = &(lvshared->active_nworkers);
+
+	stats = (IndexBulkDeleteResult **)
+		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
+
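+	/*
+	 * Use the per-worker maintenance_work_mem limit from the shared area, if
+	 * one was set by the leader.
+	 */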
+	if (lvshared->maintenance_work_mem_worker > 0)
+		maintenance_work_mem = lvshared->maintenance_work_mem_worker;
+
+	/* Process indexes to perform vacuum/cleanup */
+	parallel_vacuum_index(indrels, stats, lvshared, dead_tuples, nindexes);
+
+	vac_close_indexes(nindexes, indrels, RowExclusiveLock);
+	table_close(onerel, ShareUpdateExclusiveLock);
+	pfree(stats);
+}
diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index d147236429..b90305cde6 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -14,6 +14,7 @@
 
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/nbtree.h"
 #include "access/parallel.h"
 #include "access/session.h"
@@ -139,6 +140,9 @@ static const struct
 	},
 	{
 		"_bt_parallel_build_main", _bt_parallel_build_main
+	},
+	{
+		"parallel_vacuum_main", parallel_vacuum_main
 	}
 };
 
@@ -174,6 +178,7 @@ CreateParallelContext(const char *library_name, const char *function_name,
 	pcxt = palloc0(sizeof(ParallelContext));
 	pcxt->subid = GetCurrentSubTransactionId();
 	pcxt->nworkers = nworkers;
+	pcxt->nworkers_to_launch = nworkers;
 	pcxt->library_name = pstrdup(library_name);
 	pcxt->function_name = pstrdup(function_name);
 	pcxt->error_context_stack = error_context_stack;
@@ -440,13 +445,16 @@ InitializeParallelDSM(ParallelContext *pcxt)
 
 /*
  * Reinitialize the dynamic shared memory segment for a parallel context such
- * that we could launch workers for it again.
+ * that we could launch workers for it again.  nworkers is the number of
+ * workers to launch in the next execution.
  */
 void
-ReinitializeParallelDSM(ParallelContext *pcxt)
+ReinitializeParallelDSM(ParallelContext *pcxt, int nworkers)
 {
 	FixedParallelState *fps;
 
+	Assert(nworkers >= 0 && pcxt->nworkers >= nworkers);
+
 	/* Wait for any old workers to exit. */
 	if (pcxt->nworkers_launched > 0)
 	{
@@ -498,7 +506,7 @@ LaunchParallelWorkers(ParallelContext *pcxt)
 	bool		any_registrations_failed = false;
 
 	/* Skip this if we have no workers. */
-	if (pcxt->nworkers == 0)
+	if (pcxt->nworkers == 0 || pcxt->nworkers_to_launch == 0)
 		return;
 
 	/* We need to be a lock group leader. */
@@ -533,7 +541,7 @@ LaunchParallelWorkers(ParallelContext *pcxt)
 	 * fails.  It wouldn't help much anyway, because registering the worker in
 	 * no way guarantees that it will start up and initialize successfully.
 	 */
-	for (i = 0; i < pcxt->nworkers; ++i)
+	for (i = 0; i < pcxt->nworkers_to_launch; ++i)
 	{
 		memcpy(worker.bgw_extra, &i, sizeof(int));
 		if (!any_registrations_failed &&
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index da1da23400..14a9b2432e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -42,6 +42,7 @@
 #include "nodes/makefuncs.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/bgworker_internals.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
@@ -68,6 +69,14 @@ static MemoryContext vac_context = NULL;
 static BufferAccessStrategy vac_strategy;
 
 
+/*
+ * Variables for cost-based parallel vacuum.  See comments atop
+ * compute_parallel_delay to understand how it works.
+ */
+pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
+pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
+int			VacuumCostBalanceLocal = 0;
+
 /* non-export function prototypes */
 static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
 static List *get_all_vacuum_rels(int options);
@@ -76,6 +85,7 @@ static void vac_truncate_clog(TransactionId frozenXID,
 							  TransactionId lastSaneFrozenXid,
 							  MultiXactId lastSaneMinMulti);
 static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
+static double compute_parallel_delay(void);
 static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def);
 
 /*
@@ -99,6 +109,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	/* Set default value */
 	params.index_cleanup = VACOPT_TERNARY_DEFAULT;
 	params.truncate = VACOPT_TERNARY_DEFAULT;
+	params.nworkers = -1;
 
 	/* Parse options list */
 	foreach(lc, vacstmt->options)
@@ -129,6 +140,28 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 			params.index_cleanup = get_vacopt_ternary_value(opt);
 		else if (strcmp(opt->defname, "truncate") == 0)
 			params.truncate = get_vacopt_ternary_value(opt);
+		else if (strcmp(opt->defname, "parallel") == 0)
+		{
+			if (opt->arg == NULL)
+			{
+				/*
+				 * Parallel lazy vacuum is requested but the user didn't
+				 * specify the parallel degree.  The parallel degree will be
+				 * determined at the start of lazy vacuum.
+				 */
+				params.nworkers = 0;
+			}
+			else
+			{
+				params.nworkers = defGetInt32(opt);
+				if (params.nworkers < 1 || params.nworkers > MAX_PARALLEL_WORKER_LIMIT)
+					ereport(ERROR,
+							(errcode(ERRCODE_SYNTAX_ERROR),
+							 errmsg("parallel vacuum degree must be between 1 and %d",
+									MAX_PARALLEL_WORKER_LIMIT),
+							 parser_errposition(pstate, opt->location)));
+			}
+		}
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
@@ -170,6 +203,11 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 		}
 	}
 
+	if ((params.options & VACOPT_FULL) && params.nworkers >= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("cannot specify FULL option with PARALLEL option")));
+
 	/*
 	 * All freeze ages are zero if the FREEZE option is given; otherwise pass
 	 * them as -1 which means to use the default values.
@@ -383,6 +421,7 @@ vacuum(List *relations, VacuumParams *params,
 		VacuumPageHit = 0;
 		VacuumPageMiss = 0;
 		VacuumPageDirty = 0;
+		VacuumSharedCostBalance = NULL;
 
 		/*
 		 * Loop to process each selected relation.
@@ -1738,6 +1777,20 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 		return false;
 	}
 
+	/*
+	 * Since parallel workers cannot access data in temporary tables, parallel
+	 * vacuum is not allowed for temporary relations.  Rather than skipping
+	 * vacuum on the table altogether, it is better in most cases to just
+	 * disable the parallel option.
+	 */
+	if (RelationUsesLocalBuffers(onerel) && params->nworkers >= 0)
+	{
+		ereport(WARNING,
+				(errmsg("disabling parallel option of vacuum on \"%s\" --- cannot vacuum temporary tables in parallel",
+						RelationGetRelationName(onerel))));
+		params->nworkers = -1;
+	}
+
 	/*
 	 * Silently ignore partitioned tables as there is no work to be done.  The
 	 * useful work is on their child partitions, which have been queued up for
@@ -1941,16 +1994,26 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
 void
 vacuum_delay_point(void)
 {
+	double		msec = 0;
+
 	/* Always check for interrupts */
 	CHECK_FOR_INTERRUPTS();
 
-	/* Nap if appropriate */
-	if (VacuumCostActive && !InterruptPending &&
-		VacuumCostBalance >= VacuumCostLimit)
-	{
-		double		msec;
+	if (!VacuumCostActive || InterruptPending)
+		return;
 
+	/*
+	 * For parallel vacuum, the delay is computed based on the shared cost
+	 * balance.  See compute_parallel_delay.
+	 */
+	if (VacuumSharedCostBalance != NULL)
+		msec = compute_parallel_delay();
+	else if (VacuumCostBalance >= VacuumCostLimit)
 		msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
+
+	/* Nap if appropriate */
+	if (msec > 0)
+	{
 		if (msec > VacuumCostDelay * 4)
 			msec = VacuumCostDelay * 4;
 
@@ -1966,6 +2029,59 @@ vacuum_delay_point(void)
 	}
 }
 
+/*
+ * Computes the vacuum delay for parallel workers.
+ *
+ * The basic idea of cost-based vacuum delay for parallel index vacuuming is
+ * to give all parallel vacuum workers, including the leader process, a
+ * shared view of the cost-related parameters (mainly VacuumCostBalance), and
+ * to let each worker update the shared balance and then, based on that,
+ * decide whether it needs to sleep.  In addition, a worker is allowed to
+ * sleep only if it has performed I/O above a certain threshold, which is
+ * calculated based on the number of active workers (VacuumActiveNWorkers),
+ * and only if the overall cost balance exceeds the VacuumCostLimit set by
+ * the system.  The worker then sleeps in proportion to the work it has done
+ * and reduces VacuumSharedCostBalance by the amount it has consumed
+ * (VacuumCostBalanceLocal).  This avoids putting to sleep workers that have
+ * done little or no I/O compared to other workers, and thus ensures that
+ * workers doing more I/O are throttled more.
+ */
+static double
+compute_parallel_delay(void)
+{
+	double		msec = 0;
+	uint32		shared_balance;
+	int			nworkers = pg_atomic_read_u32(VacuumActiveNWorkers);
+
+	/* At least this process itself must be counted as active */
+	Assert(nworkers >= 1);
+
+	/* parallel vacuum must be active */
+	Assert(VacuumSharedCostBalance);
+
+	/* Update the shared cost balance value atomically */
+	shared_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);
+
+	/* also compute the total local balance */
+	VacuumCostBalanceLocal += VacuumCostBalance;
+
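+	/*
+	 * Sleep only when the overall balance has reached the cost limit and
+	 * this worker has consumed at least about half of its per-worker share
+	 * of the limit; workers that have done little I/O so far keep running.
+	 */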
+	if ((shared_balance >= VacuumCostLimit) &&
+		(VacuumCostBalanceLocal > 0.5 * (VacuumCostLimit / nworkers)))
+	{
+		/* compute sleep time based on the local cost balance */
+		msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit;
+		shared_balance = pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
+		VacuumCostBalanceLocal = 0;
+	}
+
+	/*
+	 * Reset the process-local balance now that it has been added to both the
+	 * shared balance and VacuumCostBalanceLocal.
+	 */
+	VacuumCostBalance = 0;
+
+	return msec;
+}
+
 /*
  * A wrapper function of defGetBoolean().
  *
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 683b06cdd6..0dd1bf4472 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -873,7 +873,7 @@ ExecParallelReinitialize(PlanState *planstate,
 	 */
 	ExecSetParamPlanMulti(sendParams, GetPerTupleExprContext(estate));
 
-	ReinitializeParallelDSM(pei->pcxt);
+	ReinitializeParallelDSM(pei->pcxt, pei->pcxt->nworkers);
 	pei->tqueue = ExecParallelSetupTupleQueues(pei->pcxt, true);
 	pei->reader = NULL;
 	pei->finished = false;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index c1dd8168ca..c3690f9c41 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2891,6 +2891,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 			(!wraparound ? VACOPT_SKIP_LOCKED : 0);
 		tab->at_params.index_cleanup = VACOPT_TERNARY_DEFAULT;
 		tab->at_params.truncate = VACOPT_TERNARY_DEFAULT;
+		/* We don't support parallel vacuum for autovacuum for now */
+		tab->at_params.nworkers = -1;
 		tab->at_params.freeze_min_age = freeze_min_age;
 		tab->at_params.freeze_table_age = freeze_table_age;
 		tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 5e0db3515d..e2dbd94a3e 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -3591,7 +3591,7 @@ psql_completion(const char *text, int start, int end)
 		if (ends_with(prev_wd, '(') || ends_with(prev_wd, ','))
 			COMPLETE_WITH("FULL", "FREEZE", "ANALYZE", "VERBOSE",
 						  "DISABLE_PAGE_SKIPPING", "SKIP_LOCKED",
-						  "INDEX_CLEANUP", "TRUNCATE");
+						  "INDEX_CLEANUP", "TRUNCATE", "PARALLEL");
 		else if (TailMatches("FULL|FREEZE|ANALYZE|VERBOSE|DISABLE_PAGE_SKIPPING|SKIP_LOCKED|INDEX_CLEANUP|TRUNCATE"))
 			COMPLETE_WITH("ON", "OFF");
 	}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 858bcb6bc9..e89c1252d3 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -24,6 +24,8 @@
 #include "nodes/primnodes.h"
 #include "storage/bufpage.h"
 #include "storage/lockdefs.h"
+#include "storage/shm_toc.h"
+#include "storage/dsm.h"
 #include "utils/relcache.h"
 #include "utils/snapshot.h"
 
@@ -193,6 +195,7 @@ extern Size SyncScanShmemSize(void);
 struct VacuumParams;
 extern void heap_vacuum_rel(Relation onerel,
 							struct VacuumParams *params, BufferAccessStrategy bstrategy);
+extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 
 /* in heap/heapam_visibility.c */
 extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot,
diff --git a/src/include/access/parallel.h b/src/include/access/parallel.h
index c00ae6424c..86d24650ba 100644
--- a/src/include/access/parallel.h
+++ b/src/include/access/parallel.h
@@ -33,7 +33,8 @@ typedef struct ParallelContext
 {
 	dlist_node	node;
 	SubTransactionId subid;
-	int			nworkers;
+	int			nworkers;	/* Maximum number of workers to launch */
+	int			nworkers_to_launch;	/* Actual number of workers to launch */
 	int			nworkers_launched;
 	char	   *library_name;
 	char	   *function_name;
@@ -62,7 +63,7 @@ extern PGDLLIMPORT bool InitializingParallelWorker;
 extern ParallelContext *CreateParallelContext(const char *library_name,
 											  const char *function_name, int nworkers);
 extern void InitializeParallelDSM(ParallelContext *pcxt);
-extern void ReinitializeParallelDSM(ParallelContext *pcxt);
+extern void ReinitializeParallelDSM(ParallelContext *pcxt, int nworkers);
 extern void LaunchParallelWorkers(ParallelContext *pcxt);
 extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt);
 extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index b05aedc670..6e9d918cfe 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -221,6 +221,11 @@ typedef struct VacuumParams
 										 * default value depends on reloptions */
 	VacOptTernaryValue truncate;	/* Truncate empty pages at the end,
 									 * default value depends on reloptions */
+	/*
+	 * The number of parallel vacuum workers.  -1 (the default) disables
+	 * parallel vacuum; 0 means the number of workers is chosen based on the
+	 * number of indexes.
+	 */
+	int			nworkers;
 } VacuumParams;
 
 /* GUC parameters */
@@ -230,6 +235,11 @@ extern int	vacuum_freeze_table_age;
 extern int	vacuum_multixact_freeze_min_age;
 extern int	vacuum_multixact_freeze_table_age;
 
+/* Variables for cost-based parallel vacuum  */
+extern pg_atomic_uint32	*VacuumSharedCostBalance;
+extern pg_atomic_uint32	*VacuumActiveNWorkers;
+extern int	VacuumCostBalanceLocal;
+
 
 /* in commands/vacuum.c */
 extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index 9996d882d1..5b42371d95 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -92,6 +92,32 @@ CONTEXT:  SQL function "do_analyze" statement 1
 SQL function "wrap_do_analyze" statement 1
 VACUUM FULL vactst;
 VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
+-- PARALLEL option
+CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off);
+INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,1000) i;
+CREATE INDEX btree_pvactst ON pvactst USING btree (i);
+CREATE INDEX hash_pvactst ON pvactst USING hash (i);
+CREATE INDEX brin_pvactst ON pvactst USING brin (i);
+CREATE INDEX gin_pvactst ON pvactst USING gin (a);
+CREATE INDEX gist_pvactst ON pvactst USING gist (p);
+CREATE INDEX spgist_pvactst ON pvactst USING spgist (p);
+-- VACUUM invokes parallel index cleanup
+VACUUM (PARALLEL 2) pvactst;
+-- VACUUM invokes parallel bulk-deletion and parallel index cleanup
+UPDATE pvactst SET i = i WHERE i < 1000;
+VACUUM (PARALLEL 2) pvactst;
+VACUUM (PARALLEL 0) pvactst; -- error
+ERROR:  parallel vacuum degree must be between 1 and 1024
+LINE 1: VACUUM (PARALLEL 0) pvactst;
+                ^
+VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst;
+VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL
+ERROR:  cannot specify FULL option with PARALLEL option
+CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);
+CREATE INDEX tmp_idx1 ON tmp (a);
+VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option
+WARNING:  disabling parallel option of vacuum on "tmp" --- cannot vacuum temporary tables in parallel
+DROP TABLE pvactst;
 -- INDEX_CLEANUP option
 CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT);
 -- Use uncompressed data stored in toast.
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index 69987f75e9..0cdda11b25 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -75,6 +75,31 @@ VACUUM FULL vactst;
 
 VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
 
+-- PARALLEL option
+CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off);
+INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,1000) i;
+CREATE INDEX btree_pvactst ON pvactst USING btree (i);
+CREATE INDEX hash_pvactst ON pvactst USING hash (i);
+CREATE INDEX brin_pvactst ON pvactst USING brin (i);
+CREATE INDEX gin_pvactst ON pvactst USING gin (a);
+CREATE INDEX gist_pvactst ON pvactst USING gist (p);
+CREATE INDEX spgist_pvactst ON pvactst USING spgist (p);
+
+-- VACUUM invokes parallel index cleanup
+VACUUM (PARALLEL 2) pvactst;
+
+-- VACUUM invokes parallel bulk-deletion and parallel index cleanup
+UPDATE pvactst SET i = i WHERE i < 1000;
+VACUUM (PARALLEL 2) pvactst;
+
+VACUUM (PARALLEL 0) pvactst; -- error
+VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst;
+VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL
+CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);
+CREATE INDEX tmp_idx1 ON tmp (a);
+VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option
+DROP TABLE pvactst;
+
 -- INDEX_CLEANUP option
 CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT);
 -- Use uncompressed data stored in toast.
-- 
2.23.0

