On Tue, Aug 27, 2019 at 2:55 PM Jamison, Kirk <k.jami...@jp.fujitsu.com> wrote:
>
> On Monday, August 19, 2019 10:39 AM (GMT+9), Masahiko Sawada wrote:
> > Fixed.
> >
> > Attached the updated version patch.
>
> Hi Sawada-san,
>
> I haven't tested it with heavily updated large tables, but I think the patch
> is reasonable as it helps to shorten the execution time of vacuum by removing
> the redundant vacuuming and prioritizing reclaiming the garbage instead.
> I'm not sure if it's commonly reported to have problems even when we repeat
> vacuuming the already-vacuumed blocks, but I think it's a reasonable 
> improvement.
>
> I skimmed the patch and have few comments. If they deem fit, feel free to
> follow, but it's also ok if you don't.

Thank you for reviewing this patch! I've attached the updated patch
incorporated all your comments and some improvements.

Regards,

--
Masahiko Sawada
NIPPON TELEGRAPH AND TELEPHONE CORPORATION
NTT Open Source Software Center
From 0bd1cf64aa55977029371e3402d05240dfdfec21 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Wed, 10 Jul 2019 19:02:56 +0900
Subject: [PATCH v3] Add RESUME option to VACUUM and autovacuum.

This commit adds a new reloption, vaucum_resume, which controls
whether vacuum attempt to resume vacuuming from the last vacuumed
block saved at vacuum_resume_block column of pg_stat_all_tables. It
also adds a new option to the VACUUM command, RESUME which can be used
to override the reloption.
---
 doc/src/sgml/monitoring.sgml           |  5 ++
 doc/src/sgml/ref/vacuum.sgml           | 18 +++++++
 src/backend/access/common/reloptions.c | 13 ++++-
 src/backend/access/heap/vacuumlazy.c   | 88 +++++++++++++++++++++++++++++++---
 src/backend/catalog/system_views.sql   |  1 +
 src/backend/commands/vacuum.c          | 13 +++++
 src/backend/postmaster/pgstat.c        | 42 ++++++++++++++++
 src/backend/utils/adt/pgstatfuncs.c    | 14 ++++++
 src/include/catalog/pg_proc.dat        |  5 ++
 src/include/commands/vacuum.h          |  5 +-
 src/include/pgstat.h                   | 14 ++++++
 src/include/utils/rel.h                |  2 +
 src/test/regress/expected/rules.out    |  3 ++
 src/test/regress/expected/vacuum.out   | 20 ++++++++
 src/test/regress/sql/vacuum.sql        | 21 ++++++++
 15 files changed, 255 insertions(+), 9 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index bf72d0c..dfd8669 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2785,6 +2785,11 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
      <entry>Estimated number of rows modified since this table was last analyzed</entry>
     </row>
     <row>
+     <entry><structfield>vacuum_resume_block</structfield></entry>
+     <entry><type>bigint</type></entry>
+     <entry>Block number to resume vacuuming</entry>
+    </row>
+    <row>
      <entry><structfield>last_vacuum</structfield></entry>
      <entry><type>timestamp with time zone</type></entry>
      <entry>Last time at which this table was manually vacuumed
diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index f9b0fb8..ae3798b 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -34,6 +34,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     SKIP_LOCKED [ <replaceable class="parameter">boolean</replaceable> ]
     INDEX_CLEANUP [ <replaceable class="parameter">boolean</replaceable> ]
     TRUNCATE [ <replaceable class="parameter">boolean</replaceable> ]
+    RESUME [ <replaceable class="parameter">boolean</replaceable> ]
 
 <phrase>and <replaceable class="parameter">table_and_columns</replaceable> is:</phrase>
 
@@ -224,6 +225,23 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
    </varlistentry>
 
    <varlistentry>
+    <term><literal>RESUME</literal></term>
+    <listitem>
+     <para>
+      Specifies that <command>VACUUM</command> should attempt to
+      resume vacuuming from the last vacuumed block saved at
+      <literal>vacuum_resume_block</literal> column of
+      <xref linkend="pg-stat-all-tables-view"/>. This behavior is helpful
+      when resuming vacuum run from interruption and cancellation. The default
+      is false unless the <literal>vacuum_resume</literal> option has been
+      set to true. This option is ignored if either the <literal>FULL</literal>,
+      <literal>FREEZE</literal>, or <literal>DISABLE_PAGE_SKIPPING</literal>
+      option is used.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
     <term><replaceable class="parameter">boolean</replaceable></term>
     <listitem>
      <para>
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 42647b0..0200eb0 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -158,6 +158,15 @@ static relopt_bool boolRelOpts[] =
 		},
 		true
 	},
+	{
+		{
+			"vacuum_resume",
+			"Enables vacuum to resume vacuuming from the last vacuumed block",
+			RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+			ShareUpdateExclusiveLock
+		},
+		false
+	},
 	/* list terminator */
 	{{NULL}}
 };
@@ -1412,7 +1421,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
 		{"vacuum_index_cleanup", RELOPT_TYPE_BOOL,
 		offsetof(StdRdOptions, vacuum_index_cleanup)},
 		{"vacuum_truncate", RELOPT_TYPE_BOOL,
-		offsetof(StdRdOptions, vacuum_truncate)}
+		offsetof(StdRdOptions, vacuum_truncate)},
+		{"vacuum_resume", RELOPT_TYPE_BOOL,
+		offsetof(StdRdOptions, vacuum_resume)}
 	};
 
 	options = parseRelOptions(reloptions, validate, kind, &numoptions);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index a3c4a1d..1895ca0 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -92,6 +92,14 @@
 	((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
 
 /*
+ * When a table has no indexes, save the progress every 8GB so that we can
+ * resume vacuum from the middle of table. When table has indexes we save it
+ * after the second heap pass finished.
+ */
+#define VACUUM_RESUME_BLK_INTERVAL \
+	((BlockNumber) (((uint64) 8 * 1024 * 1024 * 1024) / BLCKSZ))
+
+/*
  * Guesstimation of number of dead tuples per page.  This is used to
  * provide an upper limit to memory allocated when vacuuming small
  * tables.
@@ -175,6 +183,7 @@ static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 static int	vac_cmp_itemptr(const void *left, const void *right);
 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
 									 TransactionId *visibility_cutoff_xid, bool *all_frozen);
+static BlockNumber get_resume_block(Relation onerel);
 
 
 /*
@@ -212,6 +221,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 	Assert(params != NULL);
 	Assert(params->index_cleanup != VACOPT_TERNARY_DEFAULT);
 	Assert(params->truncate != VACOPT_TERNARY_DEFAULT);
+	Assert(params->resume != VACOPT_TERNARY_DEFAULT);
 
 	/* not every AM requires these to be valid, but heap does */
 	Assert(TransactionIdIsNormal(onerel->rd_rel->relfrozenxid));
@@ -504,7 +514,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	TransactionId relminmxid = onerel->rd_rel->relminmxid;
 	BlockNumber empty_pages,
 				vacuumed_pages,
-				next_fsm_block_to_vacuum;
+				next_fsm_block_to_vacuum,
+				next_block_to_resume_vacuum;
 	double		num_tuples,		/* total number of nonremovable tuples */
 				live_tuples,	/* live tuples (reltuples estimate) */
 				tups_vacuumed,	/* tuples cleaned up by vacuum */
@@ -515,6 +526,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	PGRUsage	ru0;
 	Buffer		vmbuffer = InvalidBuffer;
 	BlockNumber next_unskippable_block;
+	BlockNumber	start_blkno = 0;
 	bool		skipping_blocks;
 	xl_heap_freeze_tuple *frozen;
 	StringInfoData buf;
@@ -527,6 +539,19 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 
 	pg_rusage_init(&ru0);
 
+	/*
+	 * If resuming is not requested, we clear the last saved block so as not
+	 * keep the previous information. If requested and it is not an aggressive
+	 * vacuum, we fetch the last saved block number to resume and set it as the
+	 * starting block to vacuum.
+	 */
+	if (params->resume == VACOPT_TERNARY_DISABLED)
+		pgstat_report_vacuum_resume_block(RelationGetRelid(onerel),
+										  onerel->rd_rel->relisshared,
+										  0);
+	else if (!aggressive)
+		start_blkno = get_resume_block(onerel);
+
 	relname = RelationGetRelationName(onerel);
 	if (aggressive)
 		ereport(elevel,
@@ -534,19 +559,30 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 						get_namespace_name(RelationGetNamespace(onerel)),
 						relname)));
 	else
-		ereport(elevel,
-				(errmsg("vacuuming \"%s.%s\"",
-						get_namespace_name(RelationGetNamespace(onerel)),
-						relname)));
+	{
+		if (start_blkno != 0)
+			ereport(elevel,
+					(errmsg("vacuuming \"%s.%s\" from %u block",
+							get_namespace_name(RelationGetNamespace(onerel)),
+							relname, start_blkno)));
+		else
+			ereport(elevel,
+					(errmsg("vacuuming \"%s.%s\"",
+							get_namespace_name(RelationGetNamespace(onerel)),
+							relname)));
+	}
 
 	empty_pages = vacuumed_pages = 0;
 	next_fsm_block_to_vacuum = (BlockNumber) 0;
+	next_block_to_resume_vacuum = (BlockNumber) 0;
 	num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
 
 	indstats = (IndexBulkDeleteResult **)
 		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
 
 	nblocks = RelationGetNumberOfBlocks(onerel);
+	Assert(start_blkno <= nblocks);	/* both are the same if there are no blocks */
+
 	vacrelstats->rel_pages = nblocks;
 	vacrelstats->scanned_pages = 0;
 	vacrelstats->tupcount_pages = 0;
@@ -606,7 +642,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	 * the last page.  This is worth avoiding mainly because such a lock must
 	 * be replayed on any hot standby, where it can be disruptive.
 	 */
-	next_unskippable_block = 0;
+	next_unskippable_block = start_blkno;
 	if ((params->options & VACOPT_DISABLE_PAGE_SKIPPING) == 0)
 	{
 		while (next_unskippable_block < nblocks)
@@ -635,7 +671,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	else
 		skipping_blocks = false;
 
-	for (blkno = 0; blkno < nblocks; blkno++)
+	for (blkno = start_blkno; blkno < nblocks; blkno++)
 	{
 		Buffer		buf;
 		Page		page;
@@ -799,6 +835,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
 			next_fsm_block_to_vacuum = blkno;
 
+			/* Save the current block number to resume vacuuming */
+			pgstat_report_vacuum_resume_block(RelationGetRelid(onerel),
+											  onerel->rd_rel->relisshared,
+											  blkno);
+
 			/* Report that we are once again scanning the heap */
 			pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
 										 PROGRESS_VACUUM_PHASE_SCAN_HEAP);
@@ -1271,6 +1312,15 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 */
 			vacrelstats->num_dead_tuples = 0;
 
+			/* Save the current block number to resume vacuuming */
+			if (blkno - next_block_to_resume_vacuum >= VACUUM_RESUME_BLK_INTERVAL)
+			{
+				pgstat_report_vacuum_resume_block(RelationGetRelid(onerel),
+												  onerel->rd_rel->relisshared,
+												  blkno);
+				next_block_to_resume_vacuum = blkno;
+			}
+
 			/*
 			 * Periodically do incremental FSM vacuuming to make newly-freed
 			 * space visible on upper FSM pages.  Note: although we've cleaned
@@ -1458,6 +1508,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	if (blkno > next_fsm_block_to_vacuum)
 		FreeSpaceMapVacuumRange(onerel, next_fsm_block_to_vacuum, blkno);
 
+	/* Clear the saved block number */
+	pgstat_report_vacuum_resume_block(RelationGetRelid(onerel),
+									  onerel->rd_rel->relisshared,
+									  0);
+
 	/* report all blocks vacuumed; and that we're cleaning up */
 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 	pgstat_progress_update_param(PROGRESS_VACUUM_PHASE,
@@ -2354,3 +2409,22 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 
 	return all_visible;
 }
+
+/*
+ * Return the block number to resume vacuuming fetched from stats collector.
+ */
+static BlockNumber
+get_resume_block(Relation onerel)
+{
+	PgStat_StatTabEntry	*tabentry;
+
+	tabentry = pgstat_fetch_stat_tabentry(RelationGetRelid(onerel));
+
+	/* If not found a valid saved block number, resume from the first block */
+	if (tabentry == NULL ||
+		tabentry->vacuum_resume_block >= RelationGetNumberOfBlocks(onerel))
+		return (BlockNumber) 0;
+
+	Assert(tabentry->vacuum_resume_block >= 0);
+	return tabentry->vacuum_resume_block;
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ea4c85e..3e66956 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -566,6 +566,7 @@ CREATE VIEW pg_stat_all_tables AS
             pg_stat_get_live_tuples(C.oid) AS n_live_tup,
             pg_stat_get_dead_tuples(C.oid) AS n_dead_tup,
             pg_stat_get_mod_since_analyze(C.oid) AS n_mod_since_analyze,
+            pg_stat_get_vacuum_resume_block(C.oid) AS vacuum_resume_blk,
             pg_stat_get_last_vacuum_time(C.oid) as last_vacuum,
             pg_stat_get_last_autovacuum_time(C.oid) as last_autovacuum,
             pg_stat_get_last_analyze_time(C.oid) as last_analyze,
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e154507..f35c132 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -99,6 +99,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	/* Set default value */
 	params.index_cleanup = VACOPT_TERNARY_DEFAULT;
 	params.truncate = VACOPT_TERNARY_DEFAULT;
+	params.resume = VACOPT_TERNARY_DEFAULT;
 
 	/* Parse options list */
 	foreach(lc, vacstmt->options)
@@ -127,6 +128,8 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 			disable_page_skipping = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "index_cleanup") == 0)
 			params.index_cleanup = get_vacopt_ternary_value(opt);
+		else if (strcmp(opt->defname, "resume") == 0)
+			params.resume = get_vacopt_ternary_value(opt);
 		else if (strcmp(opt->defname, "truncate") == 0)
 			params.truncate = get_vacopt_ternary_value(opt);
 		else
@@ -1783,6 +1786,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 			params->truncate = VACOPT_TERNARY_DISABLED;
 	}
 
+	/* Set resume option based on reloptions if not yet, default is false */
+	if (params->resume == VACOPT_TERNARY_DEFAULT)
+	{
+		if (onerel->rd_options == NULL ||
+			!((StdRdOptions *) onerel->rd_options)->vacuum_resume)
+			params->resume = VACOPT_TERNARY_DISABLED;
+		else
+			params->resume = VACOPT_TERNARY_ENABLED;
+	}
+
 	/*
 	 * Remember the relation's TOAST relation for later, if the caller asked
 	 * us to process it.  In VACUUM FULL, though, the toast table is
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index d362e7f..5d690dc 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -328,6 +328,8 @@ static void pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, in
 static void pgstat_recv_resetsinglecounter(PgStat_MsgResetsinglecounter *msg, int len);
 static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
+static void pgstat_recv_vacuum_resume_block(PgStat_MsgVacuumResumeBlock *msg,
+											int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
@@ -1425,6 +1427,27 @@ pgstat_report_vacuum(Oid tableoid, bool shared,
 	pgstat_send(&msg, sizeof(msg));
 }
 
+/* ---------
+ * pgstat_report_vacuum_resume_block() -
+ *
+ *	Tell the collector about the block number to resume.
+ * ---------
+ */
+void
+pgstat_report_vacuum_resume_block(Oid tableoid, bool shared, BlockNumber blkno)
+{
+	PgStat_MsgVacuumResumeBlock msg;
+
+	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+		return;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUMRESUMEBLOCK);
+	msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
+	msg.m_tableoid = tableoid;
+	msg.m_blkno = blkno;
+	pgstat_send(&msg, sizeof(msg));
+}
+
 /* --------
  * pgstat_report_analyze() -
  *
@@ -4594,6 +4617,11 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_vacuum(&msg.msg_vacuum, len);
 					break;
 
+				case PGSTAT_MTYPE_VACUUMRESUMEBLOCK:
+					pgstat_recv_vacuum_resume_block(&msg.msg_vacuum_resume_block,
+													len);
+					break;
+
 				case PGSTAT_MTYPE_ANALYZE:
 					pgstat_recv_analyze(&msg.msg_analyze, len);
 					break;
@@ -6230,6 +6258,20 @@ pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
 	}
 }
 
+static void
+pgstat_recv_vacuum_resume_block(PgStat_MsgVacuumResumeBlock *msg, int len)
+{
+	PgStat_StatDBEntry *dbentry;
+	PgStat_StatTabEntry *tabentry;
+
+	/*
+	 * Store the data in the table's hashtable entry.
+	 */
+	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+	tabentry = pgstat_get_tab_entry(dbentry, msg->m_tableoid, true);
+	tabentry->vacuum_resume_block = msg->m_blkno;
+}
+
 /* ----------
  * pgstat_recv_analyze() -
  *
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 05240bf..961c598 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -195,6 +195,20 @@ pg_stat_get_mod_since_analyze(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(result);
 }
 
+Datum
+pg_stat_get_vacuum_resume_block(PG_FUNCTION_ARGS)
+{
+	Oid			relid = PG_GETARG_OID(0);
+	uint64		result;
+	PgStat_StatTabEntry *tabentry;
+
+	if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+		result = 0;
+	else
+		result = (int64) (tabentry->vacuum_resume_block);
+
+	PG_RETURN_INT64(result);
+}
 
 Datum
 pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS)
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cf1f409..126ade9 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5093,6 +5093,11 @@
   proname => 'pg_stat_get_mod_since_analyze', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
   prosrc => 'pg_stat_get_mod_since_analyze' },
+{ oid => '4219',
+  descr => 'statistics: block number to resume vacuuming',
+  proname => 'pg_stat_get_vacuum_resume_block', provolatile => 's',
+  proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
+  prosrc => 'pg_stat_get_vacuum_resume_block' },
 { oid => '1934', descr => 'statistics: number of blocks fetched',
   proname => 'pg_stat_get_blocks_fetched', provolatile => 's',
   proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 128f7ae..c3006c5 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -145,7 +145,8 @@ typedef enum VacuumOption
 	VACOPT_FULL = 1 << 4,		/* FULL (non-concurrent) vacuum */
 	VACOPT_SKIP_LOCKED = 1 << 5,	/* skip if cannot get lock */
 	VACOPT_SKIPTOAST = 1 << 6,	/* don't process the TOAST table, if any */
-	VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7	/* don't skip any pages */
+	VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7,	/* don't skip any pages */
+	VACOPT_RESUME = 1 << 8		/* resume from the previous point */
 } VacuumOption;
 
 /*
@@ -184,6 +185,8 @@ typedef struct VacuumParams
 										 * default value depends on reloptions */
 	VacOptTernaryValue truncate;	/* Truncate empty pages at the end,
 									 * default value depends on reloptions */
+	VacOptTernaryValue resume;		/* Resume vacuuming from the last vacuumed
+									 * block */
 } VacuumParams;
 
 /* GUC parameters */
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fe076d8..00604b5 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -56,6 +56,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_RESETSINGLECOUNTER,
 	PGSTAT_MTYPE_AUTOVAC_START,
 	PGSTAT_MTYPE_VACUUM,
+	PGSTAT_MTYPE_VACUUMRESUMEBLOCK,
 	PGSTAT_MTYPE_ANALYZE,
 	PGSTAT_MTYPE_ARCHIVER,
 	PGSTAT_MTYPE_BGWRITER,
@@ -371,6 +372,14 @@ typedef struct PgStat_MsgVacuum
 	PgStat_Counter m_dead_tuples;
 } PgStat_MsgVacuum;
 
+typedef struct PgStat_MsgVacuumResumeBlock
+{
+	PgStat_MsgHdr m_hdr;
+	Oid			m_databaseid;
+	Oid			m_tableoid;
+	BlockNumber	m_blkno;
+} PgStat_MsgVacuumResumeBlock;
+
 
 /* ----------
  * PgStat_MsgAnalyze			Sent by the backend or autovacuum daemon
@@ -561,6 +570,7 @@ typedef union PgStat_Msg
 	PgStat_MsgResetsinglecounter msg_resetsinglecounter;
 	PgStat_MsgAutovacStart msg_autovacuum_start;
 	PgStat_MsgVacuum msg_vacuum;
+	PgStat_MsgVacuumResumeBlock msg_vacuum_resume_block;
 	PgStat_MsgAnalyze msg_analyze;
 	PgStat_MsgArchiver msg_archiver;
 	PgStat_MsgBgWriter msg_bgwriter;
@@ -650,6 +660,8 @@ typedef struct PgStat_StatTabEntry
 	PgStat_Counter blocks_fetched;
 	PgStat_Counter blocks_hit;
 
+	BlockNumber	vacuum_resume_block;
+
 	TimestampTz vacuum_timestamp;	/* user initiated vacuum */
 	PgStat_Counter vacuum_count;
 	TimestampTz autovac_vacuum_timestamp;	/* autovacuum initiated */
@@ -1262,6 +1274,8 @@ extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
 extern void pgstat_report_autovac(Oid dboid);
 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
 								 PgStat_Counter livetuples, PgStat_Counter deadtuples);
+extern void pgstat_report_vacuum_resume_block(Oid tableoid, bool shared,
+											  BlockNumber blkno);
 extern void pgstat_report_analyze(Relation rel,
 								  PgStat_Counter livetuples, PgStat_Counter deadtuples,
 								  bool resetcounter);
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 91b3b1b..7e65586 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -272,6 +272,8 @@ typedef struct StdRdOptions
 	int			parallel_workers;	/* max number of parallel workers */
 	bool		vacuum_index_cleanup;	/* enables index vacuuming and cleanup */
 	bool		vacuum_truncate;	/* enables vacuum to truncate a relation */
+	bool		vacuum_resume;		/* enables vacuum to resume from last
+									 * vacuumed block. */
 } StdRdOptions;
 
 #define HEAP_MIN_FILLFACTOR			10
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 210e9cd..1edd97f 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1771,6 +1771,7 @@ pg_stat_all_tables| SELECT c.oid AS relid,
     pg_stat_get_live_tuples(c.oid) AS n_live_tup,
     pg_stat_get_dead_tuples(c.oid) AS n_dead_tup,
     pg_stat_get_mod_since_analyze(c.oid) AS n_mod_since_analyze,
+    pg_stat_get_vacuum_resume_block(c.oid) AS vacuum_resume_blk,
     pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum,
     pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum,
     pg_stat_get_last_analyze_time(c.oid) AS last_analyze,
@@ -2000,6 +2001,7 @@ pg_stat_sys_tables| SELECT pg_stat_all_tables.relid,
     pg_stat_all_tables.n_live_tup,
     pg_stat_all_tables.n_dead_tup,
     pg_stat_all_tables.n_mod_since_analyze,
+    pg_stat_all_tables.vacuum_resume_blk,
     pg_stat_all_tables.last_vacuum,
     pg_stat_all_tables.last_autovacuum,
     pg_stat_all_tables.last_analyze,
@@ -2043,6 +2045,7 @@ pg_stat_user_tables| SELECT pg_stat_all_tables.relid,
     pg_stat_all_tables.n_live_tup,
     pg_stat_all_tables.n_dead_tup,
     pg_stat_all_tables.n_mod_since_analyze,
+    pg_stat_all_tables.vacuum_resume_blk,
     pg_stat_all_tables.last_vacuum,
     pg_stat_all_tables.last_autovacuum,
     pg_stat_all_tables.last_analyze,
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index aff0b10..d2f5de8 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -148,6 +148,25 @@ SELECT pg_relation_size('vac_truncate_test') = 0;
 
 VACUUM (TRUNCATE FALSE, FULL TRUE) vac_truncate_test;
 DROP TABLE vac_truncate_test;
+-- RESUME option
+CREATE TABLE resume_test (i INT PRIMARY KEY, t TEXT);
+INSERT INTO resume_test(i, t) VALUES (generate_series(1,30),
+    repeat('1234567890',300));
+VACUUM (RESUME TRUE) resume_test;
+-- resume option is ignored
+VACUUM (RESUME TRUE, FREEZE TRUE) resume_test;
+VACUUM (RESUME TRUE, FULL TRUE) resume_test;
+VACUUM (RESUME TRUE, DISABLE_PAGE_SKIPPING TRUE) resume_test;
+-- Only parent enables resuming
+ALTER TABLE resume_test SET (vacuum_resume = true,
+      toast.vacuum_resume = false);
+VACUUM (RESUME TRUE) resume_test;
+-- Only toast table enables resuming
+ALTER TABLE resume_test SET (vacuum_resume = false,
+      toast.vacuum_resume = true);
+-- Test some extra relations.
+VACUUM (RESUME TRUE) vaccluster;
+VACUUM (RESUME TRUE) vactst;
 -- partitioned table
 CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a);
 CREATE TABLE vacparted1 PARTITION OF vacparted FOR VALUES IN (1);
@@ -217,6 +236,7 @@ DROP TABLE vaccluster;
 DROP TABLE vactst;
 DROP TABLE vacparted;
 DROP TABLE no_index_cleanup;
+DROP TABLE resume_test;
 -- relation ownership, WARNING logs generated as all are skipped.
 CREATE TABLE vacowned (a int);
 CREATE TABLE vacowned_parted (a int) PARTITION BY LIST (a);
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index f0fee3a..a1f2ad8 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -121,6 +121,26 @@ SELECT pg_relation_size('vac_truncate_test') = 0;
 VACUUM (TRUNCATE FALSE, FULL TRUE) vac_truncate_test;
 DROP TABLE vac_truncate_test;
 
+-- RESUME option
+CREATE TABLE resume_test (i INT PRIMARY KEY, t TEXT);
+INSERT INTO resume_test(i, t) VALUES (generate_series(1,30),
+    repeat('1234567890',300));
+VACUUM (RESUME TRUE) resume_test;
+-- resume option is ignored
+VACUUM (RESUME TRUE, FREEZE TRUE) resume_test;
+VACUUM (RESUME TRUE, FULL TRUE) resume_test;
+VACUUM (RESUME TRUE, DISABLE_PAGE_SKIPPING TRUE) resume_test;
+-- Only parent enables resuming
+ALTER TABLE resume_test SET (vacuum_resume = true,
+      toast.vacuum_resume = false);
+VACUUM (RESUME TRUE) resume_test;
+-- Only toast table enables resuming
+ALTER TABLE resume_test SET (vacuum_resume = false,
+      toast.vacuum_resume = true);
+-- Test some extra relations.
+VACUUM (RESUME TRUE) vaccluster;
+VACUUM (RESUME TRUE) vactst;
+
 -- partitioned table
 CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a);
 CREATE TABLE vacparted1 PARTITION OF vacparted FOR VALUES IN (1);
@@ -180,6 +200,7 @@ DROP TABLE vaccluster;
 DROP TABLE vactst;
 DROP TABLE vacparted;
 DROP TABLE no_index_cleanup;
+DROP TABLE resume_test;
 
 -- relation ownership, WARNING logs generated as all are skipped.
 CREATE TABLE vacowned (a int);
-- 
2.10.5

Reply via email to