There were some minor problems in v5 -- bogus Docbook as well as
outdated rules.out, small "git diff --check" complaint about whitespace.
This v6 (on today's master) fixes those, no other changes.
--
Álvaro Herrera https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index bf72d0c303..630f9821a9 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -360,6 +360,14 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
</entry>
</row>
+ <row>
+ <entry><structname>pg_stat_progress_analyze</structname><indexterm><primary>pg_stat_progress_analyze</primary></indexterm></entry>
+ <entry>One row for each backend (including autovacuum worker processes) running
+ <command>ANALYZE</command>, showing current progress.
+ See <xref linkend='analyze-progress-reporting'/>.
+ </entry>
+ </row>
+
<row>
<entry><structname>pg_stat_progress_cluster</structname><indexterm><primary>pg_stat_progress_cluster</primary></indexterm></entry>
<entry>One row for each backend running
@@ -3481,7 +3489,7 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
<productname>PostgreSQL</productname> has the ability to report the progress of
certain commands during command execution. Currently, the only commands
which support progress reporting are <command>CREATE INDEX</command>,
- <command>VACUUM</command> and
+ <command>VACUUM</command>, <command>ANALYZE</command> and
<command>CLUSTER</command>. This may be expanded in the future.
</para>
@@ -3927,6 +3935,132 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
</sect2>
+ <sect2 id="analyze-progress-reporting">
+ <title>ANALYZE Progress Reporting</title>
+
+ <para>
+ Whenever <command>ANALYZE</command> is running, the
+ <structname>pg_stat_progress_analyze</structname> view will contain a
+ row for each backend that is currently running that command. The tables
+ below describe the information that will be reported and provide
+ information about how to interpret it.
+ </para>
+
+ <table id="pg-stat-progress-analyze-view" xreflabel="pg_stat_progress_analyze">
+ <title><structname>pg_stat_progress_analyze</structname> View</title>
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>Column</entry>
+ <entry>Type</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry><structfield>pid</structfield></entry>
+ <entry><type>integer</type></entry>
+ <entry>Process ID of backend.</entry>
+ </row>
+ <row>
+ <entry><structfield>datid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry>OID of the database to which this backend is connected.</entry>
+ </row>
+ <row>
+ <entry><structfield>datname</structfield></entry>
+ <entry><type>name</type></entry>
+ <entry>Name of the database to which this backend is connected.</entry>
+ </row>
+ <row>
+ <entry><structfield>relid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry>OID of the table being analyzed.</entry>
+ </row>
+ <row>
+ <entry><structfield>include_children</structfield></entry>
+ <entry><type>boolean</type></entry>
+ <entry>Whether scanning through child tables.</entry>
+ </row>
+ <row>
+ <entry><structfield>current_relid</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry>OID of the table currently being scanned.
+ It might be different from relid when analyzing tables that have child tables.
+ </entry>
+ </row>
+ <row>
+ <entry><structfield>phase</structfield></entry>
+ <entry><type>text</type></entry>
+ <entry>Current processing phase. See <xref linkend="analyze-phases" /></entry>
+ </row>
+ <row>
+ <entry><structfield>sample_blks_total</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>
+ Total number of heap blocks that will be sampled.
+ </entry>
+ </row>
+ <row>
+ <entry><structfield>heap_blks_scanned</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>
+ Number of heap blocks scanned.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table id="analyze-phases">
+ <title>ANALYZE phases</title>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Phase</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><literal>initializing</literal></entry>
+ <entry>
+ The command is preparing to begin scanning the heap. This phase is
+ expected to be very brief.
+ </entry>
+ </row>
+ <row>
+ <entry><literal>acquiring sample rows</literal></entry>
+ <entry>
+ The command is currently scanning the <structfield>current_relid</structfield>
+ to obtain samples.
+ </entry>
+ </row>
+ <row>
+ <entry><literal>computing stats</literal></entry>
+ <entry>
+ The command is computing stats from the samples obtained during the table scan.
+ </entry>
+ </row>
+ <row>
+ <entry><literal>computing extended stats</literal></entry>
+ <entry>
+ The command is computing extended stats from the samples obtained in the previous phase.
+ </entry>
+ </row>
+ <row>
+ <entry><literal>finalizing analyze</literal></entry>
+ <entry>
+ The command is updating pg_class. When this phase is completed,
+ <command>ANALYZE</command> will end.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </sect2>
+
<sect2 id="cluster-progress-reporting">
<title>CLUSTER Progress Reporting</title>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index ea4c85e395..cc0c6291d9 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -964,6 +964,22 @@ CREATE VIEW pg_stat_progress_vacuum AS
FROM pg_stat_get_progress_info('VACUUM') AS S
LEFT JOIN pg_database D ON S.datid = D.oid;
+CREATE VIEW pg_stat_progress_analyze AS
+ SELECT
+ S.pid AS pid, S.datid AS datid, D.datname AS datname,
+ CAST(S.relid AS oid) AS relid,
+ CAST(CAST(S.param2 AS int) AS boolean) AS include_children,
+ CAST(S.param3 AS oid) AS current_relid,
+ CASE S.param1 WHEN 0 THEN 'initializing'
+ WHEN 1 THEN 'acquiring sample rows'
+ WHEN 2 THEN 'computing stats'
+ WHEN 3 THEN 'computing extended stats'
+ WHEN 4 THEN 'finalizing analyze'
+ END AS phase,
+ S.param4 AS sample_blks_total, S.param5 AS heap_blks_scanned
+ FROM pg_stat_get_progress_info('ANALYZE') AS S
+ LEFT JOIN pg_database D ON S.datid = D.oid;
+
CREATE VIEW pg_stat_progress_cluster AS
SELECT
S.pid AS pid,
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index d2fdf447b6..6380e4cbbb 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -35,6 +35,7 @@
#include "catalog/pg_namespace.h"
#include "catalog/pg_statistic_ext.h"
#include "commands/dbcommands.h"
+#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
@@ -251,6 +252,8 @@ analyze_rel(Oid relid, RangeVar *relation,
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyPgXact->vacuumFlags |= PROC_IN_ANALYZE;
LWLockRelease(ProcArrayLock);
+ pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE,
+ RelationGetRelid(onerel));
/*
* Do the normal non-recursive ANALYZE. We can skip this for partitioned
@@ -275,6 +278,8 @@ analyze_rel(Oid relid, RangeVar *relation,
*/
relation_close(onerel, NoLock);
+ pgstat_progress_end_command();
+
/*
* Reset my PGXACT flag. Note: we need this here, and not in vacuum_rel,
* because the vacuum flag is cleared by the end-of-xact code.
@@ -317,6 +322,11 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
Oid save_userid;
int save_sec_context;
int save_nestlevel;
+ const int st_index[] = {
+ PROGRESS_ANALYZE_PHASE,
+ PROGRESS_ANALYZE_INCLUDE_CHILDREN
+ };
+ int64 st_val[2];
if (inh)
ereport(elevel,
@@ -494,6 +504,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
/*
* Acquire the sample rows
*/
+ st_val[0] = PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS;
+ st_val[1] = inh;
+ pgstat_progress_update_multi_param(2, st_index, st_val);
+
rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
if (inh)
numrows = acquire_inherited_sample_rows(onerel, elevel,
@@ -513,7 +527,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
if (numrows > 0)
{
MemoryContext col_context,
- old_context;
+ old_context;
+
+ pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+ PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
col_context = AllocSetContextCreate(anl_context,
"Analyze Column",
@@ -581,10 +598,17 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
* not for relations representing inheritance trees.
*/
if (!inh)
+ {
+ pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+ PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS);
BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
attr_cnt, vacattrstats);
+ }
}
+ pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
+ PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE);
+
/*
* Update pages/tuples stats in pg_class ... but not if we're doing
* inherited stats.
@@ -1023,6 +1047,13 @@ acquire_sample_rows(Relation onerel, int elevel,
ReservoirStateData rstate;
TupleTableSlot *slot;
TableScanDesc scan;
+ BlockNumber nblocks;
+ BlockNumber blksdone = 0;
+ const int sb_index[] = {
+ PROGRESS_ANALYZE_TOTAL_BLOCKS,
+ PROGRESS_ANALYZE_SCANREL
+ };
+ int64 sb_val[2];
Assert(targrows > 0);
@@ -1032,7 +1063,13 @@ acquire_sample_rows(Relation onerel, int elevel,
OldestXmin = GetOldestXmin(onerel, PROCARRAY_FLAGS_VACUUM);
/* Prepare for sampling block numbers */
- BlockSampler_Init(&bs, totalblocks, targrows, random());
+ nblocks = BlockSampler_Init(&bs, totalblocks, targrows, random());
+
+ /* Report sampling block numbers */
+ sb_val[0] = nblocks;
+ sb_val[1] = RelationGetRelid(onerel);
+ pgstat_progress_update_multi_param(2, sb_index, sb_val);
+
/* Prepare for sampling rows */
reservoir_init_selection_state(&rstate, targrows);
@@ -1093,6 +1130,9 @@ acquire_sample_rows(Relation onerel, int elevel,
samplerows += 1;
}
+
+ pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_DONE,
+ ++blksdone);
}
ExecDropSingleTupleTableSlot(slot);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 05240bfd14..db2cc5c316 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -469,6 +469,8 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
/* Translate command name into command type code. */
if (pg_strcasecmp(cmd, "VACUUM") == 0)
cmdtype = PROGRESS_COMMAND_VACUUM;
+ else if (pg_strcasecmp(cmd, "ANALYZE") == 0)
+ cmdtype = PROGRESS_COMMAND_ANALYZE;
else if (pg_strcasecmp(cmd, "CLUSTER") == 0)
cmdtype = PROGRESS_COMMAND_CLUSTER;
else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0)
diff --git a/src/backend/utils/misc/sampling.c b/src/backend/utils/misc/sampling.c
index d2a1537979..f7daece5ee 100644
--- a/src/backend/utils/misc/sampling.c
+++ b/src/backend/utils/misc/sampling.c
@@ -32,8 +32,10 @@
* Since we know the total number of blocks in advance, we can use the
* straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's
* algorithm.
+ *
+ * Returns the number of blocks that BlockSampler_Next will return.
*/
-void
+BlockNumber
BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
long randseed)
{
@@ -48,6 +50,8 @@ BlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize,
bs->m = 0; /* blocks selected so far */
sampler_random_init_state(randseed, bs->randstate);
+
+ return Min(bs->n, bs->N);
}
bool
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index acd1313cb3..d99e8613d2 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -34,6 +34,20 @@
#define PROGRESS_VACUUM_PHASE_TRUNCATE 5
#define PROGRESS_VACUUM_PHASE_FINAL_CLEANUP 6
+/* Progress parameters for analyze */
+#define PROGRESS_ANALYZE_PHASE 0
+#define PROGRESS_ANALYZE_INCLUDE_CHILDREN 1
+#define PROGRESS_ANALYZE_SCANREL 2
+#define PROGRESS_ANALYZE_TOTAL_BLOCKS 3
+#define PROGRESS_ANALYZE_BLOCKS_DONE 4
+
+/* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */
+#define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1
+#define PROGRESS_ANALYZE_PHASE_COMPUTE_STATS 2
+#define PROGRESS_ANALYZE_PHASE_COMPUTE_EXT_STATS 3
+#define PROGRESS_ANALYZE_PHASE_FINALIZE_ANALYZE 4
+
+
/* Progress parameters for cluster */
#define PROGRESS_CLUSTER_COMMAND 0
#define PROGRESS_CLUSTER_PHASE 1
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fe076d823d..df418af4e7 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -955,6 +955,7 @@ typedef enum ProgressCommandType
{
PROGRESS_COMMAND_INVALID,
PROGRESS_COMMAND_VACUUM,
+ PROGRESS_COMMAND_ANALYZE,
PROGRESS_COMMAND_CLUSTER,
PROGRESS_COMMAND_CREATE_INDEX
} ProgressCommandType;
diff --git a/src/include/utils/sampling.h b/src/include/utils/sampling.h
index 541b507fb5..76d31dc126 100644
--- a/src/include/utils/sampling.h
+++ b/src/include/utils/sampling.h
@@ -37,7 +37,7 @@ typedef struct
typedef BlockSamplerData *BlockSampler;
-extern void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
+extern BlockNumber BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
int samplesize, long randseed);
extern bool BlockSampler_HasMore(BlockSampler bs);
extern BlockNumber BlockSampler_Next(BlockSampler bs);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 210e9cd146..638fb6acca 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1846,6 +1846,24 @@ pg_stat_gssapi| SELECT s.pid,
s.gss_princ AS principal,
s.gss_enc AS encrypted
FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, sslcompression, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc);
+pg_stat_progress_analyze| SELECT s.pid,
+ s.datid,
+ d.datname,
+ s.relid,
+ ((s.param2)::integer)::boolean AS include_children,
+ (s.param3)::oid AS current_relid,
+ CASE s.param1
+ WHEN 0 THEN 'initializing'::text
+ WHEN 1 THEN 'acquiring sample rows'::text
+ WHEN 2 THEN 'computing stats'::text
+ WHEN 3 THEN 'computing extended stats'::text
+ WHEN 4 THEN 'finalizing analyze'::text
+ ELSE NULL::text
+ END AS phase,
+ s.param4 AS sample_blks_total,
+ s.param5 AS heap_blks_scanned
+ FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
+ LEFT JOIN pg_database d ON ((s.datid = d.oid)));
pg_stat_progress_cluster| SELECT s.pid,
s.datid,
d.datname,