Hi hackers, During the last pgconf.dev I attended Robert’s presentation about autovacuum and it made me remember of an idea I had some time ago: $SUBJECT
Please find attached a patch doing so by adding a new field (aka "time_delayed") to the pg_stat_progress_vacuum view. Currently one can change [autovacuum_]vacuum_cost_delay and [auto vacuum]vacuum_cost_limit but has no reliable way to measure the impact of the changes on the vacuum duration: one could observe the vacuum duration variation but the correlation to the changes is not accurate (as many others factors could impact the vacuum duration (load on the system, i/o latency,...)). This new field reports the time that the vacuum has to sleep due to cost delay: it could be useful to 1) measure the impact of the current cost_delay and cost_limit settings and 2) when experimenting new values (and then help for decision making for those parameters). The patch is relatively small thanks to the work that has been done in f1889729dd (to allow parallel worker to report to the leader). [1]: https://www.pgevents.ca/events/pgconfdev2024/schedule/session/29-how-autovacuum-goes-wrong-and-can-we-please-make-it-stop-doing-that/ Looking forward to your feedback, Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From 750dfc26cd6fcf5a5618c3fe35fc42d5b5c66f00 Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Thu, 6 Jun 2024 12:35:57 +0000 Subject: [PATCH v1] Report the total amount of time that vacuum has been delayed due to cost delay This commit adds one column: time_delayed to the pg_stat_progress_vacuum system view to show the total amount of time in milliseconds that vacuum has been delayed. This uses the new parallel message type for progress reporting added by f1889729dd. Bump catversion because this changes the definition of pg_stat_progress_vacuum. --- doc/src/sgml/monitoring.sgml | 11 +++++++++++ src/backend/catalog/system_views.sql | 3 ++- src/backend/commands/vacuum.c | 6 ++++++ src/include/catalog/catversion.h | 2 +- src/include/commands/progress.h | 1 + src/test/regress/expected/rules.out | 3 ++- 6 files changed, 23 insertions(+), 3 deletions(-) 47.6% doc/src/sgml/ 3.7% src/backend/catalog/ 26.8% src/backend/commands/ 7.5% src/include/catalog/ 4.1% src/include/commands/ 10.0% src/test/regress/expected/ diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index 053da8d6e4..cdd0f0e533 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -6290,6 +6290,17 @@ FROM pg_stat_get_backend_idset() AS backendid; <literal>cleaning up indexes</literal>. </para></entry> </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>time_delayed</structfield> <type>bigint</type> + </para> + <para> + Total amount of time spent in milliseconds waiting due to <varname>vacuum_cost_delay</varname> + or <varname>autovacuum_vacuum_cost_delay</varname>. In case of parallel + vacuum the reported time is across all the workers and the leader. + </para></entry> + </row> </tbody> </tgroup> </table> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 53047cab5f..1345e99dcb 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1221,7 +1221,8 @@ CREATE VIEW pg_stat_progress_vacuum AS S.param2 AS heap_blks_total, S.param3 AS heap_blks_scanned, S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count, S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes, - S.param8 AS indexes_total, S.param9 AS indexes_processed + S.param8 AS indexes_total, S.param9 AS indexes_processed, + S.param10 AS time_delayed FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 48f8eab202..2551408a86 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -40,6 +40,7 @@ #include "catalog/pg_inherits.h" #include "commands/cluster.h" #include "commands/defrem.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -2386,6 +2387,11 @@ vacuum_delay_point(void) pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); pg_usleep(msec * 1000); pgstat_report_wait_end(); + /* Report the amount of time we slept */ + if (VacuumSharedCostBalance != NULL) + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_TIME_DELAYED, msec); + else + pgstat_progress_incr_param(PROGRESS_VACUUM_TIME_DELAYED, msec); /* * We don't want to ignore postmaster death during very long vacuums diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f0809c0e58..40b4f1d1e4 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202405161 +#define CATALOG_VERSION_NO 202406101 #endif diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 82a8fe6bd1..1fcefe9436 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -27,6 +27,7 @@ #define PROGRESS_VACUUM_DEAD_TUPLE_BYTES 6 #define PROGRESS_VACUUM_INDEXES_TOTAL 7 #define PROGRESS_VACUUM_INDEXES_PROCESSED 8 +#define PROGRESS_VACUUM_TIME_DELAYED 9 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */ #define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1 diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index ef658ad740..a499e44df1 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2053,7 +2053,8 @@ pg_stat_progress_vacuum| SELECT s.pid, s.param6 AS max_dead_tuple_bytes, s.param7 AS dead_tuple_bytes, s.param8 AS indexes_total, - s.param9 AS indexes_processed + s.param9 AS indexes_processed, + s.param10 AS time_delayed FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_recovery_prefetch| SELECT stats_reset, -- 2.34.1