On Tue, Jul 09, 2024 at 01:12:59PM -0500, Nathan Bossart wrote:
> I've committed 0001.  It looks like 0002 failed CI testing [0], but I
> haven't investigated why.
> 
> [0] https://cirrus-ci.com/task/5668467599212544

Nice catch by the CI.  This looks like a race condition to me.  I
think that we should wait for the autovacuum worker to exit, and then
scan the server logs we expect.

For this failure, look at the timestamps of the server logs:
2024-07-08 12:48:23.271 UTC [32697][client backend]
[006_signal_autovacuum.pl][11/3:0] LOG:  statement: SELECT
pg_terminate_backend(32672);
2024-07-08 12:48:23.275 UTC [32697][client backend]
[006_signal_autovacuum.pl][:0] LOG:  disconnection: session time:
0:00:00.018 user=postgres database=postgres host=[local]
2024-07-08 12:48:23.278 UTC [32672][autovacuum worker] FATAL:
terminating autovacuum process due to administrator command

And then the timestamp of the tests:
[12:48:23.277](0.058s) not ok 2 - autovacuum worker signaled with
pg_signal_autovacuum_worker granted

We check for the contents of the logs 1ms before they are generated,
hence failing the lookup check because the test is faster than the
backend.

Like what we are doing in 010_pg_basebackup.pl, we could do a
poll_query_until() until the PID of the autovacuum worker is gone from
pg_stat_activity before fetching the logs as ProcessInterrupts() stuff
would be logged before the process exits, say:
+# Wait for the autovacuum worker to exit before scanning the logs.
+$node->poll_query_until('postgres',
+   "SELECT count(*) = 0 FROM pg_stat_activity "
+   . "WHERE pid = $av_pid AND backend_type = 'autovacuum worker';");

That gives something like the attached.  Does that look correct to
you?
--
Michael
From 4f79a7741a0ff38ab063c02bc81a2ace60848d92 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 10 Jul 2024 14:13:00 +0900
Subject: [PATCH v7] Add tap test for pg_signal_autovacuum role

---
 src/backend/postmaster/autovacuum.c           |   7 ++
 .../test_misc/t/006_signal_autovacuum.pl      | 105 ++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 src/test/modules/test_misc/t/006_signal_autovacuum.pl

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 928754b51c..4e4a0ccbef 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -100,6 +100,7 @@
 #include "utils/fmgroids.h"
 #include "utils/fmgrprotos.h"
 #include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
@@ -1902,6 +1903,12 @@ do_autovacuum(void)
 	/* Start a transaction so our commands have one to play into. */
 	StartTransactionCommand();
 
+	/*
+	 * This injection point is put in a transaction block to work with a wait
+	 * that uses a condition variable.
+	 */
+	INJECTION_POINT("autovacuum-worker-start");
+
 	/*
 	 * Compute the multixact age for which freezing is urgent.  This is
 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
diff --git a/src/test/modules/test_misc/t/006_signal_autovacuum.pl b/src/test/modules/test_misc/t/006_signal_autovacuum.pl
new file mode 100644
index 0000000000..136dc8c52e
--- /dev/null
+++ b/src/test/modules/test_misc/t/006_signal_autovacuum.pl
@@ -0,0 +1,105 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+# Test signaling autovacuum worker backend by non-superuser role.
+#
+# Only non-superuser roles granted pg_signal_autovacuum_worker are allowed
+# to signal autovacuum workers.  This test uses an injection point located
+# at the beginning of the autovacuum worker startup.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+# Initialize postgres
+my $psql_err = '';
+my $psql_out = '';
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+
+# This ensures a quick worker spawn.
+$node->append_conf(
+	'postgresql.conf', 'autovacuum_naptime = 1');
+$node->start;
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+$node->safe_psql(
+	'postgres', qq(
+    CREATE ROLE regular_role;
+    CREATE ROLE signal_autovacuum_worker_role;
+    GRANT pg_signal_autovacuum_worker TO signal_autovacuum_worker_role;
+));
+
+# From this point, autovacuum worker will wait at startup.
+$node->safe_psql('postgres',
+	"SELECT injection_points_attach('autovacuum-worker-start', 'wait');");
+
+# Create some content and set an aggressive autovacuum.
+$node->safe_psql(
+	'postgres', qq(
+    CREATE TABLE tab_int(i int);
+    ALTER TABLE tab_int SET (autovacuum_vacuum_cost_limit = 1);
+    ALTER TABLE tab_int SET (autovacuum_vacuum_cost_delay = 100);
+));
+
+$node->safe_psql(
+	'postgres', qq(
+    INSERT INTO tab_int VALUES(1);
+));
+
+# Wait until an autovacuum worker starts.
+$node->wait_for_event('autovacuum worker', 'autovacuum-worker-start');
+
+my $av_pid = $node->safe_psql(
+	'postgres', qq(
+    SELECT pid FROM pg_stat_activity WHERE backend_type = 'autovacuum worker';
+));
+
+# Regular role cannot terminate autovacuum worker.
+my $terminate_with_no_pg_signal_av = $node->psql(
+	'postgres', qq(
+    SET ROLE regular_role;
+    SELECT pg_terminate_backend($av_pid);
+),
+	stdout => \$psql_out,
+	stderr => \$psql_err);
+
+like(
+	$psql_err,
+	qr/ERROR:  permission denied to terminate process\nDETAIL:  Only roles with privileges of the "pg_signal_autovacuum_worker" role may terminate autovacuum workers./,
+	"autovacuum worker not signaled with regular role");
+
+my $offset = -s $node->logfile;
+
+# Role with pg_signal_autovacuum can terminate autovacuum worker.
+my $terminate_with_pg_signal_av = $node->psql(
+	'postgres', qq(
+    SET ROLE signal_autovacuum_worker_role;
+    SELECT pg_terminate_backend($av_pid);
+),
+	stdout => \$psql_out,
+	stderr => \$psql_err);
+
+# Wait for the autovacuum worker to exit before scanning the logs.
+$node->poll_query_until('postgres',
+	"SELECT count(*) = 0 FROM pg_stat_activity "
+	. "WHERE pid = $av_pid AND backend_type = 'autovacuum worker';");
+
+# Check that the primary server logs a FATAL indicating that autovacuum
+# is terminated.
+ok( $node->log_contains(
+		qr/FATAL:  terminating autovacuum process due to administrator command/,
+		$offset),
+	"autovacuum worker signaled with pg_signal_autovacuum_worker granted"
+);
+
+# Release injection point.
+$node->safe_psql('postgres',
+	"SELECT injection_points_detach('autovacuum-worker-start');");
+
+done_testing();
-- 
2.45.2

Attachment: signature.asc
Description: PGP signature

Reply via email to