Hello, everyone.

> I think It is even possible to see !alive index in the same situation (it
is worse case), but I was unable to reproduce it so far.
Fortunately, it is not possible.

So, seems like I have found the source of the problem:

1) infer_arbiter_indexes calls RelationGetIndexList to get the list of
candidates.
It does not lock the selected indexes in any additional way that would
prevent index_concurrently_swap from changing them (setting and clearing validity).

                RelationGetIndexList relcache.c:4857
                infer_arbiter_indexes plancat.c:780
                make_modifytable createplan.c:7097 ----------
node->arbiterIndexes = infer_arbiter_indexes(root);
                create_modifytable_plan createplan.c:2826
                create_plan_recurse createplan.c:532
                create_plan createplan.c:349
                standard_planner planner.c:421
                planner planner.c:282
                pg_plan_query postgres.c:904
                pg_plan_queries postgres.c:996
                exec_simple_query postgres.c:1193

2) other backend marks some index as invalid and commits

                index_concurrently_swap index.c:1600
                ReindexRelationConcurrently indexcmds.c:4115
                ReindexIndex indexcmds.c:2814
                ExecReindex indexcmds.c:2743
                ProcessUtilitySlow utility.c:1567
                standard_ProcessUtility utility.c:1067
                ProcessUtility utility.c:523
                PortalRunUtility pquery.c:1158
                PortalRunMulti pquery.c:1315
                PortalRun pquery.c:791
                exec_simple_query postgres.c:1274

3) the first backend invalidates its catalog snapshot because a transactional snapshot is acquired

                InvalidateCatalogSnapshot snapmgr.c:426
                GetTransactionSnapshot snapmgr.c:278
                PortalRunMulti pquery.c:1244
                PortalRun pquery.c:791
                exec_simple_query postgres.c:1274

4) first backend copies indexes selected using previous catalog snapshot

                ExecInitModifyTable nodeModifyTable.c:4499 --------
resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
                ExecInitNode execProcnode.c:177
                InitPlan execMain.c:966
                standard_ExecutorStart execMain.c:261
                ExecutorStart execMain.c:137
                ProcessQuery pquery.c:155
                PortalRunMulti pquery.c:1277
                PortalRun pquery.c:791
                exec_simple_query postgres.c:1274

5) then reads indexes using new fresh snapshot

              RelationGetIndexList relcache.c:4816
              ExecOpenIndices execIndexing.c:175
              ExecInsert nodeModifyTable.c:792 -------------
ExecOpenIndices(resultRelInfo, onconflict != ONCONFLICT_NONE);
              ExecModifyTable nodeModifyTable.c:4059
              ExecProcNodeFirst execProcnode.c:464
              ExecProcNode executor.h:274
              ExecutePlan execMain.c:1646
              standard_ExecutorRun execMain.c:363
              ExecutorRun execMain.c:304
              ProcessQuery pquery.c:160
              PortalRunMulti pquery.c:1277
              PortalRun pquery.c:791
              exec_simple_query postgres.c:1274

6) and uses the arbiter that was selected with the stale snapshot against the new
index view (now marked as invalid)

            ExecInsert nodeModifyTable.c:1016 -------------- arbiterIndexes
= resultRelInfo->ri_onConflictArbiterIndexes;
            ............

            ExecInsert nodeModifyTable.c:1048 ---------------if
(!ExecCheckIndexConstraints(resultRelInfo, slot, estate, conflictTid,
arbiterIndexes))
            ExecModifyTable nodeModifyTable.c:4059
            ExecProcNodeFirst execProcnode.c:464
            ExecProcNode executor.h:274
            ExecutePlan execMain.c:1646
            standard_ExecutorRun execMain.c:363
            ExecutorRun execMain.c:304
            ProcessQuery pquery.c:160
            PortalRunMulti pquery.c:1277
            PortalRun pquery.c:791
            exec_simple_query postgres.c:1274


I have attached an updated test for the issue (it fails on assert quickly
and uses only 2 backends).
The same issue may happen in case of CREATE/DROP INDEX CONCURRENTLY as well.

The simplest possible fix is to use ShareLock
instead of ShareUpdateExclusiveLock in index_concurrently_swap:

            oldClassRel = relation_open(oldIndexId, ShareLock);
            newClassRel = relation_open(newIndexId, ShareLock);

However, this is not a truly "concurrent" approach. Still, such an update should
be fast enough, as far as I understand.

Best regards,
Mikhail.
Subject: [PATCH] better test
error report in case of invalid index used as arbiter
test for issue with upsert fail
---
Index: src/test/modules/test_misc/t/006_concurrently_unique_fail.pl
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl
new file mode 100644
--- /dev/null	(revision 5c5bcc5b7cc83381a6e479decd4252f3897b69bd)
+++ b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl	(revision 5c5bcc5b7cc83381a6e479decd4252f3897b69bd)
@@ -0,0 +1,44 @@
+
+# Copyright (c) 2024-2024, PostgreSQL Global Development Group
+
+# Test REINDEX INDEX CONCURRENTLY with concurrent INSERT
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+
+use Test::More;
+
+my ($node, $result);
+
+#
+# Test set-up
+#
+$node = PostgreSQL::Test::Cluster->new('CIC_test');
+$node->init;
+$node->append_conf('postgresql.conf',
+	'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
+$node->start;
+$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, updated_at timestamp)));
+
+$node->pgbench(
+		'--no-vacuum --client=2 -j 2 --transactions=1000',
+		0,
+		[qr{actually processed}],
+		[qr{^$}],
+		'concurrent INSERTs, UPDATES and RC',
+		{
+			'01_updates' => q(
+				INSERT INTO tbl VALUES(13,now()) ON CONFLICT(i) DO UPDATE SET updated_at = now();
+			),
+			'02_reindex' => q(
+				SELECT pg_try_advisory_lock(42)::integer AS gotlock \gset
+				\if :gotlock
+					REINDEX INDEX CONCURRENTLY tbl_pkey;
+					SELECT pg_advisory_unlock(42);
+				\endif
+			),
+		});
+$node->stop;
+done_testing();
\ No newline at end of file
Index: src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl
new file mode 100644
--- /dev/null	(revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
+++ b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl	(revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f)
@@ -0,0 +1,165 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+# Test REINDEX CONCURRENTLY with concurrent modifications and HOT updates
+use strict;
+use warnings;
+
+use Config;
+use Errno;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use IPC::SysV;
+use threads;
+use Time::HiRes qw( time );
+use Test::More;
+use Test::Builder;
+
+if ($@ || $windows_os)
+{
+	plan skip_all => 'Fork and shared memory are not supported by this platform';
+}
+
+my ($pid, $shmem_id, $shmem_key,  $shmem_size);
+eval 'sub IPC_CREAT {0001000}' unless defined &IPC_CREAT;
+$shmem_size = 4;
+$shmem_key = rand(1000000);
+$shmem_id = shmget($shmem_key, $shmem_size, &IPC_CREAT | 0777) or die "Can't shmget: $!";
+shmwrite($shmem_id, "wait", 0, $shmem_size) or die "Can't shmwrite: $!";
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+#
+# Test set-up
+#
+my ($node, $result);
+$node = PostgreSQL::Test::Cluster->new('RC_test');
+$node->init;
+$node->append_conf('postgresql.conf',
+	'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default));
+$node->append_conf('postgresql.conf', 'fsync = off');
+$node->append_conf('postgresql.conf', 'autovacuum = off');
+$node->start;
+$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, n int)));
+
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+# IT IS **REQUIRED** TO REPRODUCE THE ISSUE
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i, n)));
+$node->safe_psql('postgres', q(INSERT INTO tbl VALUES(13,1)));
+
+#######################################################################################################################
+#######################################################################################################################
+#######################################################################################################################
+
+my $builder = Test::More->builder;
+$builder->use_numbers(0);
+$builder->no_plan();
+
+my $child  = $builder->child("pg_bench");
+
+if(!defined($pid = fork())) {
+	# fork returned undef, so unsuccessful
+	die "Cannot fork a child: $!";
+} elsif ($pid == 0) {
+
+	$pid = fork();
+	if ($pid == 0) {
+		$node->pgbench(
+			'--no-vacuum --client=30 -j 2 --transactions=1000000',
+			0,
+			[qr{actually processed}],
+			[qr{^$}],
+			'concurrent INSERTs, UPDATES and RC',
+			{
+				'002_pgbench_concurrent_transaction_updates' => q(
+					BEGIN;
+					INSERT INTO tbl VALUES(13,1) on conflict(i) do update set n = tbl.n + EXCLUDED.n;
+					COMMIT;
+				),
+			});
+
+		if ($child->is_passing()) {
+			shmwrite($shmem_id, "done", 0, $shmem_size) or die "Can't shmwrite: $!";
+		} else {
+			shmwrite($shmem_id, "fail", 0, $shmem_size) or die "Can't shmwrite: $!";
+		}
+
+		my $pg_bench_fork_flag;
+		while (1) {
+			shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+			sleep(0.1);
+			last if $pg_bench_fork_flag eq "stop";
+		}
+	}
+	else {
+		my ($result, $stdout, $stderr, $n, $prev_n, $pg_bench_fork_flag);
+		while (1) {
+			sleep(1);
+			$prev_n = $n;
+			($result, $n, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+			diag(" current n is " . $n . ', ' . ($n - $prev_n) . ' per one second');
+			shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+			last if $pg_bench_fork_flag eq "stop";
+		}
+	}
+} else {
+	my $pg_bench_fork_flag;
+	shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+
+	subtest 'reindex run subtest' => sub {
+		is($pg_bench_fork_flag, "wait", "pg_bench_fork_flag is correct");
+
+		my %psql = (stdin => '', stdout => '', stderr => '');
+		$psql{run} = IPC::Run::start(
+			[ 'psql', '-XA', '-f', '-', '-d', $node->connstr('postgres') ],
+			'<',
+			\$psql{stdin},
+			'>',
+			\$psql{stdout},
+			'2>',
+			\$psql{stderr},
+			$psql_timeout);
+
+		my ($result, $stdout, $stderr, $n, $begin_time, $end_time, $before_reindex, $after_reindex);
+
+		# IT IS NOT REQUIRED, JUST FOR CONSISTENCY
+		($result, $stdout, $stderr) = $node->psql('postgres', q(ALTER TABLE tbl SET (parallel_workers=0);));
+		is($result, '0', 'ALTER TABLE is correct');
+
+		while (1)
+		{
+			my $sql = q(REINDEX INDEX CONCURRENTLY tbl_pkey;);
+
+			($result, $before_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+
+			diag('going to start reindex, num tuples in table is ' . $before_reindex);
+			$begin_time = time();
+			($result, $stdout, $stderr) = $node->psql('postgres', $sql);
+			is($result, '0', 'REINDEX INDEX CONCURRENTLY is correct');
+
+			$end_time = time();
+			($result, $after_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;));
+			diag('reindex ' . $n++ . ' done in ' . ($end_time - $begin_time) . ' seconds, num inserted during reindex tuples is ' . (int($after_reindex) - int($before_reindex)) . ' speed is ' . ((int($after_reindex) - int($before_reindex)) / ($end_time - $begin_time)) . ' per second');
+
+			shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!";
+			last if $pg_bench_fork_flag ne "wait";
+		}
+
+		# explicitly shut down psql instances gracefully
+		$psql{stdin} .= "\\q\n";
+		$psql{run}->finish;
+
+		is($pg_bench_fork_flag, "done", "pg_bench_fork_flag is correct");
+	};
+
+	$child->finalize();
+	$child->summary();
+	$node->stop;
+	done_testing();
+
+	shmwrite($shmem_id, "stop", 0, $shmem_size) or die "Can't shmwrite: $!";
+}
Index: src/backend/executor/execIndexing.c
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
--- a/src/backend/executor/execIndexing.c	(revision cb460d43c12db970056b2e994e023de06eabc494)
+++ b/src/backend/executor/execIndexing.c	(revision 5c5bcc5b7cc83381a6e479decd4252f3897b69bd)
@@ -587,6 +587,9 @@
 							 indexRelation->rd_index->indexrelid))
 			continue;
 
+		Assert(indexRelation->rd_index->indislive);
+		Assert(indexRelation->rd_index->indisvalid);
+
 		if (!indexRelation->rd_index->indimmediate)
 			ereport(ERROR,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),

Reply via email to