Hello. > But I was unable to reproduce that using some random usleep(), however - maybe it is a wrong assumption. It seems like the assumption is correct - we may use an invalid index as arbiter due to race condition.
The attached patch adds a check for that case, and now the test fails like this: # pgbench: error: client 16 script 1 aborted in command 1 query 0: ERROR: duplicate key value violates unique constraint "tbl_pkey_ccold" # DETAIL: Key (i)=(42) already exists. # pgbench: error: client 9 script 1 aborted in command 1 query 0: ERROR: ON CONFLICT does not support invalid indexes as arbiters # pgbench: error: client 0 script 2 aborted in command 1 query 0: ERROR: duplicate key value violates unique constraint "tbl_pkey" # DETAIL: Key (i)=(69) already exists. # pgbench: error: client 7 script 0 aborted in command 1 query 0: ERROR: ON CONFLICT does not support invalid indexes as arbiters # pgbench: error: client 10 script 0 aborted in command 1 query 0: ERROR: ON CONFLICT does not support invalid indexes as arbiters # pgbench: error: client 11 script 0 aborted in command 1 query 0: ERROR: ON CONFLICT does not support invalid indexes as arbiters I think It is even possible to see !alive index in the same situation (it is worse case), but I was unable to reproduce it so far. Best regards, Mikhail.
Subject: [PATCH] error report in case of invalid index used as arbiter test for issue with upsert fail --- Index: src/test/modules/test_misc/t/006_concurrently_unique_fail.pl IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl new file mode 100644 --- /dev/null (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f) +++ b/src/test/modules/test_misc/t/006_concurrently_unique_fail.pl (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f) @@ -0,0 +1,158 @@ + +# Copyright (c) 2024, PostgreSQL Global Development Group + +# Test REINDEX CONCURRENTLY with concurrent modifications and HOT updates +use strict; +use warnings; + +use Config; +use Errno; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Time::HiRes qw(usleep); +use IPC::SysV; +use threads; +use Time::HiRes qw( time ); +use Test::More; +use Test::Builder; + +if ($@ || $windows_os) +{ + plan skip_all => 'Fork and shared memory are not supported by this platform'; +} + +my ($pid, $shmem_id, $shmem_key, $shmem_size); +eval 'sub IPC_CREAT {0001000}' unless defined &IPC_CREAT; +$shmem_size = 4; +$shmem_key = rand(1000000); +$shmem_id = shmget($shmem_key, $shmem_size, &IPC_CREAT | 0777) or die "Can't shmget: $!"; +shmwrite($shmem_id, "wait", 0, $shmem_size) or die "Can't shmwrite: $!"; + +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); +# +# Test set-up +# +my ($node, $result); +$node = PostgreSQL::Test::Cluster->new('RC_test'); +$node->init; +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); +$node->append_conf('postgresql.conf', 'fsync = off'); +$node->append_conf('postgresql.conf', 'autovacuum = off'); +$node->start; +$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, updated_at timestamp))); + + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +# IT IS NOT REQUIRED TO REPRODUCE THE ISSUE BUT MAKES IT TO HAPPEN FASTER +$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i, updated_at))); + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +my $builder = Test::More->builder; +$builder->use_numbers(0); +$builder->no_plan(); + +my $child = $builder->child("pg_bench"); + +if(!defined($pid = fork())) { + # fork returned undef, so unsuccessful + die "Cannot fork a child: $!"; +} elsif ($pid == 0) { + + $node->pgbench( + '--no-vacuum --client=20 -j 2 --transactions=100000', + 0, + [qr{actually processed}], + [qr{^$}], + 'concurrent INSERTs, UPDATES and RC', + { + # Ensure some HOT updates happen + '002_pgbench_concurrent_transaction_updates' => q( + BEGIN; + INSERT INTO tbl VALUES(13,now()) on conflict(i) do update set updated_at = now(); + COMMIT; + ), + '003_pgbench_concurrent_transaction_updates' => q( + BEGIN; + INSERT INTO tbl VALUES(42,now()) on conflict(i) do update set updated_at = now(); + COMMIT; + ), + '004_pgbench_concurrent_transaction_updates' => q( + BEGIN; + INSERT INTO tbl VALUES(69,now()) on conflict(i) do update set updated_at = now(); + COMMIT; + ), + }); + + if ($child->is_passing()) { + shmwrite($shmem_id, "done", 0, $shmem_size) or die "Can't shmwrite: $!"; + } else { + shmwrite($shmem_id, "fail", 0, $shmem_size) or die "Can't shmwrite: $!"; + } + + my $pg_bench_fork_flag; + while (1) { + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + sleep(0.1); + last if $pg_bench_fork_flag eq "stop"; + } +} else { + my $pg_bench_fork_flag; + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + + subtest 'reindex run subtest' => sub { + is($pg_bench_fork_flag, "wait", "pg_bench_fork_flag is correct"); + + my %psql = (stdin => '', stdout => '', stderr => ''); + $psql{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node->connstr('postgres') ], + '<', + \$psql{stdin}, + '>', + \$psql{stdout}, + '2>', + \$psql{stderr}, + $psql_timeout); + + my ($result, $stdout, $stderr, $n, $begin_time, $end_time); + + # IT IS NOT REQUIRED, JUST FOR CONSISTENCY + ($result, $stdout, $stderr) = $node->psql('postgres', q(ALTER TABLE tbl SET (parallel_workers=0);)); + is($result, '0', 'ALTER TABLE is correct'); + + $begin_time = time(); + while (1) + { + my $sql = q(REINDEX INDEX CONCURRENTLY tbl_pkey;); + + ($result, $stdout, $stderr) = $node->psql('postgres', $sql); + is($result, '0', 'REINDEX INDEX CONCURRENTLY is correct'); + + $end_time = time(); + diag('waiting for an about 3000, now is ' . $n++ . ', seconds passed : ' . int($end_time - $begin_time)); + + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + last if $pg_bench_fork_flag ne "wait"; + } + + # explicitly shut down psql instances gracefully + $psql{stdin} .= "\\q\n"; + $psql{run}->finish; + + is($pg_bench_fork_flag, "done", "pg_bench_fork_flag is correct"); + }; + + $child->finalize(); + $child->summary(); + $node->stop; + done_testing(); + + shmwrite($shmem_id, "stop", 0, $shmem_size) or die "Can't shmwrite: $!"; +} Index: src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl new file mode 100644 --- /dev/null (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f) +++ b/src/test/modules/test_misc/t/007_concurrently_unique_stuck.pl (revision 9446f944b415306d9e5d5ab98f69938d8f5ee87f) @@ -0,0 +1,165 @@ +# Copyright (c) 2024, PostgreSQL Global Development Group + +# Test REINDEX CONCURRENTLY with concurrent modifications and HOT updates +use strict; +use warnings; + +use Config; +use Errno; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Time::HiRes qw(usleep); +use IPC::SysV; +use threads; +use Time::HiRes qw( time ); +use Test::More; +use Test::Builder; + +if ($@ || $windows_os) +{ + plan skip_all => 'Fork and shared memory are not supported by this platform'; +} + +my ($pid, $shmem_id, $shmem_key, $shmem_size); +eval 'sub IPC_CREAT {0001000}' unless defined &IPC_CREAT; +$shmem_size = 4; +$shmem_key = rand(1000000); +$shmem_id = shmget($shmem_key, $shmem_size, &IPC_CREAT | 0777) or die "Can't shmget: $!"; +shmwrite($shmem_id, "wait", 0, $shmem_size) or die "Can't shmwrite: $!"; + +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); +# +# Test set-up +# +my ($node, $result); +$node = PostgreSQL::Test::Cluster->new('RC_test'); +$node->init; +$node->append_conf('postgresql.conf', + 'lock_timeout = ' . (1000 * $PostgreSQL::Test::Utils::timeout_default)); +$node->append_conf('postgresql.conf', 'fsync = off'); +$node->append_conf('postgresql.conf', 'autovacuum = off'); +$node->start; +$node->safe_psql('postgres', q(CREATE UNLOGGED TABLE tbl(i int primary key, n int))); + + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +# IT IS **REQUIRED** TO REPRODUCE THE ISSUE +$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i, n))); +$node->safe_psql('postgres', q(INSERT INTO tbl VALUES(13,1))); + +####################################################################################################################### +####################################################################################################################### +####################################################################################################################### + +my $builder = Test::More->builder; +$builder->use_numbers(0); +$builder->no_plan(); + +my $child = $builder->child("pg_bench"); + +if(!defined($pid = fork())) { + # fork returned undef, so unsuccessful + die "Cannot fork a child: $!"; +} elsif ($pid == 0) { + + $pid = fork(); + if ($pid == 0) { + $node->pgbench( + '--no-vacuum --client=30 -j 2 --transactions=1000000', + 0, + [qr{actually processed}], + [qr{^$}], + 'concurrent INSERTs, UPDATES and RC', + { + '002_pgbench_concurrent_transaction_updates' => q( + BEGIN; + INSERT INTO tbl VALUES(13,1) on conflict(i) do update set n = tbl.n + EXCLUDED.n; + COMMIT; + ), + }); + + if ($child->is_passing()) { + shmwrite($shmem_id, "done", 0, $shmem_size) or die "Can't shmwrite: $!"; + } else { + shmwrite($shmem_id, "fail", 0, $shmem_size) or die "Can't shmwrite: $!"; + } + + my $pg_bench_fork_flag; + while (1) { + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + sleep(0.1); + last if $pg_bench_fork_flag eq "stop"; + } + } + else { + my ($result, $stdout, $stderr, $n, $prev_n, $pg_bench_fork_flag); + while (1) { + sleep(1); + $prev_n = $n; + ($result, $n, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;)); + diag(" current n is " . $n . ', ' . ($n - $prev_n) . ' per one second'); + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + last if $pg_bench_fork_flag eq "stop"; + } + } +} else { + my $pg_bench_fork_flag; + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + + subtest 'reindex run subtest' => sub { + is($pg_bench_fork_flag, "wait", "pg_bench_fork_flag is correct"); + + my %psql = (stdin => '', stdout => '', stderr => ''); + $psql{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node->connstr('postgres') ], + '<', + \$psql{stdin}, + '>', + \$psql{stdout}, + '2>', + \$psql{stderr}, + $psql_timeout); + + my ($result, $stdout, $stderr, $n, $begin_time, $end_time, $before_reindex, $after_reindex); + + # IT IS NOT REQUIRED, JUST FOR CONSISTENCY + ($result, $stdout, $stderr) = $node->psql('postgres', q(ALTER TABLE tbl SET (parallel_workers=0);)); + is($result, '0', 'ALTER TABLE is correct'); + + while (1) + { + my $sql = q(REINDEX INDEX CONCURRENTLY tbl_pkey;); + + ($result, $before_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;)); + + diag('going to start reindex, num tuples in table is ' . $before_reindex); + $begin_time = time(); + ($result, $stdout, $stderr) = $node->psql('postgres', $sql); + is($result, '0', 'REINDEX INDEX CONCURRENTLY is correct'); + + $end_time = time(); + ($result, $after_reindex, $stderr) = $node->psql('postgres', q(SELECT n from tbl WHERE i = 13;)); + diag('reindex ' . $n++ . ' done in ' . ($end_time - $begin_time) . ' seconds, num inserted during reindex tuples is ' . (int($after_reindex) - int($before_reindex)) . ' speed is ' . ((int($after_reindex) - int($before_reindex)) / ($end_time - $begin_time)) . ' per second'); + + shmread($shmem_id, $pg_bench_fork_flag, 0, $shmem_size) or die "Can't shmread: $!"; + last if $pg_bench_fork_flag ne "wait"; + } + + # explicitly shut down psql instances gracefully + $psql{stdin} .= "\\q\n"; + $psql{run}->finish; + + is($pg_bench_fork_flag, "done", "pg_bench_fork_flag is correct"); + }; + + $child->finalize(); + $child->summary(); + $node->stop; + done_testing(); + + shmwrite($shmem_id, "stop", 0, $shmem_size) or die "Can't shmwrite: $!"; +} Index: src/backend/executor/execIndexing.c IDEA additional info: Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP <+>UTF-8 =================================================================== diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c --- a/src/backend/executor/execIndexing.c (revision cb460d43c12db970056b2e994e023de06eabc494) +++ b/src/backend/executor/execIndexing.c (revision dd72fa154aed98254b47637fed62a1d584131dbf) @@ -587,6 +587,13 @@ indexRelation->rd_index->indexrelid)) continue; + if (!indexRelation->rd_index->indisvalid) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("ON CONFLICT does not support invalid indexes as arbiters"), + errtableconstraint(heapRelation, + RelationGetRelationName(indexRelation)))); + if (!indexRelation->rd_index->indimmediate) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),