Hello hackers,
A recent buildfarm failure [1]:
38/273 recovery - postgresql:recovery/035_standby_logical_decoding ERROR
74.36s exit status 1
[14:34:25.522](0.001s) ok 65 - pg_recvlogical exited non-zero
[14:34:25.522](0.000s) not ok 66 - slot has been invalidated
[14:34:25.523](0.000s) # Failed test 'slot has been invalidated'
# at
/home/bf/bf-build/francolin/REL_16_STABLE/pgsql/src/test/recovery/t/035_standby_logical_decoding.pl
line 118.
[14:34:25.523](0.001s) # 'pg_recvlogical: error: could not send replication command "START_REPLICATION
SLOT "drop_db_activeslot" LOGICAL 0/0 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection
unexpectedly
# This probably means the server terminated abnormally
# before or while processing the request.
# pg_recvlogical: error: disconnected
# '
# doesn't match '(?^:conflict with recovery)'
[14:34:25.675](0.152s) ok 67 - otherslot on standby not dropped
duplicated one that happened the last year [2] and had no explanation so
far. This time I've managed to reproduce it with the check_slots_dropped()
repeated in the loop (the patch is attached).
With extra logging added, I could see:
[08:47:52.385](0.000s) not ok 114 - slot has been invalidated
[08:47:52.385](0.000s) # Failed test 'slot has been invalidated'
# at
/home/user/postgresql/src/test/recovery_9/t/035_standby_logical_decoding.pl
line 120.
[08:47:52.385](0.000s) # '!!!pqsecure_raw_read| len=16384 n=554 errno=0
buf=760000002b00030002000000015f70715f2e746573745f70726f746f636f6c5f6e65676f74696174696f6e00520000000800000000530000001b496e74657276616c5374796c6500706f7374677265730053000000207365617263685f7061746800222475736572222c207075626c696300530000001469735f737570657275736572006f6e0053000000237374616e646172645f636f6e666f726d696e675f737472696e6773006f6e00530000001f73657373696f6e5f617574686f72697a6174696f6e0075736572005300000019636c69656e745f656e636f64696e67005554463800530000001b7365727665725f76657273696f6e003139646576656c
# !!!pqsecure_raw_read| len=16384 n=85 errno=0
buf=540000002300017365745f636f6e6669670000000000000000000019ffffffffffff0000440000000a000100000000430000000d53454c45435420310053000000117365617263685f7061746800005a0000000549
# !!!pqsecure_raw_read| len=16384 n=76 errno=0
buf=540000002c0001646174615f6469726563746f72795f6d6f64650000000000000000000019ffffffffffff0000440000000e00010000000430373030430000000953484f57005a0000000549
# !!!pqsecure_raw_read| len=16384 n=198 errno=0
buf=540000006f000473797374656d69640000000000000000000019ffffffffffff000074696d656c696e6500000000000000000000140008ffffffff0000786c6f67706f730000000000000000000019ffffffffffff000064626e616d650000000000000000000019ffffffffffff0000440000003a0004000000133736343038333834313938393731313334343000000001310000000a302f30373138433739380000000674657374646243000000144944454e544946595f53595354454d005a0000000549
# !!!pqsecure_raw_read| len=16384 n=181 errno=0
buf=45000000b453464154414c0056464154414c00433537503034004d7465726d696e6174696e6720636f6e6e656374696f6e2064756520746f20636f6e666c6963742077697468207265636f766572790044557365722077617320636f6e6e656374656420746f20612064617461626173652074686174206d7573742062652064726f707065642e0046706f7374677265732e63004c3332313300527265706f72745f7265636f766572795f636f6e666c6963740000
### FATAL...57P04...terminating connection due to conflict with recovery
# !!!pqsecure_raw_read| len=16384 n=0 errno=0 buf=
# !!!pqsecure_raw_read| len=16384 n=0 errno=0 buf=
# pg_recvlogical: error: could not send replication command "START_REPLICATION SLOT "drop_db_activeslot" LOGICAL
0/00000000 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection unexpectedly
# This probably means the server terminated abnormally
# before or while processing the request.
# pg_recvlogical: error: disconnected
# '
# doesn't match '(?^:conflict with recovery)'
That is, the FATAL message was received from the server, but libpq lost it
inside.
A successful run shows this:
!!!check_pg_recvlogical_stderr:
!!!pqsecure_raw_read| len=16384 n=554 errno=0
buf=760000002b00030002000000015f70715f2e746573745f70726f746f636f6c5f6e65676f74696174696f6e00520000000800000000530000001b496e74657276616c5374796c6500706f7374677265730053000000207365617263685f7061746800222475736572222c207075626c696300530000001469735f737570657275736572006f6e0053000000237374616e646172645f636f6e666f726d696e675f737472696e6773006f6e00530000001f73657373696f6e5f617574686f72697a6174696f6e0075736572005300000019636c69656e745f656e636f64696e67005554463800530000001b7365727665725f76657273696f6e003139646576656c
!!!pqsecure_raw_read| len=16384 n=85 errno=0
buf=540000002300017365745f636f6e6669670000000000000000000019ffffffffffff0000440000000a000100000000430000000d53454c45435420310053000000117365617263685f7061746800005a0000000549
!!!pqsecure_raw_read| len=16384 n=76 errno=0
buf=540000002c0001646174615f6469726563746f72795f6d6f64650000000000000000000019ffffffffffff0000440000000e00010000000430373030430000000953484f57005a0000000549
!!!pqsecure_raw_read| len=16384 n=198 errno=0
buf=540000006f000473797374656d69640000000000000000000019ffffffffffff000074696d656c696e6500000000000000000000140008ffffffff0000786c6f67706f730000000000000000000019ffffffffffff000064626e616d650000000000000000000019ffffffffffff0000440000003a0004000000133736343038333834313938393731313334343000000001310000000a302f30364432393539380000000674657374646243000000144944454e544946595f53595354454d005a0000000549
!!!pqsecure_raw_read| len=16384 n=8 errno=0 buf=5700000007000000
!!!pqsecure_raw_read| len=16384 n=46 errno=0
buf=64000000166b0000000006d295980002f5019f6590590064000000166b0000000006d295980002f5019f6590c100
!!!pqsecure_raw_read| len=16384 n=46 errno=0
buf=64000000166b0000000006d295980002f5019f66df5b0064000000166b0000000006d295d00002f5019f66e01d00
!!!pqsecure_raw_read| len=16384 n=46 errno=0
buf=64000000166b0000000006d295d00002f5019f6719390064000000166b0000000006d296a00002f5019f67197c00
!!!pqsecure_raw_read| len=16384 n=23 errno=0
buf=64000000166b0000000006d296a00002f5019f671b3300
!!!pqsecure_raw_read| len=16384 n=23 errno=0
buf=64000000166b0000000006d297800002f5019f671bc900
!!!pqsecure_raw_read| len=16384 n=46 errno=0
buf=64000000166b0000000006d297800002f5019f671c970064000000166b0000000006d298000002f5019f671cbf00
!!!pqsecure_raw_read| len=16384 n=181 errno=0
buf=45000000b453464154414c0056464154414c00433537503034004d7465726d696e6174696e6720636f6e6e656374696f6e2064756520746f20636f6e666c6963742077697468207265636f766572790044557365722077617320636f6e6e656374656420746f20612064617461626173652074686174206d7573742062652064726f707065642e0046706f7374677265732e63004c3332313300527265706f72745f7265636f766572795f636f6e666c6963740000
### FATAL...57P04...terminating connection due to conflict with recovery
pg_recvlogical: error: unexpected termination of replication stream: FATAL: terminating connection due to conflict with
recovery
DETAIL: User was connected to a database that must be dropped.
pg_recvlogical: error: disconnected
Thus, FATAL is received by pg_recvlogical as expected, if it follows
CopyBothResponse in the stream.
Having this knowledge, I could reproduce such behavior simply with:
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1501,6 +1501,7 @@ StartLogicalReplication(StartReplicationCmd *cmd)
ReplicationSlotAcquire(cmd->slotname, true, true);
+pg_usleep(1000000);
/*
* Force a disconnect, so that the decoding code doesn't need to care
* about an eventual switch from running in recovery, to running in a
# +++ tap check in src/test/recovery +++
t/035_standby_logical_decoding.pl .. 69/?
# Failed test 'slot has been invalidated'
# at t/035_standby_logical_decoding.pl line 119.
# 'pg_recvlogical: error: could not send replication command "START_REPLICATION SLOT
"drop_db_activeslot" LOGICAL 0/00000000 ("include-xids" '0', "skip-empty-xacts" '1')": server closed the connection
unexpectedly
# This probably means the server terminated abnormally
# before or while processing the request.
# pg_recvlogical: error: disconnected
# '
# doesn't match '(?^:conflict with recovery)'
t/035_standby_logical_decoding.pl .. 88/? # Looks like you failed 1 test of 89.
Reproduced on REL_16_STABLE (where this test was landed) .. master.
[1]
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=francolin&dt=2026-05-11%2012%3A25%3A04
[2]
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=kestrel&dt=2025-08-04%2005%3A45%3A46
Best regards,
Alexander
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index a33a685dcc6..0c37ebdf9d1 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -603,6 +603,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
+pg_usleep(100000);
ereport(LogicalDecodingLogLevel(),
(errmsg("starting logical decoding for slot \"%s\"",
NameStr(slot->data.name)),
diff --git a/src/interfaces/libpq/fe-secure.c b/src/interfaces/libpq/fe-secure.c
index 31d5b48d3f9..c08c2987c46 100644
--- a/src/interfaces/libpq/fe-secure.c
+++ b/src/interfaces/libpq/fe-secure.c
@@ -200,6 +200,16 @@ pqsecure_raw_read(PGconn *conn, void *ptr, size_t len)
n = recv(conn->sock, ptr, len, 0);
+if (conn->replication && conn->replication[0])
+{
+int save_errno = SOCK_ERRNO;
+char hb[1024];
+hb[0] = 0;
+for (size_t i = 0; i < n && i < 256; i++) sprintf(hb + (i * 2), "%02x", ((char *)ptr)[i]);
+fprintf(stderr, "!!!pqsecure_raw_read| len=%zu n=%zd errno=%d buf=%s\n", len, n, (n < 0) ? save_errno : 0, hb);
+SOCK_ERRNO_SET(save_errno);
+}
+
if (n < 0)
{
result_errno = SOCK_ERRNO;
diff --git a/src/test/meson.build b/src/test/meson.build
index cd45cbf57fb..cbb6d70a439 100644
--- a/src/test/meson.build
+++ b/src/test/meson.build
@@ -5,6 +5,26 @@ subdir('isolation')
subdir('authentication')
subdir('postmaster')
+subdir('recovery_1')
+subdir('recovery_2')
+subdir('recovery_3')
+subdir('recovery_4')
+subdir('recovery_5')
+subdir('recovery_6')
+subdir('recovery_7')
+subdir('recovery_8')
+subdir('recovery_9')
+subdir('recovery_10')
+subdir('recovery_11')
+subdir('recovery_12')
+subdir('recovery_13')
+subdir('recovery_14')
+subdir('recovery_15')
+subdir('recovery_16')
+subdir('recovery_17')
+subdir('recovery_18')
+subdir('recovery_19')
+subdir('recovery_20')
subdir('recovery')
subdir('subscription')
subdir('modules')
diff --git a/src/test/recovery/t/035_standby_logical_decoding.pl b/src/test/recovery/t/035_standby_logical_decoding.pl
index 4421059f100..44727a5d9f7 100644
--- a/src/test/recovery/t/035_standby_logical_decoding.pl
+++ b/src/test/recovery/t/035_standby_logical_decoding.pl
@@ -114,6 +114,7 @@ sub check_pg_recvlogical_stderr
$slot_user_handle->finish;
$return = $?;
cmp_ok($return, "!=", 0, "pg_recvlogical exited non-zero");
+print("!!!check_pg_recvlogical_stderr:\n$stderr\n===\n");
if ($return)
{
like($stderr, qr/$check_stderr/, 'slot has been invalidated');
@@ -907,6 +908,10 @@ check_pg_recvlogical_stderr($handle,
# drop the logical slots
drop_logical_slots('wal_level_');
+for my $i (1 .. 100)
+{
+print("DROP DATABASE scenario: iteration $i\n");
+
# create the logical slots
create_logical_slots($node_standby, 'drop_db_');
@@ -933,10 +938,17 @@ check_slots_dropped('drop_db', $handle);
is($node_standby->slot('otherslot')->{'slot_type'},
'logical', 'otherslot on standby not dropped');
+$node_standby->safe_psql('postgres', q[SELECT pg_drop_replication_slot('otherslot')]);
+$node_primary->safe_psql('postgres', q[CREATE DATABASE testdb]);
+$node_primary->wait_for_replay_catchup($node_standby);
+}
+
# Cleanup : manually drop the slot that was not dropped.
$node_standby->psql('postgres',
q[SELECT pg_drop_replication_slot('otherslot')]);
+if (0)
+{
##################################################
# Test standby promotion and logical decoding behavior
# after the standby gets promoted.
@@ -1059,5 +1071,6 @@ chomp($cascading_stdout);
is($cascading_stdout, $expected,
'got same expected output from pg_recvlogical decoding session on cascading standby'
);
+}
done_testing();