On 2017-02-08 23:25, Petr Jelinek wrote:
0001-Use-asynchronous-connect-API-in-libpqwalreceiver-v2.patch
0002-Always-initialize-stringinfo-buffers-in-walsender-v2.patch
0003-Fix-after-trigger-execution-in-logical-replication-v2.patch
0004-Add-RENAME-support-for-PUBLICATIONs-and-SUBSCRIPTION-v2.patch
0001-Logical-replication-support-for-initial-data-copy-v4.patch
Apart from the failing one make check test (test 'object_address') which
I reported earlier, I find it is easy to 'confuse' the replication.
I attach a script that intends to test the default COPY DATA. There
are two instances, initially without any replication. The script inits
pgbench on the master, adds a serial column to pgbench_history, and
dump-restores the 4 pgbench-tables to the future replica. It then
empties the 4 pgbench-tables on the 'replica'. The idea is that when
logrep is initiated, data will be replicated from master, with the end
result being that there are 4 identical tables on master and replica.
This often works but it also fails far too often (in my hands). I test
whether the tables are identical by comparing an md5 from an ordered
resultset, from both replica and master. I estimate that 1 in 5 tries
fail; 'fail' being a somewhat different table on replica (compared to
mater), most often pgbench_accounts (typically there are 10-30 differing
rows). No errors or warnings in either logfile. I'm not sure but I
think testing on faster machines seem to be doing somewhat better
('better' being less replication error).
Another, probably unrelated, problem occurs (but much more rarely) when
executing 'DROP SUBSCRIPTION sub1' on the replica (see the beginning of
the script). Sometimes that command hangs, and refuses to accept
shutdown of the server. I don't know how to recover from this -- I just
have to kill the replica server (master server still obeys normal
shutdown) and restart the instances.
The script accepts 2 parameters, scale and clients (used in pgbench -s
resp. -c)
I don't think I've managed to successfully run the script with more than
1 client yet.
Can you have a look whether this is reproducible elsewhere?
thanks,
Erik Rijkers
#!/bin/sh
# assumes both instances are running, on port 6972 and 6973
logfile1=$HOME/pg_stuff/pg_installations/pgsql.logical_replication/logfile.logical_replication
logfile2=$HOME/pg_stuff/pg_installations/pgsql.logical_replication2/logfile.logical_replication2
scale=1
if [[ ! "$1" == "" ]]
then
scale=$1
fi
clients=1
if [[ ! "$2" == "" ]]
then
clients=$2
fi
unset PGSERVICEFILE PGSERVICE PGPORT PGDATA PGHOST
PGDATABASE=testdb
# (this script also uses a custom pgpassfile)
## just for info:
# env | grep PG
# psql -qtAXc "select current_setting('server_version')"
port1=6972
port2=6973
function cb()
{
# display the 4 pgbench tables' accumulated content as md5s
# a,b,t,h stand for: pgbench_accounts, -branches, -tellers, -history
md5_total_6972='-1'
md5_total_6973='-2'
for port in $port1 $port2
do
md5_a=$(echo "select * from pgbench_accounts order by aid"|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_b=$(echo "select * from pgbench_branches order by bid"|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_t=$(echo "select * from pgbench_tellers order by tid"|psql -qtAXp$port|md5sum|cut -b 1-9)
md5_h=$(echo "select * from pgbench_history order by hid"|psql -qtAXp$port|md5sum|cut -b 1-9)
cnt_a=$(echo "select count(*) from pgbench_accounts"|psql -qtAXp $port)
cnt_b=$(echo "select count(*) from pgbench_branches"|psql -qtAXp $port)
cnt_t=$(echo "select count(*) from pgbench_tellers" |psql -qtAXp $port)
cnt_h=$(echo "select count(*) from pgbench_history" |psql -qtAXp $port)
md5_total[$port]=$( echo "${md5_a} ${md5_b} ${md5_t} ${md5_h}" | md5sum )
printf "$port a,b,t,h: %6d %6d %6d %6d" $cnt_a $cnt_b $cnt_t $cnt_h
echo -n " $md5_a $md5_b $md5_t $md5_h"
if [[ $port -eq $port1 ]]; then echo " master"
elif [[ $port -eq $port2 ]]; then echo -n " replica"
else echo " ERROR "
fi
done
if [[ "${md5_total[6972]}" == "${md5_total[6973]}" ]]
then
echo " ok"
else
echo " NOK"
fi
}
bail=0
pub_count=$( echo "select count(*) from pg_publication" | psql -qtAXp 6972 )
if [[ $pub_count -ne 0 ]]
then
echo "pub_count -ne 0 - deleting pub1 & bailing out"
echo "drop publication if exists pub1" | psql -Xp 6972
bail=1
fi
sub_count=$( echo "select count(*) from pg_subscription" | psql -qtAXp 6973 )
if [[ $sub_count -ne 0 ]]
then
echo "sub_count -ne 0 - deleting sub1 & bailing out"
echo "drop subscription if exists sub1" | psql -Xp 6973
rc=$?
echo "(drop subscr. ) ) rc=$rc"
bail=1
fi
pub_count=$( echo "select count(*) from pg_publication" | psql -qtAXp 6972 )
sub_count=$( echo "select count(*) from pg_subscription" | psql -qtAXp 6973 )
#if [[ $bail -eq 1 ]]
#then
# if [[ $pub_count -eq 0 ]] && [[ $sub_count -eq 0 ]]
# then
# exit 0
# else
# exit 1
# fi
#fi
if [[ $pub_count -eq 1 ]] || [[ $sub_count -eq 1 ]]
then
exit 1
fi
# clear logs
echo > $logfile1
echo > $logfile2
echo "drop table if exists pgbench_accounts;
drop table if exists pgbench_branches;
drop table if exists pgbench_tellers;
drop table if exists pgbench_history;" | psql -X -p $port1 \
&& echo "drop table if exists pgbench_accounts;
drop table if exists pgbench_branches;
drop table if exists pgbench_tellers;
drop table if exists pgbench_history;" | psql -X -p $port2
pgbench -p $port1 -qis $scale && echo "
alter table pgbench_history add column hid serial primary key;
-- alter table pgbench_history replica identity full;
-- delete from pgbench_accounts where aid > 40;
" | psql -1p $port1 \
&& pg_dump -F c -p $port1 \
-t pgbench_accounts \
-t pgbench_branches \
-t pgbench_tellers \
-t pgbench_history \
| pg_restore -p $port2 -d testdb
echo "-- (no diffs expected... )"
echo "$(cb)"
#pgbench -p $port2 -qis 1 && echo "
# alter table pgbench_history add column hid serial primary key;
# -- alter table pgbench_history replica identity full;" | psql -1p $port2
#
currval=1
# currval=$( echo "
# select nextval('pgbench_history_hid_seq'::regclass);
# select currval('pgbench_history_hid_seq'::regclass);" | psql -qtAXp $port1 | tail -n 1 )
# echo "-- currval [$currval]"
# empty tables at replica:
echo "
delete from pgbench_accounts;
delete from pgbench_branches;
delete from pgbench_tellers;
delete from pgbench_history;
-- select setval('pgbench_history_hid_seq', $currval);
" | psql -q -X -p $port2
echo "-- (pre-replication, diffs expected... )"
echo "$(cb)"
echo "create publication pub1 for all tables;" | psql -p $port1 -aqtAX
echo "create subscription sub1 connection 'port=${port1}' publication pub1 with (disabled);
alter subscription sub1 enable;
" | psql -p $port2 -aqtAX
#------------------------------------
if [[ 1 -eq 1 ]]
then
echo "-- pgbench -c $clients -T 20 -P 5 -n "
pgbench -c $clients -T 20 -P 5 -n
fi
sleep 5
echo "-- "
echo "$(cb)"
echo "select * from pgbench_accounts order by aid" | psql -Xp 6972 > acc.6972.md5
echo "select * from pgbench_accounts order by aid" | psql -Xp 6973 > acc.6973.md5
echo "select * from pgbench_history order by hid" | psql -Xp 6972 > his.6972.md5
echo "select * from pgbench_history order by hid" | psql -Xp 6973 > his.6973.md5
wc_acc=$( diff acc.6972.md5 acc.6973.md5 | wc -l )
wc_his=$( diff his.6972.md5 his.6973.md5 | wc -l )
echo "-- wc -l accounts $wc_acc"
echo "-- wc -l history $wc_his"
echo -n "-- logfile1 "; wc -l $logfile1 | cut '-d ' -f 1
echo -n "-- logfile2 "; wc -l $logfile2 | cut '-d ' -f 1
if [[ $wc_acc -eq 0 ]]
then
echo "-- wc lines [$wc_acc] - ok"
exit 0
fi
echo "-- waiting 30s..."
sleep 30
echo "-- "
echo "$(cb)"
echo "select * from pgbench_accounts order by aid" | psql -Xp 6972 > accounts.6972.md5
echo "select * from pgbench_accounts order by aid" | psql -Xp 6973 > accounts.6973.md5
diff accounts.6972.md5 accounts.6973.md5 | wc -l
echo -n "-- logfile1 "; wc -l $logfile1 | cut '-d ' -f 1
echo -n "-- logfile2 "; wc -l $logfile2 | cut '-d ' -f 1
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers