On Dec 8, 2009, at 1:33 AM, Maxim Boguk wrote:
I have a bit uncommon configuration now.
Master: postgresql 8.3
Slave: postgresql 8.4
slony: version 1.2.17
You know what is funny - I *JUST* ran into this problem too!
It only seems to happen when not specifying "only on xxx"
Here's a **HACK** solution to get your replication going again:
-- altertableforreplication(tab_id) -> tab_id
--
-- HACK workaround (mailing-list patch): identical to the stock Slony-I 1.2
-- version except that finding the table *already* in altered state raises a
-- NOTICE and returns, instead of raising an EXCEPTION — this lets a stuck
-- DDL-script replication proceed.  Rename "_replication" to your cluster
-- schema (e.g. "_slony") before applying.
create or replace function _replication.altertableforreplication(int)
returns int
as $$
declare
    p_tab_id      alias for $1;  -- sl_table.tab_id of the table to alter
    v_no_id       int4;          -- id of the local node
    v_tab_row     record;        -- sl_table / sl_set / catalog info for the table
    v_tab_fqname  text;          -- quoted "schema"."table" name
    v_tab_attkind text;          -- key-attribute kind string for logTrigger
    v_n           int4;          -- row_count from GET DIAGNOSTICS
    v_trec        record;        -- loop record for conflicting triggers
    v_tgbad       boolean;       -- true when a trigger-name conflict was found
begin
    -- ----
    -- Grab the central configuration lock
    -- ----
    lock table "_replication".sl_config_lock;

    -- ----
    -- Get our local node ID
    -- ----
    v_no_id := "_replication".getLocalNodeId('_replication');

    -- ----
    -- Get the sl_table row and the current origin of the table.
    -- Verify that the table currently is NOT in altered state.
    -- ----
    select T.tab_reloid, T.tab_set, T.tab_idxname, T.tab_altered,
            S.set_origin, PGX.indexrelid,
            "_replication".slon_quote_brute(PGN.nspname) || '.' ||
            "_replication".slon_quote_brute(PGC.relname) as tab_fqname
        into v_tab_row
        from "_replication".sl_table T, "_replication".sl_set S,
            "pg_catalog".pg_class PGC,
            "pg_catalog".pg_namespace PGN,
            "pg_catalog".pg_index PGX,
            "pg_catalog".pg_class PGXC
        where T.tab_id = p_tab_id
            and T.tab_set = S.set_id
            and T.tab_reloid = PGC.oid
            and PGC.relnamespace = PGN.oid
            and PGX.indrelid = T.tab_reloid
            and PGX.indexrelid = PGXC.oid
            and PGXC.relname = T.tab_idxname
        for update;
    if not found then
        raise exception 'Slony-I: alterTableForReplication(): Table with id % not found', p_tab_id;
    end if;
    v_tab_fqname = v_tab_row.tab_fqname;

    -- HACK: already-altered is only a NOTICE here (stock code raises an
    -- EXCEPTION), so replication is not aborted for this condition.
    if v_tab_row.tab_altered then
        raise notice 'Slony-I: alterTableForReplication(): Table % is already in altered state',
            v_tab_fqname;
        return p_tab_id;
    end if;

    v_tab_attkind := "_replication".determineAttKindUnique(v_tab_row.tab_fqname,
            v_tab_row.tab_idxname);

    execute 'lock table ' || v_tab_fqname || ' in access exclusive mode';

    -- ----
    -- Procedures are different on origin and subscriber
    -- ----
    if v_no_id = v_tab_row.set_origin then
        -- ----
        -- On the Origin we add the log trigger to the table and done
        -- ----
        execute 'create trigger "_replication_logtrigger_' ||
                p_tab_id::text ||
                '" after insert or update or delete on ' ||
                v_tab_fqname ||
                ' for each row execute procedure "_replication".logTrigger (''_replication'', ''' ||
                p_tab_id::text || ''', ''' ||
                v_tab_attkind || ''');';
    else
        -- ----
        -- On the subscriber the thing is a bit more difficult. We want
        -- to disable all user- and foreign key triggers and rules.
        -- ----

        -- ----
        -- Check to see if there are any trigger conflicts...
        -- ----
        v_tgbad := 'false';
        for v_trec in
            select pc.relname, tg1.tgname from
                "pg_catalog".pg_trigger tg1,
                "pg_catalog".pg_trigger tg2,
                "pg_catalog".pg_class pc,
                "pg_catalog".pg_index pi,
                "_replication".sl_table tab
            where
                tg1.tgname = tg2.tgname and      -- Trigger names match
                tg1.tgrelid = tab.tab_reloid and -- trigger 1 is on the table
                pi.indexrelid = tg2.tgrelid and  -- trigger 2 is on the index
                pi.indrelid = tab.tab_reloid and -- index's table is this table
                pc.oid = tab.tab_reloid
        loop
            raise notice 'Slony-I: alterTableForReplication(): multiple instances of trigger % on table %',
                v_trec.tgname, v_trec.relname;
            v_tgbad := 'true';
        end loop;
        if v_tgbad then
            raise exception 'Slony-I: Unable to disable triggers';
        end if;

        -- ----
        -- Disable all existing triggers by re-pointing them at the
        -- table's key index (Slony-I 1.2 catalog-hack technique),
        -- except the triggers Slony itself manages in sl_trigger.
        -- ----
        update "pg_catalog".pg_trigger
            set tgrelid = v_tab_row.indexrelid
            where tgrelid = v_tab_row.tab_reloid
            and not exists (
                select true from
                    "_replication".sl_table TAB,
                    "_replication".sl_trigger TRIG
                where
                    TAB.tab_reloid = tgrelid
                    and TAB.tab_id = TRIG.trig_tabid
                    and TRIG.trig_tgname = tgname
            );
        get diagnostics v_n = row_count;
        -- pg_class.reltriggers is not present on every server version in a
        -- mixed 8.3/8.4 cluster, so only touch it when the column exists.
        if (v_n > 0) and exists (select 1 from information_schema.columns
                where table_name = 'pg_class' and table_schema = 'pg_catalog'
                and column_name = 'reltriggers') then
            update "pg_catalog".pg_class
                set reltriggers = reltriggers - v_n
                where oid = v_tab_row.tab_reloid;
        end if;

        -- ----
        -- Disable all existing rules the same way
        -- ----
        update "pg_catalog".pg_rewrite
            set ev_class = v_tab_row.indexrelid
            where ev_class = v_tab_row.tab_reloid;
        get diagnostics v_n = row_count;
        if v_n > 0 then
            update "pg_catalog".pg_class
                set relhasrules = false
                where oid = v_tab_row.tab_reloid;
        end if;

        -- ----
        -- Add the trigger that denies write access to replicated tables
        -- ----
        execute 'create trigger "_replication_denyaccess_' ||
                p_tab_id::text ||
                '" before insert or update or delete on ' ||
                v_tab_fqname ||
                ' for each row execute procedure "_replication".denyAccess (''_replication'');';
    end if;

    -- ----
    -- Mark the table altered in our configuration
    -- ----
    update "_replication".sl_table
        set tab_altered = true where tab_id = p_tab_id;

    return p_tab_id;
end;
$$
language 'plpgsql';
you'll need to change the _replication to _slony - in a nutshell, it
makes a table already being in the altered state not an error condition
(changing that EXCEPTION to a NOTICE).
This is a hack solution until a proper fix is done up - was about to
fire up some test instances and trace what happens.
Any execute script surely breaks the replication set. Even if I submit
something like 'select 1' via execute script, replication will be broken
irreparably. The only way to fix it is to drop/create/subscribe the slave
node.
restarting slon doesn't help at all...
Here is slony log from slave database:
2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1
onlyonnode:-1
2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: "select
"_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
Slony-I: alterTableRestore(): Table "public"."access_type" is not in
altered state
CONTEXT: SQL statement "SELECT "_slony".alterTableRestore( $1 )"
PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: DDL preparation
failed - set 1 - only on node -1
2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
2009-12-08 09:11:43 MSK INFO remoteListenThread_1: disconnecting
from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: done
2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
2009-12-08 09:11:43 MSK CONFIG main: loading current cluster
configuration
2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1
no_comment='director'
2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
pa_connretry=10
2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
li_provider=1
2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
set_comment='A replication set so boring no one thought to give it a
name'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
sub_provider=1 sub_forward='t'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG main: configuration complete -
starting threads
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
NOTICE: Slony-I: cleanup stale sl_nodelock entry for pid=31722
2009-12-08 09:11:43 MSK CONFIG enableNode: no_id=1
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread starts
2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: thread starts
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread starts
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread starts
2009-12-08 09:11:43 MSK DEBUG1 main: running scheduler mainloop
2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: helper thread for
provider 1 created
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: connected to
'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK INFO Checking local node id
2009-12-08 09:11:43 MSK INFO Found local node id
2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1
onlyonnode:-1
2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: "select
"_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
Slony-I: alterTableRestore(): Table "public"."access_type" is not in
altered state
CONTEXT: SQL statement "SELECT "_slony".alterTableRestore( $1 )"
PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
2009-12-08 09:11:43 MSK ERROR remoteWorkerThread_1: DDL preparation
failed - set 1 - only on node -1
2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
2009-12-08 09:11:43 MSK INFO remoteListenThread_1: disconnecting
from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
2009-12-08 09:11:43 MSK DEBUG1 main: done
2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
2009-12-08 09:11:43 MSK CONFIG main: loading current cluster
configuration
2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1
no_comment='director'
2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
pa_connretry=10
2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
li_provider=1
2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
set_comment='A replication set so boring no one thought to give it a
name'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
sub_provider=1 sub_forward='t'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no
worker thread
2009-12-08 09:11:43 MSK CONFIG main: configuration complete -
starting threads
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
2009-12-08 09:11:43 MSK FATAL localListenThread: "select
"_slony".cleanupNodelock(); insert into "_slony".sl_nodelock values (
2, 0, "pg_catalog".pg_backend_pid()); " - ERROR: duplicate key
value violates unique constraint "sl_nodelock-pkey"
2009-12-08 09:11:43 MSK DEBUG1 slon: shutdown requested
2009-12-08 09:12:03 MSK DEBUG1 slon: child termination timeout -
kill child
2009-12-08 09:12:03 MSK DEBUG1 slon: done
--
=======================================================
Я в контакте: http://vkontakte.ru/id16323414
Сила солому ломит, но не все в нашей
жизни - солома, да и сила далеко не все.
_______________________________________________
Slony1-general mailing list
[email protected]
http://lists.slony.info/mailman/listinfo/slony1-general
--
Jeff Trout <[email protected]>
http://www.stuarthamm.net/
http://www.dellsmartexitin.com/
_______________________________________________
Slony1-general mailing list
[email protected]
http://lists.slony.info/mailman/listinfo/slony1-general