On Dec 8, 2009, at 1:33 AM, Maxim Boguk wrote:

I have a bit uncommon configuration now.
Master: postgresql 8.3
Slave:   postgresql 8.4
slony:   version 1.2.17


You know what is funny - I *JUST* ran into this problem too!
It only seems to happen when not specifying "only on xxx"

Here's a **HACK** solution to get your replication going again:

-- **HACK** replacement for Slony-I 1.2's alterTableForReplication().
--
-- Identical in behavior to the stock version except for one change: finding
-- the table already in "altered" state raises a NOTICE and returns the
-- tab_id, instead of raising an EXCEPTION.  That lets a stuck DDL
-- (EXECUTE SCRIPT) event be replayed on a subscriber whose tables were left
-- half-altered.  Rename the "_replication" schema to your actual cluster
-- schema (e.g. "_slony") before installing.
--
-- Parameter: $1 (p_tab_id) - sl_table.tab_id of the table to put into
--            replicated ("altered") state.
-- Returns:   p_tab_id on success.
-- Raises:    exception if the table id is unknown, or if conflicting
--            duplicate triggers would prevent the subscriber-side trigger
--            hiding trick from working.
create or replace function _replication.altertableforreplication(int)
returns int
as $$
declare
        p_tab_id        alias for $1;   -- sl_table.tab_id of the target table
        v_no_id         int4;           -- local node id
        v_tab_row       record;         -- sl_table/sl_set/pg_catalog join row
        v_tab_fqname    text;           -- quoted "schema"."table" name
        v_tab_attkind   text;           -- key-column attkind string for logTrigger
        v_n             int4;           -- row count from catalog updates
        v_trec          record;         -- loop record for trigger-conflict scan
        v_tgbad         boolean;        -- true if conflicting triggers were found
begin
        -- ----
        -- Grab the central configuration lock
        -- ----
        lock table "_replication".sl_config_lock;

        -- ----
        -- Get our local node ID
        -- ----
        v_no_id := "_replication".getLocalNodeId('_replication');

        -- ----
        -- Get the sl_table row and the current origin of the table.
        -- Also resolve the table's key index so the subscriber branch can
        -- use its oid as a "parking place" for disabled triggers/rules.
        -- ----
        select T.tab_reloid, T.tab_set, T.tab_idxname, T.tab_altered,
                        S.set_origin, PGX.indexrelid,
                        "_replication".slon_quote_brute(PGN.nspname) || '.' ||
                        "_replication".slon_quote_brute(PGC.relname) as tab_fqname
                        into v_tab_row
                        from "_replication".sl_table T, "_replication".sl_set S,
                                "pg_catalog".pg_class PGC, "pg_catalog".pg_namespace PGN,
                                "pg_catalog".pg_index PGX, "pg_catalog".pg_class PGXC
                        where T.tab_id = p_tab_id
                                and T.tab_set = S.set_id
                                and T.tab_reloid = PGC.oid
                                and PGC.relnamespace = PGN.oid
                                and PGX.indrelid = T.tab_reloid
                                and PGX.indexrelid = PGXC.oid
                                and PGXC.relname = T.tab_idxname
                                for update;
        if not found then
                raise exception 'Slony-I: alterTableForReplication(): Table with id % not found', p_tab_id;
        end if;
        v_tab_fqname := v_tab_row.tab_fqname;

        -- The HACK: an already-altered table is reported, not fatal.  The
        -- stock Slony-I 1.2 function raises an exception here, which makes
        -- a half-applied DDL event impossible to replay.
        if v_tab_row.tab_altered then
                raise notice 'Slony-I: alterTableForReplication(): Table % is already in altered state',
                                v_tab_fqname;
                return p_tab_id;
        end if;

        v_tab_attkind := "_replication".determineAttKindUnique(v_tab_row.tab_fqname,
                                        v_tab_row.tab_idxname);

        execute 'lock table ' || v_tab_fqname || ' in access exclusive mode';

        -- ----
        -- Procedures are different on origin and subscriber
        -- ----
        if v_no_id = v_tab_row.set_origin then
                -- ----
                -- On the origin we add the log trigger to the table and are done
                -- ----
                execute 'create trigger "_replication_logtrigger_' ||
                                p_tab_id::text ||
                                '" after insert or update or delete on ' ||
                                v_tab_fqname ||
                                ' for each row execute procedure ' ||
                                '"_replication".logTrigger (''_replication'', ''' ||
                                p_tab_id::text || ''', ''' ||
                                v_tab_attkind || ''');';
        else
                -- ----
                -- On the subscriber the thing is a bit more difficult. We want
                -- to disable all user- and foreign key triggers and rules.
                -- ----

                -- ----
                -- Check to see if there are any trigger conflicts: a trigger
                -- on the table whose name collides with a trigger already
                -- parked on the key index would be impossible to restore.
                -- ----
                v_tgbad := false;
                for v_trec in
                        select pc.relname, tg1.tgname from
                        "pg_catalog".pg_trigger tg1,
                        "pg_catalog".pg_trigger tg2,
                        "pg_catalog".pg_class pc,
                        "pg_catalog".pg_index pi,
                        "_replication".sl_table tab
                        where
                         tg1.tgname = tg2.tgname and        -- trigger names match
                         tg1.tgrelid = tab.tab_reloid and   -- trigger 1 is on the table
                         pi.indexrelid = tg2.tgrelid and    -- trigger 2 is on the index
                         pi.indrelid = tab.tab_reloid and   -- index's table is this table
                         pc.oid = tab.tab_reloid
                loop
                        raise notice 'Slony-I: alterTableForReplication(): multiple instances of trigger % on table %',
                                v_trec.tgname, v_trec.relname;
                        v_tgbad := true;
                end loop;
                if v_tgbad then
                        raise exception 'Slony-I: Unable to disable triggers';
                end if;

                -- ----
                -- Disable all existing triggers (except Slony-I's own, listed
                -- in sl_trigger) by re-pointing them at the table's key index
                -- relation -- the Slony-I 1.2 catalog-hacking trick.
                -- ----
                update "pg_catalog".pg_trigger
                                set tgrelid = v_tab_row.indexrelid
                                where tgrelid = v_tab_row.tab_reloid
                                and not exists (
                                                select true from "_replication".sl_table TAB,
                                                                "_replication".sl_trigger TRIG
                                                                where TAB.tab_reloid = tgrelid
                                                                and TAB.tab_id = TRIG.trig_tabid
                                                                and TRIG.trig_tgname = tgname
                                        );
                get diagnostics v_n = row_count;
                -- pg_class.reltriggers only exists on older servers (it is
                -- gone in 8.4+), hence the information_schema existence check
                -- before trying to maintain the count.
                if (v_n > 0) and exists (select 1 from information_schema.columns where table_name = 'pg_class' and table_schema = 'pg_catalog' and column_name = 'reltriggers') then
                        update "pg_catalog".pg_class
                                        set reltriggers = reltriggers - v_n
                                        where oid = v_tab_row.tab_reloid;
                end if;

                -- ----
                -- Disable all existing rules, same re-pointing trick.
                -- ----
                update "pg_catalog".pg_rewrite
                                set ev_class = v_tab_row.indexrelid
                                where ev_class = v_tab_row.tab_reloid;
                get diagnostics v_n = row_count;
                if v_n > 0 then
                        update "pg_catalog".pg_class
                                        set relhasrules = false
                                        where oid = v_tab_row.tab_reloid;
                end if;

                -- ----
                -- Add the trigger that denies write access to replicated tables
                -- ----
                execute 'create trigger "_replication_denyaccess_' ||
                                p_tab_id::text ||
                                '" before insert or update or delete on ' ||
                                v_tab_fqname ||
                                ' for each row execute procedure ' ||
                                '"_replication".denyAccess (''_replication'');';
        end if;

        -- ----
        -- Mark the table altered in our configuration
        -- ----
        update "_replication".sl_table
                        set tab_altered = true where tab_id = p_tab_id;

        return p_tab_id;
end;
$$
language plpgsql;

you'll need to change `_replication` to `_slony` — in a nutshell, it makes a table already being in altered state a non-error condition (changing that EXCEPTION to a NOTICE).

This is a hack solution until a proper fix is done — I was about to fire up some test instances and trace what happens.


Any EXECUTE SCRIPT reliably breaks the replication set. Even if I submit
something as trivial as 'select 1' via EXECUTE SCRIPT, replication breaks
down irreparably. The only way to fix it is to drop/create/subscribe the slave node.
Restarting slon doesn't help at all...

Here is slony log from slave database:

2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1 onlyonnode:-1
2009-12-08 09:11:43 MSK ERROR  remoteWorkerThread_1: "select
"_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
Slony-I: alterTableRestore(): Table "public"."access_type" is not in
altered state
CONTEXT:  SQL statement "SELECT  "_slony".alterTableRestore( $1 )"
PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
2009-12-08 09:11:43 MSK ERROR  remoteWorkerThread_1: DDL preparation
failed - set 1 - only on node -1
2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
2009-12-08 09:11:43 MSK INFO   remoteListenThread_1: disconnecting
from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: done
2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
2009-12-08 09:11:43 MSK CONFIG main: loading current cluster configuration 2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1 no_comment='director'
2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
pa_connretry=10
2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
li_provider=1
2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
set_comment='A replication set so boring no one thought to give it a
name'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread
2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
sub_provider=1 sub_forward='t'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread
2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread 2009-12-08 09:11:43 MSK CONFIG main: configuration complete - starting threads
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
NOTICE:  Slony-I: cleanup stale sl_nodelock entry for pid=31722
2009-12-08 09:11:43 MSK CONFIG enableNode: no_id=1
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread starts
2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: thread starts
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread starts
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread starts
2009-12-08 09:11:43 MSK DEBUG1 main: running scheduler mainloop
2009-12-08 09:11:43 MSK DEBUG1 remoteWorkerThread_1: helper thread for
provider 1 created
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: connected to
'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK INFO   Checking local node id
2009-12-08 09:11:43 MSK INFO   Found local node id
2009-12-08 09:11:43 MSK INFO prepare for DDL script - set:1 onlyonnode:-1
2009-12-08 09:11:43 MSK ERROR  remoteWorkerThread_1: "select
"_slony".ddlScript_prepare_int(1, -1); " PGRES_FATAL_ERROR ERROR:
Slony-I: alterTableRestore(): Table "public"."access_type" is not in
altered state
CONTEXT:  SQL statement "SELECT  "_slony".alterTableRestore( $1 )"
PL/pgSQL function "ddlscript_prepare_int" line 46 at PERFORM
2009-12-08 09:11:43 MSK ERROR  remoteWorkerThread_1: DDL preparation
failed - set 1 - only on node -1
2009-12-08 09:11:43 MSK DEBUG1 slon: retry requested
2009-12-08 09:11:43 MSK INFO   remoteListenThread_1: disconnecting
from 'dbname=plus1 host=192.168.1.137 port=5432 user=slony'
2009-12-08 09:11:43 MSK DEBUG1 remoteListenThread_1: thread done
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 syncThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 cleanupThread: thread done
2009-12-08 09:11:43 MSK DEBUG1 main: scheduler mainloop returned
2009-12-08 09:11:43 MSK DEBUG1 main: done
2009-12-08 09:11:43 MSK DEBUG1 slon: restart of worker
2009-12-08 09:11:43 MSK CONFIG main: slon version 1.2.17 starting up
2009-12-08 09:11:43 MSK CONFIG main: local node id = 2
2009-12-08 09:11:43 MSK CONFIG main: launching sched_start_mainloop
2009-12-08 09:11:43 MSK CONFIG main: loading current cluster configuration 2009-12-08 09:11:43 MSK CONFIG storeNode: no_id=1 no_comment='director'
2009-12-08 09:11:43 MSK CONFIG storePath: pa_server=1 pa_client=2
pa_conninfo="dbname=plus1 host=192.168.1.137 port=5432 user=slony"
pa_connretry=10
2009-12-08 09:11:43 MSK CONFIG storeListen: li_origin=1 li_receiver=2
li_provider=1
2009-12-08 09:11:43 MSK CONFIG storeSet: set_id=1 set_origin=1
set_comment='A replication set so boring no one thought to give it a
name'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread
2009-12-08 09:11:43 MSK CONFIG storeSubscribe: sub_set=1
sub_provider=1 sub_forward='t'
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread
2009-12-08 09:11:43 MSK CONFIG enableSubscription: sub_set=1
2009-12-08 09:11:43 MSK WARN remoteWorker_wakeup: node 1 - no worker thread 2009-12-08 09:11:43 MSK CONFIG main: configuration complete - starting threads
2009-12-08 09:11:43 MSK DEBUG1 localListenThread: thread starts
2009-12-08 09:11:43 MSK FATAL  localListenThread: "select
"_slony".cleanupNodelock(); insert into "_slony".sl_nodelock values (
 2, 0, "pg_catalog".pg_backend_pid()); " - ERROR:  duplicate key
value violates unique constraint "sl_nodelock-pkey"

2009-12-08 09:11:43 MSK DEBUG1 slon: shutdown requested
2009-12-08 09:12:03 MSK DEBUG1 slon: child termination timeout - kill child
2009-12-08 09:12:03 MSK DEBUG1 slon: done


--
=======================================================
Я в контакте: http://vkontakte.ru/id16323414
Сила солому ломит, но не все в нашей жизни - солома, да и сила далеко не все.
_______________________________________________
Slony1-general mailing list
[email protected]
http://lists.slony.info/mailman/listinfo/slony1-general

--
Jeff Trout <[email protected]>
http://www.stuarthamm.net/
http://www.dellsmartexitin.com/



_______________________________________________
Slony1-general mailing list
[email protected]
http://lists.slony.info/mailman/listinfo/slony1-general

Reply via email to