Thanks! I will look into this. Also any comments are welcome from
other pgpool hackers!
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

> Tatsuo,
> 
> I modified the source file for Pgpool -II 2.3.2.2.
> I attached the patch(diff).
> 
> Regards,
> Rumman
> ________________________________________
> From: Tatsuo Ishii [[email protected]]
> Sent: Tuesday, February 23, 2010 9:14 AM
> To: Brian Maguire
> Cc: [email protected]; Gazi Rahman; Ahmed Iftekhar; Arthur Vossberg; 
> [email protected]
> Subject: Re: Pgpool II 2.3.1 Patch Contribution
> 
> [Cced to pgpool-hackers]
> 
> Brian,
> 
> Thanks for your contribution. I will look into the code. Also I would
> like to hear from pgpool hackers's opinion if any.
> 
> If possible, it would be great if you could generate patches (diff)
> against CVS HEAD. Also you should change pool_config.l, rather than
> pool_config.c. Another request is, please provide documentation. You
> can modify doc/pgpool-en.html.
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
> English: http://www.sraoss.co.jp/index_en.php
> Japanese: http://www.sraoss.co.jp
> 
> > Tatsuo
> >
> >
> >
> > I would like to contribibution of a patch of a code change to support a 
> > config change for the killing of all child processes rather than waiting 
> > for disconnect.  The description of the rational and the code chnage are 
> > below and attached.  Please let me know if this can be contributed or even 
> > better avoided all together some how.
> >
> >
> >
> >
> >
> > Cheers,
> >
> >
> >
> > Brian
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > We changed the source files of Pgpool II 2.3.1 for the following two 
> > purposes:
> >     1. To avoid the wait for the disconnection of all sessions at 2nd stage 
> > of pgpool recovery.
> >     2. To avoid the disconnection of active sessions at Primary node when 
> > the secondary node stop communication with the primary pgpool.
> >
> > 1. To avoid the wait for the disconnection of all sessions at 2nd stage of 
> > pgpool recovery
> >
> > With default implementation of Pgpool II 2.3.1, during recovery, at 2nd 
> > stage pgpool waits for disconnection of all sessions.  But in 24x7 
> > production database, without stopping the application, it is difficult to 
> > ensure that no connection will exist in the database and if there is a 
> > session connected in the database, the recovery process will wait for its 
> > disconnection and if it does not happen within a specific period then the 
> > recovery will fail.
> > To avoid this recovery failure we added a parameter 
> > 'kill_all_child_process_before_2nd_stage"
> >
> > if kill_all_child_process_before_2nd_stage = true/1 then
> >     kill all child process before the start of 2nd stage so that no need to 
> > wait for disconnections of active sessions
> > else
> >     wait for the disconnections of all active sessions; that is same as the 
> > Pgpool II 2.3.1
> > end if
> >
> > Default value for kill_all_child_process_before_2nd_stage is set to false/0.
> >
> > The following 3 files have been modified:
> >
> > pool.h
> > pool_config.c
> > recovery.c
> >
> >
> >
> > File : pool.h   //added the parameter
> >
> >
> >
> > typedef struct
> > {
> > ....
> > int kill_all_child_process_during_before_2nd_stage;
> > } POOL_CONFIG;
> >
> >
> > File: pool_config.c  //read the pgpool.conf value set by the pgpool 
> > administrator
> >
> >
> >
> > int pool_init_config()
> > {
> > ...
> > pool_config->kill_all_child_process_before_2nd_stage = 0;
> >
> > } //int pool_init_config()
> >
> >
> > int pool_get_config(char *confpath, POOL_CONFIG_CONTEXT context)
> > {
> > ....
> >     //kill_all_child_process_before_2nd_stage
> >     else if (!strcmp(key, "kill_all_child_process_before_2nd_stage") && 
> > CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
> >         {
> >             int v = eval_logical(yytext);
> >
> >             if (v < 0)
> >             {
> >                 pool_error("pool_config: invalid value %s for %s", yytext, 
> > key);
> >                 return(-1);
> >             }
> >             pool_config->kill_all_child_process_before_2nd_stage = v;
> >     } //kill_all_child_process_before_2nd_stage
> > ....
> > }//int pool_get_config
> >
> >
> > File: recovery.c //added the parameterized functionality in the block 
> > "modified here"
> >
> >
> >
> > int start_recovery(int recovery_node)
> > {
> >     ...
> >     pool_log("1st stage is done");
> >
> >     --------------- modified here -------------------------
> >         if (pool_config->kill_all_child_process_before_2nd_stage == 1)
> >                 {       /* kill all children processes */
> >
> >                         pool_log("Disconnection of all sessions before 2nd 
> > stage started");
> >                         pool_debug("pool_config 
> > =%d",pool_config->num_init_children);
> >                         int i;
> >                         for (i = 0; i < pool_config->num_init_children; i++)
> >                         {
> >                                 pid_t pid = pids[i].pid;
> >                                 if (pid)
> >                                 {
> >                                         kill(pid, SIGQUIT);
> >                                         pool_debug("Disconnect session 
> > before 2nd stage: kill %d", pid);
> >                                 }//if (pids[i].pid)
> >                         }//for (i = 0; i < pool_config->num_init_children; 
> > i++)
> >                         pool_log("Killed all active sessions");
> >                 }// if 
> > (pool_config->kill_all_child_process_before_2nd_stage == 1)
> >
> >     ------------ end of modification ---------------------------------
> >     pool_log("starting 2nd stage");
> >
> >     /* 2nd stage */
> >     *InRecovery = 1;
> >         -------- modified here ----------------
> >         pool_log("---->Waiting for all connection closed <---");
> >         --------- end of modification ----------
> >     if (wait_connection_closed() != 0)
> >     {
> >         PQfinish(conn);
> >         pool_error("start_recovery: timeover for waiting connection 
> > closed");
> >         return 1;
> >     }
> >     pool_log("all connections from clients have been closed");
> >
> >     ...
> > } //int start_recovery
> > 2. To avoid the disconnection of active sessions at Primary node when the 
> > secondary node stop communication with the primary pgpool
> >
> > With default implementation of Pgpool II 2.3.1, if either node is down 
> > pgpool kills all child processes to stop TCP/IP retrying. But this may 
> > cause problem in our environment.
> > Let a situation where secondary node somehow disconnected and stopped 
> > communication with the primary node. With default implementation, pgpool 
> > kills all active calls and other activities of our application.
> > It is not desirable. Activities at primary node must not be hampered if 
> > secondary node stops communication.
> > To avoid this, we added a parameter 
> > "restart_all_process_for_same_master_node_in_failover".
> >
> > If restart_all_process_for_same_master_node_in_failover = true/1 then
> >    restart all child processes that is same as the default with Pgpool II 
> > 2.3.1
> > else
> >    don't restart the child processes for same master node
> > end if
> >
> > Default value for restart_all_process_for_same_master_node_in_failover set 
> > to 1/true;
> >
> > Actually, here we have only enabled the old functionaluty of do not restart 
> > child process for same master node with the parameter 
> > restart_all_process_for_same_master_node_in_failover.
> >
> >
> >
> > The following 3 files have been changed:
> > pool.h
> > pool_config.c
> > main.c
> >
> >
> >
> > File: pool.h    //added the parameter
> >
> >
> >
> > typedef struct
> > {
> > ....
> > int restart_all_process_for_same_master_node_in_failover;
> > } POOL_CONFIG;
> >
> >
> > File: pool_config.c  //read the pgpool.conf value set by the pgpool 
> > administrator
> >
> >
> >
> > int pool_init_config()
> > {
> > ...
> > pool_config->restart_all_process_for_same_master_node_in_failover = 1;
> > } //int pool_init_config()
> >
> > int pool_get_config(char *confpath, POOL_CONFIG_CONTEXT context)
> > {
> > ....
> > else if (!strcmp(key, 
> > "restart_all_process_for_same_master_node_in_failover") &&
> >                  CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
> >         {
> >             int v = eval_logical(yytext);
> >
> >             if (v < 0)
> >             {
> >                 pool_error("pool_config: invalid value %s for %s", yytext, 
> > key);
> >                 fclose(fd);
> >                 return(-1);
> >             }
> >             
> > pool_debug("restart_all_process_for_same_master_node_in_failover: %d", v);
> >             
> > pool_config->restart_all_process_for_same_master_node_in_failover = v;
> >         }
> > ....
> >
> > } //int pool_get_config(char *confpath, POOL_CONFIG_CONTEXT context)
> >
> >
> >
> >
> > File: main.c // added the parameterized functionality; the block "modified 
> > here" indicates our modification
> >
> >
> >
> > ...
> > static void failover(void)
> > {
> >
> >     ...
> >     if (new_master == pool_config->backend_desc->num_backends)
> >     {
> >         pool_error("failover_handler: no valid DB node found");
> >     }
> >
> > /*
> >  * Before we tried to minimize restarting pgpool to protect existing
> >  * connections from clients to pgpool children. What we did here was,
> >  * if children other than master went down, we did not fail over.
> >  * This is wrong. Think about following scenario. If someone
> >  * accidentally plugs out the network cable, the TCP/IP stack keeps
> >  * retrying for long time (typically 2 hours). The only way to stop
> >  * the retry is restarting the process.  Bottom line is, we need to
> >  * restart all children in any case.  See pgpool-general list posting
> >  * "TCP connections are *not* closed when a backend timeout" on Jul 13
> >  * 2008 for more details.
> >  * It has been parameterized as people may want behavior depending on their 
> > system.
> >  */
> >
> > ----------- modified here ------------------------
> > //#ifdef NOT_USED //
> >     else
> >     {
> >         if ( 
> > pool_config->restart_all_process_for_same_master_node_in_failover == 0 &&   
> > Req_info->master_node_id == new_master && *InRecovery == 0)
> >         {
> >             pool_log("failover_handler: do not restart pgpool. same master 
> > node %d was selected", new_master);
> >             if (Req_info->kind == NODE_UP_REQUEST)
> >             {
> >                 pool_log("failback done. reconnect host %s(%d)",
> >                          BACKEND_INFO(node_id).backend_hostname,
> >                          BACKEND_INFO(node_id).backend_port);
> >             }
> >             else
> >             {
> >                 pool_log("failover done. shutdown host %s(%d)",
> >                          BACKEND_INFO(node_id).backend_hostname,
> >                          BACKEND_INFO(node_id).backend_port);
> >             }
> >
> >             /* exec failover_command */
> >             for (i = 0; i < pool_config->backend_desc->num_backends; i++)
> >             {
> >                 if (nodes[i])
> >                     trigger_failover_command(i, 
> > pool_config->failover_command);
> >             }
> >
> >             pool_semaphore_unlock(REQUEST_INFO_SEM);
> >             switching = 0;
> >             kill(pcp_pid, SIGUSR2);
> >             switching = 0;
> >             return;
> >         }
> >     }
> > //#endif //
> > ----------- end of modification ------------------------
> >
> >     /* kill all children */
> >     for (i = 0; i < pool_config->num_init_children; i++)
> >     {
> >         pid_t pid = pids[i].pid;
> >         if (pid)
> >         {
> >             kill(pid, SIGQUIT);
> >             pool_debug("failover_handler: kill %d", pid);
> >         }
> >     }
> >
> >     ...
> > } // end of failover()
> > ...
_______________________________________________
Pgpool-hackers mailing list
[email protected]
http://pgfoundry.org/mailman/listinfo/pgpool-hackers

Reply via email to