00-README.conf | 8 +-
osaf/services/infrastructure/nid/scripts/configure_tipc.in | 83 +++++++++++++-
2 files changed, 87 insertions(+), 4 deletions(-)
Issue :
---------
Currently in Opensaf, when a duplicate tipc-id appears on the network,
both controllers eventually crash
and the cluster goes for a reboot.
steps to reproduce:
----------------------------
1. Both controllers SC-1,SC-2 are up and running with SC-1 active and SC-2
standby.
2. Wrongly configure the slot_id of PL-3 with value 3 (same value as SC-2)
3. Start the opensaf on PL-3.
Fix :
------
Added a check for a duplicate TIPC node_id before configuring TIPC/starting
Opensaf.
This fix is done using the tipc tools. Usage:
tipc-config command [command ...]
valid commands:
-addr [=<addr>] Get/set node address
-b [=<bearerpat>] Get bearers
-bd =<bearerpat> Disable bearer
-be =<bearer>[/<domain>[/<priority>]]] Enable bearer
-dest =<addr> Command destination node
-help This usage list
-i Interactive set operations
-l [=<domain>|<linkpat>] Get links to domain
-log [=<size>] Dump/resize log
-lp =<linkpat>|<bearer>|<media>/<value> Set link priority
-ls [=<linkpat>] Get link statistics
-lsr =<linkpat> Reset link statistics
-lt =<linkpat>|<bearer>|<media>/<value> Set link tolerance
-lw =<linkpat>|<bearer>|<media>/<value> Set link window
-m Get media
-max_clusters [=<value>] Get/set max clusters in own zone
-max_nodes [=<value>] Get/set max nodes in own cluster
-max_ports [=<value>] Get/set max number of ports
-max_publ [=<value>] Get/set max publications
-max_remotes [=<value>] Get/set max non-cluster neighbors
-max_subscr [=<value>] Get/set max subscriptions
-max_zones [=<value>] Get/set max zones in own network
-mng [=enable|disable] Get/set remote management
-n [=<domain>] Get nodes in domain
-netid[=<value>] Get/set network id
-nt [=[<depth>,]<type>[,<low>[,<up>]]] Get name table
where <depth> = types|names|ports|all
-p Get port info
-r [=<domain>] Get routes to domain
-s Get TIPC status info
-v Verbose output
-V Get tipc-config version info
(specifically the tipc-config -nt option), and
within the scope of the Opensaf startup scripts.
Note: This bug can also be fixed in the TIPC code; once that fix is available
we can remove this code.
We wish to check whether duplicate nodes are present in the cluster
before adding the newly configured Opensaf node. This is accomplished by
adding a dummy node to the TIPC topology; this node exists only for a very
short time and will not match
any other Opensaf node configuration.
The use of a dummy Node having the name sequence {1,1,2000} , this allows the
Opensaf to find the information at start-up by getting existing name table of
cluster
by using TIPC module not installed command.
diff --git a/00-README.conf b/00-README.conf
--- a/00-README.conf
+++ b/00-README.conf
@@ -128,13 +128,17 @@ Notes:
as: $ configure_tipc start <interface name> <TIPC netid>
For eg:- $ configure_tipc start eth0 9859
-(f) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to enable or
+(h) Setting MDS_TIPC_MCAST_ENABLED to 1 or 0, allows OpenSAF to enable or
disable TIPC Multicast Messaging and this configuration is valid when
MDS_TRANSPORT is set to TIPC. By Default TIPC Multicast Messaging is
Enabled.
Note: In case of TIPC Multicast Messaging disabled (0), the performance
of OpenSAF will be considerably lower compared to Enabled (1).
-
+
+(i) To use TIPC duplicate node address detection in the cluster while starting OpenSAF,
+ TIPC_DUPLICATE_NODE_DETECT=YES needs to be enabled in the
+ `/usr/lib(64)/opensaf/configure_tipc` script.
+
*******************************************************************************
nodeinit.conf.<node_type>
diff --git a/osaf/services/infrastructure/nid/scripts/configure_tipc.in
b/osaf/services/infrastructure/nid/scripts/configure_tipc.in
--- a/osaf/services/infrastructure/nid/scripts/configure_tipc.in
+++ b/osaf/services/infrastructure/nid/scripts/configure_tipc.in
@@ -35,6 +35,16 @@ SUBSLOT_ID_FILE=$pkgsysconfdir/subslot_i
# of the address and the slot ID is shifted up 4 bits.
USE_SUBSLOT_ID=${TIPC_USE_SUBSLOT_ID:-"NO"}
+# Currently supported max nodes for Opensaf
+TIPC_MAX_NODES=2000
+
+# Support for enabling/disabling duplicate node verification.
+# When TIPC_DUPLICATE_NODE_DETECT is set to "YES" (the default is "YES"),
+# it is pre-verified at Opensaf start-up whether any duplicate node
+# exists in the cluster. Set TIPC_DUPLICATE_NODE_DETECT to NO to disable
+# the pre-verification of duplicate nodes.
+DUPLICATE_NODE_DETECT=${TIPC_DUPLICATE_NODE_DETECT:-"YES"}
+
# constants
SHIFT4=4
@@ -112,6 +122,72 @@ else
TIPC_NODEID=$SLOT_ID
fi
+function tipc_duplicate_node_detect ()  # Pre-check: look in the TIPC name table for an existing 1.1.$TIPC_NODEID; exits 1 on any failure or on a duplicate
+{
+ logger -t opensaf -s "Checking for duplicate Node: $TIPC_NODEID in
Cluster..."
+ if ! test -f "$TIPC_MODULE" ; then  # no module file at $TIPC_MODULE: load tipc through modprobe
+ modprobe tipc
+ else  # module file exists: insert that file directly
+ insmod "$TIPC_MODULE"
+ fi
+
+ ret_val=$?  # exit status of whichever loader command ran above
+ if [ $ret_val -ne 0 ] ; then
+ logger -p user.err " TIPC Module could not be loaded "
+ exit 1
+ fi
+
+ # max_nodes is not supported in TIPC 2.0
+ if ${tipc_config} -max_nodes 2> /dev/null; then  # only set max_nodes when the query itself succeeds
+ ${tipc_config} -max_nodes=$TIPC_MAX_NODES
+ ret_val=$?
+ if [ $ret_val -ne 0 ] ; then
+ modprobe -r tipc
+ exit 1
+ fi
+ fi
+
+ if [ $# -eq 1 ] ; then  # NOTE(review): inside a function $# is the function's own argument count; the call shown in this patch passes none - confirm intent
+ ################ Address config and check #########
+ ${tipc_config} -a=1.1.$TIPC_MAX_NODES  # uses address 1.1.<TIPC_MAX_NODES>, not this node's own TIPC_NODEID
+ ret_z1=$?
+ if [ $ret_z1 -ne 0 ] ; then
+ echo "Unable to Configure TIPC address, Please try again,
exiting"
+ modprobe -r tipc
+ exit 1
+ fi  # NOTE(review): this branch ends here - no name-table check or module unload follows
+ else
+ ${tipc_config} -netid=$TIPC_NETID -a=1.1.$TIPC_MAX_NODES  # set the network id and the 1.1.<TIPC_MAX_NODES> address in one call
+ ret_z2=$?
+ if [ $ret_z2 -ne 0 ] ; then
+ echo "Unable to Configure TIPC address, Please try again, exiting"
+ modprobe -r tipc
+ exit 1
+ fi
+ ${tipc_config} -be=$(echo $ETH_NAME | sed 's/^/eth:/;s/,/,eth:/g')  # prefix every comma-separated interface name with "eth:"
+ ret_z3=$?
+ if [ $ret_z3 -ne 0 ] ; then
+ echo "Unable to Configure TIPC bearer interface, Please try again,
exiting"
+ modprobe -r tipc
+ exit 1
+ else
+ ${tipc_config} -nt | grep cluster | grep "1.1.$TIPC_NODEID:" >
/dev/null
+ ret_z4=$?  # 0 means a name-table line matched 1.1.$TIPC_NODEID:
+ if [ $ret_z4 -eq 0 ] ; then  # match found: report the duplicate, unload tipc and abort
+ logger -t opensaf -s "Unable to Configure TIPC Node, Duplicate
Node $TIPC_NODEID exist in cluster, exiting..."
+ modprobe -r tipc
+ exit 1
+ else  # no match: unload the module again, mirroring how it was loaded above
+ if ! test -f "$TIPC_MODULE" ; then
+ modprobe -r tipc
+ else
+ rmmod "$TIPC_MODULE"
+ fi
+ fi
+ fi
+ fi
+}
+
function tipc_configure ()
{
echo "Inserting TIPC mdoule..."
@@ -130,10 +206,10 @@ function tipc_configure ()
# max_nodes is not supported in TIPC 2.0
if ${tipc_config} -max_nodes 2> /dev/null; then
- ${tipc_config} -max_nodes=2000
+ ${tipc_config} -max_nodes=$TIPC_MAX_NODES
ret_val=$?
if [ $ret_val -ne 0 ] ; then
- echo "Unable to set the Max_nodes to 2000, exiting ....."
+ echo "Unable to set the Max_nodes to $TIPC_MAX_NODES, exiting
....."
modprobe -r tipc
exit 1
fi
@@ -177,6 +253,9 @@ function tipc_configure ()
# Consider that TIPC could be statically linked
if ! grep TIPC /proc/net/protocols >& /dev/null; then
+ if [ "$DUPLICATE_NODE_DETECT" = "YES" ]; then
+ tipc_duplicate_node_detect
+ fi
tipc_configure
else
# TIPC is already present, is it configured properly?
------------------------------------------------------------------------------
Monitor Your Dynamic Infrastructure at Any Scale With Datadog!
Get real-time metrics from all of your servers, apps and tools
in one place.
SourceForge users - Click here to start your Free Trial of Datadog now!
http://pubads.g.doubleclick.net/gampad/clk?id=241902991&iu=/4140
_______________________________________________
Opensaf-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/opensaf-devel