Hello,
I've discovered a bug in the bonding module of the Linux kernel which
appears only in bonding mode balance-alb.
Description:
To reproduce it, set up a box with at least two NICs and a bonding device
enslaving them, assign at least two IPs to the bond, and generate some
traffic from a different machine to one of those IPs.
If you then delete that IP, the box will nevertheless keep sending ARP
replies to the machine which communicated with that IP before it was removed.
This comes from the rx_hashtbl used by the receive load balancing algorithm.
The bug is very serious if bonding is used in a cluster environment with
two nodes which are connected to the same subnet. If an IP-bound service
has to fail over to the other node, the old node will still announce its
MAC address for the IP, which it no longer owns. So client traffic from
the same network will hit the old node.
A possible workaround is to use balance-tlb instead of balance-alb.
I've made a small patch which removes every entry from the rx_hashtbl if
the corresponding IP is removed from the bond. The patch was made against
Linux kernel version 2.6.19.
---8---
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c linux/drivers/net/bonding/bond_alb.c
--- linux-2.6.19/drivers/net/bonding/bond_alb.c 2006-11-29 22:57:37.0 +0100
+++ linux/drivers/net/bonding/bond_alb.c 2007-01-16 17:23:53.0 +0100
@@ -1677,3 +1677,38 @@
}
}
+void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip) {
+	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
+	u32 curr_index;
+
+	dprintk("%s: removing entries from rx_hashtbl for IP %lx\n", bond->dev->name, ip);
+	_lock_rx_hashtbl(bond);
+
+	curr_index = bond_info->rx_hashtbl_head;
+	while (curr_index != RLB_NULL_INDEX) {
+		struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
+		u32 next_index = bond_info->rx_hashtbl[curr_index].next;
+		u32 prev_index = bond_info->rx_hashtbl[curr_index].prev;
+
+		if (curr->ip_src == ip) {
+			dprintk("%s: entry %u matched\n", bond->dev->name, curr_index);
+
+			if (curr_index == bond_info->rx_hashtbl_head) {
+				bond_info->rx_hashtbl_head = next_index;
+			}
+			if (prev_index != RLB_NULL_INDEX) {
+				bond_info->rx_hashtbl[prev_index].next = next_index;
+			}
+			if (next_index != RLB_NULL_INDEX) {
+				bond_info->rx_hashtbl[next_index].prev = prev_index;
+			}
+
+			rlb_init_table_entry(curr);
+		}
+
+		curr_index = next_index;
+	}
+
+	_unlock_rx_hashtbl(bond);
+}
+
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.h linux/drivers/net/bonding/bond_alb.h
--- linux-2.6.19/drivers/net/bonding/bond_alb.h 2006-11-29 22:57:37.0 +0100
+++ linux/drivers/net/bonding/bond_alb.h 2007-01-16 17:23:53.0 +0100
@@ -128,5 +128,6 @@
void bond_alb_monitor(struct bonding *bond);
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
+void bond_alb_remove_ip_from_rlb(struct bonding *bond, u32 ip);
#endif /* __BOND_ALB_H__ */
diff -ur linux-2.6.19/drivers/net/bonding/bond_main.c linux/drivers/net/bonding/bond_main.c
--- linux-2.6.19/drivers/net/bonding/bond_main.c 2006-11-29 22:57:37.0 +0100
+++ linux/drivers/net/bonding/bond_main.c 2007-01-16 17:30:49.0 +0100
@@ -3356,6 +3356,12 @@
return NOTIFY_OK;
case NETDEV_DOWN:
bond->master_ip = bond_glean_dev_ip(bond->dev);
+
+ /* remove IP from RLB hashtable if using balance-alb mode: */
+ if (bond->params.mode == BOND_MODE_ALB) {
+ bond_alb_remove_ip_from_rlb(bond, ifa->ifa_local);
+ }
+
return NOTIFY_OK;
default:
return NOTIFY_DONE;
---8---
The function bond_alb_remove_ip_from_rlb is heavily based on the function
rlb_clear_vlan.
And here's a useful patch for debugging purposes (it outputs the rx_hashtbl
in the proc file of the bond):
---8---
diff -ur linux-2.6.19/drivers/net/bonding/bond_alb.c linux/drivers/net/bonding/bond_alb.c
--- linux-2.6.19/drivers/net/bonding/bond_alb.c 2007-01-16 18:59:32.0 +0100
+++ linux/drivers/net/bonding/bond_alb.c 2007-01-16 18:48:15.0 +0100
@@ -26,6 +26,7 @@
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/pkt_sched.h>
+#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/slab.h>