On Wed, Feb 22, 2023 at 10:35 PM Han Zhou <hz...@ovn.org> wrote: > > Today ARP resolve flows (static mac-bindings) are programmed for virtual > ports once their virtual parent is claimed. As a result, during virtual > parent failover, the traffic won't switch to the new parent until the > ARP resolve flow is updated by ovn-northd, triggered by the port-binding > update. When the scale is big, the ovn-northd compute may take a long > time, e.g. 10s or even more, which is too long data plane break during > virtual parent failover. > > This patch removes the dependency of ovn-northd from the failover > scenario by removing the ARP resolve flows, so that it relies on dynamic > mac-bindings to resolve virtual parent's MAC for the virtual IP. This > avoids the logical flow recompute during failover thus make it much > faster. Functionally there is no difference. > > Signed-off-by: Han Zhou <hz...@ovn.org> > --- > northd/northd.c | 103 ---------------------------------------- > northd/ovn-northd.8.xml | 37 +++------------ > tests/ovn.at | 55 ++------------------- > tests/system-ovn.at | 1 - > 4 files changed, 11 insertions(+), 185 deletions(-) > > diff --git a/northd/northd.c b/northd/northd.c > index 770a5b50e2c0..65cfb7975d1b 100644 > --- a/northd/northd.c > +++ b/northd/northd.c > @@ -12455,109 +12455,6 @@ build_arp_resolve_flows_for_lrouter_port( > } > } > } > - } else if (op->od->n_router_ports && !lsp_is_router(op->nbsp) > - && !strcmp(op->nbsp->type, "virtual")) { > - /* This is a virtual port. Add ARP replies for the virtual ip with > - * the mac of the present active virtual parent. > - * If the logical port doesn't have virtual parent set in > - * Port_Binding table, then add the flow to set eth.dst to > - * 00:00:00:00:00:00 and advance to next table so that ARP is > - * resolved by router pipeline using the arp{} action. > - * The MAC_Binding entry for the virtual ip might be invalid. */ > - > - const char *vip = smap_get(&op->nbsp->options, > - "virtual-ip"); > - const char *virtual_parents = smap_get(&op->nbsp->options, > - "virtual-parents"); > - > - if (!vip || !virtual_parents || !op->sb) { > - return; > - } > - > - bool is_ipv4 = strchr(vip, '.') ? true : false; > - if (is_ipv4) { > - ovs_be32 ipv4; > - if (!ip_parse(vip, &ipv4)) { > - return; > - } > - } else { > - struct in6_addr ipv6; > - if (!ipv6_parse(vip, &ipv6)) { > - return; > - } > - } > - > - if (!op->sb->virtual_parent || !op->sb->virtual_parent[0] || > - !op->sb->chassis) { > - /* The virtual port is not claimed yet. */ > - for (size_t i = 0; i < op->od->n_router_ports; i++) { > - struct ovn_port *peer = ovn_port_get_peer( > - ports, op->od->router_ports[i]); > - if (!peer || !peer->nbrp) { > - continue; > - } > - > - if (find_lrp_member_ip(peer, vip)) { > - ds_clear(match); > - ds_put_format( > - match, "outport == %s && " "%s == %s", peer->json_key, > - is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, vip); > - > - const char *arp_actions = > - "eth.dst = 00:00:00:00:00:00; next;"; > - ovn_lflow_add_with_hint(lflows, peer->od, > - S_ROUTER_IN_ARP_RESOLVE, 100, > - ds_cstr(match), > - arp_actions, > - &op->nbsp->header_); > - break; > - } > - } > - } else { > - struct ovn_port *vp = > - ovn_port_find(ports, op->sb->virtual_parent); > - if (!vp || !vp->nbsp) { > - return; > - } > - > - for (size_t i = 0; i < vp->n_lsp_addrs; i++) { > - bool found_vip_network = false; > - const char *ea_s = vp->lsp_addrs[i].ea_s; > - for (size_t j = 0; j < vp->od->n_router_ports; j++) { > - /* Get the Logical_Router_Port that the > - * Logical_Switch_Port is connected to, as > - * 'peer'. */ > - struct ovn_port *peer = > - ovn_port_get_peer(ports, vp->od->router_ports[j]); > - if (!peer || !peer->nbrp) { > - continue; > - } > - > - if (!find_lrp_member_ip(peer, vip)) { > - continue; > - } > - > - ds_clear(match); > - ds_put_format( > - match, "outport == %s && " "%s == %s", peer->json_key, > - is_ipv4 ? REG_NEXT_HOP_IPV4 : REG_NEXT_HOP_IPV6, vip); > - > - ds_clear(actions); > - ds_put_format(actions, "eth.dst = %s; next;", ea_s); > - ovn_lflow_add_with_hint(lflows, peer->od, > - S_ROUTER_IN_ARP_RESOLVE, 100, > - ds_cstr(match), > - ds_cstr(actions), > - &op->nbsp->header_); > - found_vip_network = true; > - break; > - } > - > - if (found_vip_network) { > - break; > - } > - } > - } > } else if (lsp_is_router(op->nbsp)) { > /* This is a logical switch port that connects to a router. */ > > diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml > index 2eab2c4ae094..574d358c570c 100644 > --- a/northd/ovn-northd.8.xml > +++ b/northd/ovn-northd.8.xml > @@ -4206,9 +4206,13 @@ outport = <var>P</var> > data in the <code>OVN_Northbound</code> database. For router ports > connected to logical switches, MAC bindings can be known statically > from the <code>addresses</code> column in the > - <code>Logical_Switch_Port</code> table. For router ports > - connected to other logical routers, MAC bindings can be known > - statically from the <code>mac</code> and <code>networks</code> > + <code>Logical_Switch_Port</code> table. (Note: the flow is not > + installed for IPs of logical switch ports of type > + <code>virtual</code>, and dynamic MAC binding is used for those IPs > + instead, so that virtual parent failover does not depend on <code> > + ovn-northd</code>, to achieve better failover performance.) For > + router ports connected to other logical routers, MAC bindings can be > + known statically from the <code>mac</code> and <code>networks</code> > column in the <code>Logical_Router_Port</code> table. (Note: the > flow is NOT installed for the IP addresses that belong to a neighbor > logical router port if the current router has the > @@ -4223,33 +4227,6 @@ outport = <var>P</var> > <code>eth.dst = <var>E</var>; next;</code>. > </p> > > - <p> > - For each virtual ip <var>A</var> configured on a logical port > - of type <code>virtual</code> and its virtual parent set in > - its corresponding <ref db="OVN_Southbound" table="Port_Binding"/> > - record and the virtual parent with the Ethernet address <var>E</var> > - and the virtual ip is reachable via the router port <var>P</var>, a > - priority-100 flow with match <code>outport === <var>P</var> > - && xxreg0/reg0 == <var>A</var></code> has actions > - <code>eth.dst = <var>E</var>; next;</code>. > - </p> > - > - <p> > - For each virtual ip <var>A</var> configured on a logical port > - of type <code>virtual</code> and its virtual parent <code>not</code> > - set in its corresponding > - <ref db="OVN_Southbound" table="Port_Binding"/> > - record and the virtual ip <var>A</var> is reachable via the > - router port <var>P</var>, a > - priority-100 flow with match <code>outport === <var>P</var> > - && xxreg0/reg0 == <var>A</var></code> has actions > - <code>eth.dst = <var>00:00:00:00:00:00</var>; next;</code>. > - This flow is added so that the ARP is always resolved for the > - virtual ip <var>A</var> by generating ARP request and > - <code>not</code> consulting the MAC_Binding table as it can have > - incorrect value for the virtual ip <var>A</var>. > - </p> > - > <p> > For each IPv6 address <var>A</var> whose host is known to have > Ethernet address <var>E</var> on router port <var>P</var>, a > diff --git a/tests/ovn.at b/tests/ovn.at > index e7542db42503..fc0428a84adb 100644 > --- a/tests/ovn.at > +++ b/tests/ovn.at > @@ -20966,16 +20966,6 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows | grep bind_vport | sed 's/table=../table > ovn-sbctl dump-flows lr0 > lr0-flows > AT_CAPTURE_FILE([lr0-flows]) > > -# Since the sw0-vir is not claimed by any chassis, eth.dst should be set to > -# zero if the ip4.dst is the virtual ip in the router pipeline. > -AT_CHECK([grep lr_in_arp_resolve lr0-flows | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) > -]) > - > -AT_CHECK([grep lr_in_arp_resolve lr0-flows | grep "xxreg0 == 1000::61d1" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && xxreg0 == 1000::61d1), action=(eth.dst = 00:00:00:00:00:00; next;) > -]) > - > hv1_ch_uuid=`ovn-sbctl --bare --columns _uuid find chassis name="hv1"` > hv2_ch_uuid=`ovn-sbctl --bare --columns _uuid find chassis name="hv2"` > > @@ -21053,17 +21043,8 @@ check_row_count Port_Binding 1 logical_port=sw0-vir6 virtual_parent=sw0-p1 > wait_for_ports_up sw0-vir6 > check ovn-nbctl --wait=hv sync > > -# There should be an arp resolve flow to resolve the virtual_ip with the > -# sw0-p1's MAC. > ovn-sbctl dump-flows lr0 > lr0-flows2 > AT_CAPTURE_FILE([lr0-flows2]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows2 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) > -]) > - > -AT_CHECK([grep lr_in_arp_resolve lr0-flows2 | grep "xxreg0 == 1000::61d1" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && xxreg0 == 1000::61d1), action=(eth.dst = 50:54:00:00:00:03; next;) > -]) > > # hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > @@ -21185,15 +21166,10 @@ logical_port=sw0-vir) = xsw0-p3]) > > wait_for_ports_up sw0-vir > > -# There should be an arp resolve flow to resolve the virtual_ip with the > -# sw0-p3's MAC. > check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows lr0 > lr0-flows3 > AT_CAPTURE_FILE([lr0-flows3]) > cp ovn-sb/ovn-sb.db lr0-flows3.db > -AT_CHECK([grep lr_in_arp_resolve lr0-flows3 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:05; next;) > -]) > > # hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > @@ -21218,14 +21194,9 @@ logical_port=sw0-vir) = xsw0-p2]) > > wait_for_ports_up sw0-vir > > -# There should be an arp resolve flow to resolve the virtual_ip with the > -# sw0-p2's MAC. > check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows lr0 > lr0-flows4 > AT_CAPTURE_FILE([lr0-flows4]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows4 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) > -]) > > # hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > @@ -21251,13 +21222,10 @@ logical_port=sw0-vir) = xsw0-p1]) > > wait_for_ports_up sw0-vir > > +check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows lr0 > lr0-flows5 > AT_CAPTURE_FILE([lr0-flows5]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows5 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:03; next;) > -]) > > -check ovn-nbctl --wait=hv sync > # hv1 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > check_virtual_offlows_present hv1 > @@ -21277,15 +21245,10 @@ logical_port=sw0-vir) = x]) > > wait_row_count nb:Logical_Switch_Port 1 up=false name=sw0-vir > > -# Since the sw0-vir is not claimed by any chassis, eth.dst should be set to > -# zero if the ip4.dst is the virtual ip. > +check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows lr0 > lr0-flows6 > AT_CAPTURE_FILE([lr0-flows6]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows6 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) > -]) > > -check ovn-nbctl --wait=hv sync > # hv1 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > check_virtual_offlows_not_present hv1 > @@ -21311,13 +21274,10 @@ logical_port=sw0-vir) = xsw0-p2]) > > wait_for_ports_up sw0-vir > > +check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows lr0 > lr0-flows7 > AT_CAPTURE_FILE([lr0-flows7]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows7 | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 50:54:00:00:00:04; next;) > -]) > > -check ovn-nbctl --wait=hv sync > # hv2 should add the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > check_virtual_offlows_present hv2 > @@ -21346,6 +21306,7 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows2 | grep bind_vport], [1]) > # Add back virtual_ip and clear virtual_parents. > ovn-nbctl --wait=hv set logical_switch_port sw0-vir options:virtual-ip=10.0.0.10 > > +check ovn-nbctl --wait=hv sync > ovn-sbctl dump-flows sw0 > sw0-flows3 > AT_CAPTURE_FILE([sw0-flows3]) > AT_CHECK([grep ls_in_arp_rsp sw0-flows3 | grep bind_vport | sed 's/table=../table=??/'], [0], [dnl > @@ -21353,7 +21314,6 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows3 | grep bind_vport | sed 's/table=../tabl > table=??(ls_in_arp_rsp ), priority=100 , match=(inport == "sw0-p3" && ((arp.op == 1 && arp.spa == 10.0.0.10 && arp.tpa == 10.0.0.10) || (arp.op == 2 && arp.spa == 10.0.0.10))), action=(bind_vport("sw0-vir", inport); next;) > ]) > > -check ovn-nbctl --wait=hv sync > # hv2 should remove the flow for the ACL with is_chassis_redirect check for sw0-vir and > # arp responder flow in lr0 pipeline. > check_virtual_offlows_not_present hv2 > @@ -21368,7 +21328,6 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows4 | grep bind_vport], [1]) > > ovn-sbctl dump-flows lr0 > lr0-flows8 > AT_CAPTURE_FILE([lr0-flows8]) > -AT_CHECK([grep lr_in_arp_resolve lr0-flows8 | grep "reg0 == 10.0.0.10"], [1]) > > # Delete sw0-vir and add again. > ovn-nbctl lsp-del sw0-vir > @@ -21396,12 +21355,6 @@ AT_CHECK([grep ls_in_arp_rsp sw0-flows | grep bind_vport | sed 's/table=../table > ovn-sbctl dump-flows lr0 > lr0-flows > AT_CAPTURE_FILE([lr0-flows]) > > -# Since the sw0-vir is not claimed by any chassis, eth.dst should be set to > -# zero if the ip4.dst is the virtual ip in the router pipeline. > -AT_CHECK([grep lr_in_arp_resolve lr0-flows | grep "reg0 == 10.0.0.10" | sed 's/table=../table=??/'], [0], [dnl > - table=??(lr_in_arp_resolve ), priority=100 , match=(outport == "lr0-sw0" && reg0 == 10.0.0.10), action=(eth.dst = 00:00:00:00:00:00; next;) > -]) > - > OVN_CLEANUP([hv1], [hv2]) > AT_CLEANUP > ]) > diff --git a/tests/system-ovn.at b/tests/system-ovn.at > index cccb8ec4aa95..0880c8d94b34 100644 > --- a/tests/system-ovn.at > +++ b/tests/system-ovn.at > @@ -10694,7 +10694,6 @@ start_daemon ovn-controller > > # Add routers > check ovn-nbctl lr-add lr1 > -check ovn-nbctl set logical_router lr1 options:always_learn_from_arp_request=false
Sorry, this line was a mistake. It shouldn't be necessary even in patch 2. I will move this line removal to patch 2 in the next version (or before merging). > > # Add switches > check ovn-nbctl ls-add public1 > -- > 2.30.2 > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev