> On 11/15/22 10:44, Lorenzo Bianconi wrote: > >> On Mon, Oct 24, 2022 at 11:29 AM Lorenzo Bianconi < > >> lorenzo.bianc...@redhat.com> wrote: > >> > >>> Improve buffered packet management in ovn-controller avoid useless loop > >>> in run_buffered_binding routine and using datapath key and output port > >>> key as buffered_packets_map hashmap hash. Add new selftest for buffered > >>> packets. > >>> > >>> Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com> > >>> --- > > Hi Lorenzo, > > Thanks for the patch! I have a few comments below. > > >>> Changes since v2: > >>> - improve hash function > >>> Changes since v1: > >>> - improve code readability > >>> --- > >>> controller/pinctrl.c | 118 ++++++++++++++++++++++++++++------------ > >>> tests/system-ovn.at | 124 +++++++++++++++++++++++++++++++++++++++++++ > >>> 2 files changed, 208 insertions(+), 34 deletions(-) > >>> > >>> diff --git a/controller/pinctrl.c b/controller/pinctrl.c > >>> index 8859cb080..dfcd0cea8 100644 > >>> --- a/controller/pinctrl.c > >>> +++ b/controller/pinctrl.c > >>> @@ -182,6 +182,7 @@ static void destroy_buffered_packets_map(void); > >>> static void > >>> run_buffered_binding(struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip, > >>> struct ovsdb_idl_index > >>> *sbrec_port_binding_by_datapath, > >>> + struct ovsdb_idl_index *sbrec_port_binding_by_name, > >>> const struct hmap *local_datapaths) > >>> OVS_REQUIRES(pinctrl_mutex); > >>> > >>> @@ -1430,6 +1431,9 @@ struct buffered_packets { > >>> struct in6_addr ip; > >>> struct eth_addr ea; > >>> > >>> + uint64_t dp_key; > >>> + uint64_t port_key; > >>> + > >>> long long int timestamp; > >>> > >>> struct buffer_info data[BUFFER_QUEUE_DEPTH]; > >>> @@ -1556,38 +1560,38 @@ buffered_packets_map_gc(void) > >>> } > >>> > >>> static struct buffered_packets * > >>> -pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash) > >>> +pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key, > >>> + uint32_t hash) > 
>>> { > >>> struct buffered_packets *qp; > >>> - > >>> - HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, > >>> - &buffered_packets_map) { > >>> - if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) { > >>> + HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) > >>> { > >>> + if (qp->dp_key == dp_key && qp->port_key == port_key) { > >>> return qp; > >>> } > >>> } > >>> return NULL; > >>> } > >>> > >>> +static uint32_t > >>> +pinctrl_buffer_apcket_hash(uint64_t dp_key, uint64_t port_key) > >>> > >> > >> nit: typo in the function name "pinctrl_buffer_apcket_hash" -> > >> "pinctrl_buffer_packet_hash". > >> I guess this can be fixed during merge, let's see if others agree. > >> > >> > >>> +{ > >>> + uint32_t hash = 0; > >>> + hash = hash_add64(hash, port_key); > >>> + hash = hash_add64(hash, dp_key); > >>> + return hash_finish(hash, 16); > >>> +} > >>> + > >>> /* Called with in the pinctrl_handler thread context. */ > >>> static int > >>> pinctrl_handle_buffered_packets(struct dp_packet *pkt_in, > >>> const struct match *md, bool is_arp) > >>> OVS_REQUIRES(pinctrl_mutex) > >>> { > >>> - struct buffered_packets *bp; > >>> - struct dp_packet *clone; > >>> - struct in6_addr addr; > >>> - > >>> - if (is_arp) { > >>> - addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); > >>> - } else { > >>> - ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0)); > >>> - memcpy(&addr, &ip6, sizeof addr); > >>> - } > >>> - > >>> - uint32_t hash = hash_bytes(&addr, sizeof addr, 0); > >>> - bp = pinctrl_find_buffered_packets(&addr, hash); > >>> + uint64_t dp_key = ntohll(md->flow.metadata); > >>> + uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0]; > >>> + uint32_t hash = pinctrl_buffer_apcket_hash(dp_key, oport_key); > >>> + struct buffered_packets *bp > >>> + = pinctrl_find_buffered_packets(dp_key, oport_key, hash); > >>> if (!bp) { > >>> if (hmap_count(&buffered_packets_map) >= 1000) { > > Before your patch we would hit this hard coded limit if there were 1000 > next hops 
for which we were buffering packets. > > Now, with your change, we will hit this limit only if there are 1000 > ports for which we are buffering packets. This seems very unlikely and > I wonder if this will ever happen in practice. Do we need to change its > value? Also, let's add a define for it somewhere to make it a bit less > "magic".
ack. I would prefer to be a bit more conservative and keep the condition. What is the right value to use up to you? :) > > >>> COVERAGE_INC(pinctrl_drop_buffered_packets_map); > >>> @@ -1597,12 +1601,20 @@ pinctrl_handle_buffered_packets(struct dp_packet > >>> *pkt_in, > >>> bp = xmalloc(sizeof *bp); > >>> hmap_insert(&buffered_packets_map, &bp->hmap_node, hash); > >>> bp->head = bp->tail = 0; > >>> - bp->ip = addr; > >>> + if (is_arp) { > >>> + bp->ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0])); > >>> + } else { > >>> + ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0)); > >>> + memcpy(&bp->ip, &ip6, sizeof bp->ip); > >>> + } > >>> + bp->dp_key = dp_key; > >>> + bp->port_key = oport_key; > >>> } > >>> bp->timestamp = time_msec(); > >>> /* clone the packet to send it later with correct L2 address */ > >>> - clone = dp_packet_clone_data(dp_packet_data(pkt_in), > >>> - dp_packet_size(pkt_in)); > >>> + struct dp_packet *clone > >>> + = dp_packet_clone_data(dp_packet_data(pkt_in), > >>> + dp_packet_size(pkt_in)); > >>> buffered_push_packet(bp, clone, md); > >>> > >>> return 0; > >>> @@ -3586,6 +3598,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, > >>> sbrec_ip_multicast_opts); > >>> run_buffered_binding(sbrec_mac_binding_by_lport_ip, > >>> sbrec_port_binding_by_datapath, > >>> + sbrec_port_binding_by_name, > >>> local_datapaths); > >>> sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, > >>> sbrec_port_binding_by_name, > >>> chassis); > >>> @@ -4354,15 +4367,28 @@ run_put_mac_bindings(struct ovsdb_idl_txn > >>> *ovnsb_idl_txn, > >>> } > >>> } > >>> > >>> +static struct buffered_packets * > >>> +pinctrl_get_buffered_packets(uint64_t dp_key, uint64_t port_key) > >>> +{ > >>> + uint32_t hash = pinctrl_buffer_apcket_hash(dp_key, port_key); > >>> + return pinctrl_find_buffered_packets(dp_key, port_key, hash); > >>> +} > >>> + > >>> static void > >>> run_buffered_binding(struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip, > >>> struct ovsdb_idl_index > 
>>> *sbrec_port_binding_by_datapath, > >>> + struct ovsdb_idl_index *sbrec_port_binding_by_name, > >>> const struct hmap *local_datapaths) > >>> OVS_REQUIRES(pinctrl_mutex) > >>> { > >>> const struct local_datapath *ld; > >>> bool notify = false; > >>> > >>> + if (!hmap_count(&buffered_packets_map)) { > >>> + /* No work to do. */ > >>> + return; > >>> + } > >>> + > >>> HMAP_FOR_EACH (ld, hmap_node, local_datapaths) { > >>> /* MAC_Binding.logical_port will always belong to a > >>> * a router datapath. Hence we can skip logical switch > >>> @@ -4381,20 +4407,44 @@ run_buffered_binding(struct ovsdb_idl_index > >>> *sbrec_mac_binding_by_lport_ip, > >>> if (strcmp(pb->type, "patch") && strcmp(pb->type, > >>> "l3gateway")) { > >>> continue; > >>> } > >>> - struct buffered_packets *cur_qp; > >>> - HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) > >>> { > >>> - struct ds ip_s = DS_EMPTY_INITIALIZER; > >>> - ipv6_format_mapped(&cur_qp->ip, &ip_s); > >>> - const struct sbrec_mac_binding *b = mac_binding_lookup( > >>> - sbrec_mac_binding_by_lport_ip, pb->logical_port, > >>> - ds_cstr(&ip_s)); > >>> - if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT, > >>> - ETH_ADDR_SCAN_ARGS(cur_qp->ea))) { > >>> - hmap_remove(&buffered_packets_map, > >>> &cur_qp->hmap_node); > >>> - ovs_list_push_back(&buffered_mac_bindings, > >>> &cur_qp->list); > >>> - notify = true; > >>> + > >>> + struct buffered_packets *cur_qp > >>> + = pinctrl_get_buffered_packets(ld->datapath->tunnel_key, > >>> + pb->tunnel_key); > >>> + if (!cur_qp) { > >>> + /* If primary lookup fails, check chassisredirect port > >>> + * for distributed gw router port use-case. 
*/ > >>> + char *redirect_name = xasprintf("cr-%s", > >>> pb->logical_port); > >>> + const struct sbrec_port_binding *cr_pb > >>> + = lport_lookup_by_name(sbrec_port_binding_by_name, > >>> + redirect_name); > >>> + free(redirect_name); > >>> + if (cr_pb) { > >>> + cur_qp = pinctrl_get_buffered_packets( > >>> + ld->datapath->tunnel_key, cr_pb->tunnel_key); > >>> } > >>> - ds_destroy(&ip_s); > >>> + } > >>> + > >>> + if (!cur_qp) { > >>> + continue; > >>> + } > >>> + > >>> + struct ds ip_s = DS_EMPTY_INITIALIZER; > >>> + ipv6_format_mapped(&cur_qp->ip, &ip_s); > >>> + const struct sbrec_mac_binding *b = mac_binding_lookup( > >>> + sbrec_mac_binding_by_lport_ip, pb->logical_port, > >>> + ds_cstr(&ip_s)); > >>> + if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT, > >>> + ETH_ADDR_SCAN_ARGS(cur_qp->ea))) { > >>> + hmap_remove(&buffered_packets_map, &cur_qp->hmap_node); > >>> + ovs_list_push_back(&buffered_mac_bindings, > >>> &cur_qp->list); > >>> + notify = true; > >>> + } > >>> + ds_destroy(&ip_s); > >>> + > >>> + if (!hmap_count(&buffered_packets_map)) { > >>> + /* No more work to do. */ > >>> + break; > >>> } > >>> } > >>> sbrec_port_binding_index_destroy_row(target); > >>> diff --git a/tests/system-ovn.at b/tests/system-ovn.at > >>> index 20c058415..a186a6e63 100644 > >>> --- a/tests/system-ovn.at > >>> +++ b/tests/system-ovn.at > >>> @@ -8597,3 +8597,127 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port > >>> patch-.*/d > >>> /connection dropped.*/d"]) > >>> AT_CLEANUP > >>> ]) > >>> + > >>> +OVN_FOR_EACH_NORTHD([ > >>> +AT_SETUP([IP buffering]) > >>> +AT_KEYWORDS([ip-buffering]) > >>> +AT_SKIP_IF([test $HAVE_TCPDUMP = no]) > >>> + > >>> +ovn_start > >>> +OVS_TRAFFIC_VSWITCHD_START() > >>> + > >>> +ADD_BR([br-int]) > >>> +ADD_BR([br-ext]) > >>> + > >>> +ovs-ofctl add-flow br-ext action=normal > >>> +# Set external-ids in br-int needed for ovn-controller > >>> +ovs-vsctl \ > >>> + -- set Open_vSwitch . external-ids:system-id=hv1 \ > >>> + -- set Open_vSwitch . 
> >>> external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ > >>> + -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ > >>> + -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ > >>> + -- set bridge br-int fail-mode=secure > >>> other-config:disable-in-band=true > >>> + > >>> +# Start ovn-controller > >>> +start_daemon ovn-controller > >>> + > >>> +ADD_NAMESPACES(sw01) > >>> +ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \ > >>> + "192.168.1.1") > >>> +ADD_NAMESPACES(sw11) > >>> +ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \ > >>> + "192.168.2.1") > >>> +ADD_NAMESPACES(remote) > >>> +ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \ > >>> + "172.16.1.1") > >>> +ADD_NAMESPACES(remote1) > >>> +ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", > >>> \ > >>> + "172.16.1.1") > >>> + > >>> +NS_CHECK_EXEC([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2 > >>> and dst 172.16.1.2 and icmp > icmp.pcap &]) > >>> +NS_CHECK_EXEC([remote], [tcpdump -c 1 -nneei remote -Q in arp and > >>> arp[[24:4]]==0xac100102 > arp.pcap &]) > >>> +NS_CHECK_EXEC([remote1], [tcpdump -c 3 -nneei remote1 -Q in src > >>> 172.16.1.3 and dst 172.16.1.4 and icmp > icmp1.pcap 2>/dev/null &]) > >>> +NS_CHECK_EXEC([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and > >>> arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null &]) > > Please use NETNS_DAEMONIZE() instead. ack, I will fix it > > >>> + > >>> +ovn-nbctl create Logical_Router name=R1 options:chassis=hv1 > > Please use "check ovn-nbctl .." everywhere applicable. 
ack, I will fix it Regards, Lorenzo > > >>> +ovn-nbctl ls-add sw0 > >>> +ovn-nbctl ls-add sw1 > >>> +ovn-nbctl ls-add public > >>> + > >>> +ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24 > >>> +ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24 > >>> +ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24 > >>> + > >>> +ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \ > >>> + type=router options:router-port=rp-sw0 \ > >>> + -- lsp-set-addresses sw0-rp router > >>> +ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \ > >>> + type=router options:router-port=rp-sw1 \ > >>> + -- lsp-set-addresses sw1-rp router > >>> + > >>> +ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \ > >>> + type=router options:router-port=rp-public \ > >>> + -- lsp-set-addresses public-rp router > >>> + > >>> +ovn-nbctl lsp-add sw0 sw01 \ > >>> + -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2" > >>> + > >>> +ovn-nbctl lsp-add sw1 sw11 \ > >>> + -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2" > >>> + > >>> +ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11 > >>> 00:00:02:02:03:10 > >>> + > >>> +AT_CHECK([ovs-vsctl set Open_vSwitch . > >>> external-ids:ovn-bridge-mappings=phynet:br-ext]) > >>> +ovn-nbctl lsp-add public public1 \ > >>> + -- lsp-set-addresses public1 unknown \ > >>> + -- lsp-set-type public1 localnet \ > >>> + -- lsp-set-options public1 network_name=phynet > >>> + > >>> +OVN_POPULATE_ARP > >>> + > > We need to wait for the ports to be claimed and for OVS flows to be > installed. We're missing: > > wait_for_ports_up > check ovn-nbctl --wait=hv sync > > Without this the ping below fails on my machine. ack, thx. I will fix it. 
> > >>> +NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | > >>> FORMAT_PING], \ > >>> +[0], [dnl > >>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms > >>> +]) > >>> + > >>> +OVS_WAIT_UNTIL([ > >>> + total_arp_pkts=$(cat arp.pcap | wc -l) > >>> + test "${total_arp_pkts}" = "1" > >>> +]) > >>> + > >>> +OVS_WAIT_UNTIL([ > >>> + total_icmp_pkts=$(cat icmp.pcap | wc -l) > >>> + test "${total_icmp_pkts}" = "3" > >>> +]) > >>> + > >>> +NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | > >>> FORMAT_PING], \ > >>> +[0], [dnl > >>> +3 packets transmitted, 3 received, 0% packet loss, time 0ms > >>> +]) > >>> + > >>> +OVS_WAIT_UNTIL([ > >>> + total_arp1_pkts=$(cat arp1.pcap | wc -l) > >>> + test "${total_arp1_pkts}" = "1" > >>> +]) > >>> + > >>> +OVS_WAIT_UNTIL([ > >>> + total_icmp1_pkts=$(cat icmp1.pcap | wc -l) > >>> + test "${total_icmp1_pkts}" = "3" > >>> +]) > >>> + > >>> +OVS_APP_EXIT_AND_WAIT([ovn-controller]) > >>> + > >>> +as ovn-sb > >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > >>> + > >>> +as ovn-nb > >>> +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) > >>> + > >>> +as northd > >>> +OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE]) > >>> + > >>> +as > >>> +OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d > >>> +/connection dropped.*/d"]) > >>> +AT_CLEANUP > >>> +]) > >>> -- > >>> 2.37.3 > >>> > >>> > >> Other than that it looks good, thanks. > >> > >> Acked-by: Ales Musil <amu...@redhat.com> > > > > ack, let's see if I need to repost. > > > > Regards, > > Lorenzo > > > > > Thanks, > Dumitru > _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev