Improve buffered packet management in ovn-controller avoid useless loop
in run_buffered_binding routine and using datapath key and output port
key as buffered_packets_map hashmap hash. Add new selftest for buffered
packets.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianc...@redhat.com>
---
Changes since v2:
- improve hash function
Changes since v1:
- improve code readability
---
 controller/pinctrl.c | 118 ++++++++++++++++++++++++++++------------
 tests/system-ovn.at  | 124 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 208 insertions(+), 34 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index 8859cb080..dfcd0cea8 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -182,6 +182,7 @@ static void destroy_buffered_packets_map(void);
 static void
 run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
                      const struct hmap *local_datapaths)
     OVS_REQUIRES(pinctrl_mutex);
 
@@ -1430,6 +1431,9 @@ struct buffered_packets {
     struct in6_addr ip;
     struct eth_addr ea;
 
+    uint64_t dp_key;
+    uint64_t port_key;
+
     long long int timestamp;
 
     struct buffer_info data[BUFFER_QUEUE_DEPTH];
@@ -1556,38 +1560,38 @@ buffered_packets_map_gc(void)
 }
 
 static struct buffered_packets *
-pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash)
+pinctrl_find_buffered_packets(uint64_t dp_key, uint64_t port_key,
+                              uint32_t hash)
 {
     struct buffered_packets *qp;
-
-    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash,
-                             &buffered_packets_map) {
-        if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) {
+    HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, &buffered_packets_map) {
+        if (qp->dp_key == dp_key && qp->port_key == port_key) {
             return qp;
         }
     }
     return NULL;
 }
 
+static uint32_t
+pinctrl_buffer_apcket_hash(uint64_t dp_key, uint64_t port_key)
+{
+    uint32_t hash = 0;
+    hash = hash_add64(hash, port_key);
+    hash = hash_add64(hash, dp_key);
+    return hash_finish(hash, 16);
+}
+
 /* Called with in the pinctrl_handler thread context. */
 static int
 pinctrl_handle_buffered_packets(struct dp_packet *pkt_in,
                                 const struct match *md, bool is_arp)
     OVS_REQUIRES(pinctrl_mutex)
 {
-    struct buffered_packets *bp;
-    struct dp_packet *clone;
-    struct in6_addr addr;
-
-    if (is_arp) {
-        addr = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
-    } else {
-        ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
-        memcpy(&addr, &ip6, sizeof addr);
-    }
-
-    uint32_t hash = hash_bytes(&addr, sizeof addr, 0);
-    bp = pinctrl_find_buffered_packets(&addr, hash);
+    uint64_t dp_key = ntohll(md->flow.metadata);
+    uint64_t oport_key = md->flow.regs[MFF_LOG_OUTPORT - MFF_REG0];
+    uint32_t hash = pinctrl_buffer_apcket_hash(dp_key, oport_key);
+    struct buffered_packets *bp
+        = pinctrl_find_buffered_packets(dp_key, oport_key, hash);
     if (!bp) {
         if (hmap_count(&buffered_packets_map) >= 1000) {
             COVERAGE_INC(pinctrl_drop_buffered_packets_map);
@@ -1597,12 +1601,20 @@ pinctrl_handle_buffered_packets(struct dp_packet 
*pkt_in,
         bp = xmalloc(sizeof *bp);
         hmap_insert(&buffered_packets_map, &bp->hmap_node, hash);
         bp->head = bp->tail = 0;
-        bp->ip = addr;
+        if (is_arp) {
+            bp->ip = in6_addr_mapped_ipv4(htonl(md->flow.regs[0]));
+        } else {
+            ovs_be128 ip6 = hton128(flow_get_xxreg(&md->flow, 0));
+            memcpy(&bp->ip, &ip6, sizeof bp->ip);
+        }
+        bp->dp_key = dp_key;
+        bp->port_key = oport_key;
     }
     bp->timestamp = time_msec();
     /* clone the packet to send it later with correct L2 address */
-    clone = dp_packet_clone_data(dp_packet_data(pkt_in),
-                                 dp_packet_size(pkt_in));
+    struct dp_packet *clone
+        = dp_packet_clone_data(dp_packet_data(pkt_in),
+                               dp_packet_size(pkt_in));
     buffered_push_packet(bp, clone, md);
 
     return 0;
@@ -3586,6 +3598,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
                   sbrec_ip_multicast_opts);
     run_buffered_binding(sbrec_mac_binding_by_lport_ip,
                          sbrec_port_binding_by_datapath,
+                         sbrec_port_binding_by_name,
                          local_datapaths);
     sync_svc_monitors(ovnsb_idl_txn, svc_mon_table, sbrec_port_binding_by_name,
                       chassis);
@@ -4354,15 +4367,28 @@ run_put_mac_bindings(struct ovsdb_idl_txn 
*ovnsb_idl_txn,
     }
 }
 
+static struct buffered_packets *
+pinctrl_get_buffered_packets(uint64_t dp_key, uint64_t port_key)
+{
+    uint32_t hash = pinctrl_buffer_apcket_hash(dp_key, port_key);
+    return pinctrl_find_buffered_packets(dp_key, port_key, hash);
+}
+
 static void
 run_buffered_binding(struct ovsdb_idl_index *sbrec_mac_binding_by_lport_ip,
                      struct ovsdb_idl_index *sbrec_port_binding_by_datapath,
+                     struct ovsdb_idl_index *sbrec_port_binding_by_name,
                      const struct hmap *local_datapaths)
     OVS_REQUIRES(pinctrl_mutex)
 {
     const struct local_datapath *ld;
     bool notify = false;
 
+    if (!hmap_count(&buffered_packets_map)) {
+        /* No work to do. */
+        return;
+    }
+
     HMAP_FOR_EACH (ld, hmap_node, local_datapaths) {
         /* MAC_Binding.logical_port will always belong to a
          * a router datapath. Hence we can skip logical switch
@@ -4381,20 +4407,44 @@ run_buffered_binding(struct ovsdb_idl_index 
*sbrec_mac_binding_by_lport_ip,
             if (strcmp(pb->type, "patch") && strcmp(pb->type, "l3gateway")) {
                 continue;
             }
-            struct buffered_packets *cur_qp;
-            HMAP_FOR_EACH_SAFE (cur_qp, hmap_node, &buffered_packets_map) {
-                struct ds ip_s = DS_EMPTY_INITIALIZER;
-                ipv6_format_mapped(&cur_qp->ip, &ip_s);
-                const struct sbrec_mac_binding *b = mac_binding_lookup(
-                        sbrec_mac_binding_by_lport_ip, pb->logical_port,
-                        ds_cstr(&ip_s));
-                if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
-                                  ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
-                    hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
-                    ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
-                    notify = true;
+
+            struct buffered_packets *cur_qp
+                = pinctrl_get_buffered_packets(ld->datapath->tunnel_key,
+                                               pb->tunnel_key);
+            if (!cur_qp) {
+                /* If primary lookup fails, check chassisredirect port
+                 * for distributed gw router port use-case. */
+                char *redirect_name = xasprintf("cr-%s", pb->logical_port);
+                const struct sbrec_port_binding *cr_pb
+                    = lport_lookup_by_name(sbrec_port_binding_by_name,
+                                           redirect_name);
+                free(redirect_name);
+                if (cr_pb) {
+                    cur_qp = pinctrl_get_buffered_packets(
+                            ld->datapath->tunnel_key, cr_pb->tunnel_key);
                 }
-                ds_destroy(&ip_s);
+            }
+
+            if (!cur_qp) {
+                continue;
+            }
+
+            struct ds ip_s = DS_EMPTY_INITIALIZER;
+            ipv6_format_mapped(&cur_qp->ip, &ip_s);
+            const struct sbrec_mac_binding *b = mac_binding_lookup(
+                    sbrec_mac_binding_by_lport_ip, pb->logical_port,
+                    ds_cstr(&ip_s));
+            if (b && ovs_scan(b->mac, ETH_ADDR_SCAN_FMT,
+                              ETH_ADDR_SCAN_ARGS(cur_qp->ea))) {
+                hmap_remove(&buffered_packets_map, &cur_qp->hmap_node);
+                ovs_list_push_back(&buffered_mac_bindings, &cur_qp->list);
+                notify = true;
+            }
+            ds_destroy(&ip_s);
+
+            if (!hmap_count(&buffered_packets_map)) {
+                /* No more work to do. */
+                break;
             }
         }
         sbrec_port_binding_index_destroy_row(target);
diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index 20c058415..a186a6e63 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -8597,3 +8597,127 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
 ])
+
+OVN_FOR_EACH_NORTHD([
+AT_SETUP([IP buffering])
+AT_KEYWORDS([ip-buffering])
+AT_SKIP_IF([test $HAVE_TCPDUMP = no])
+
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+
+ADD_BR([br-int])
+ADD_BR([br-ext])
+
+ovs-ofctl add-flow br-ext action=normal
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+        -- set Open_vSwitch . external-ids:system-id=hv1 \
+        -- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+        -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+        -- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+        -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+ADD_NAMESPACES(sw01)
+ADD_VETH(sw01, sw01, br-int, "192.168.1.2/24", "f0:00:00:01:02:03", \
+         "192.168.1.1")
+ADD_NAMESPACES(sw11)
+ADD_VETH(sw11, sw11, br-int, "192.168.2.2/24", "f0:00:00:02:02:03", \
+         "192.168.2.1")
+ADD_NAMESPACES(remote)
+ADD_VETH(remote, remote, br-ext, "172.16.1.2/24", "f0:00:00:01:02:05", \
+         "172.16.1.1")
+ADD_NAMESPACES(remote1)
+ADD_VETH(remote1, remote1, br-ext, "172.16.1.4/24", "f0:00:00:01:02:06", \
+         "172.16.1.1")
+
+NS_CHECK_EXEC([remote], [tcpdump -c 3 -nneei remote -Q in src 192.168.1.2 and 
dst 172.16.1.2 and icmp > icmp.pcap &])
+NS_CHECK_EXEC([remote], [tcpdump -c 1 -nneei remote -Q in arp and 
arp[[24:4]]==0xac100102 > arp.pcap &])
+NS_CHECK_EXEC([remote1], [tcpdump -c 3 -nneei remote1 -Q in src 172.16.1.3 and 
dst 172.16.1.4 and icmp > icmp1.pcap 2>/dev/null &])
+NS_CHECK_EXEC([remote1], [tcpdump -c 1 -nneei remote1 -Q in arp and 
arp[[24:4]]==0xac100104 > arp1.pcap 2>/dev/null &])
+
+ovn-nbctl create Logical_Router name=R1 options:chassis=hv1
+ovn-nbctl ls-add sw0
+ovn-nbctl ls-add sw1
+ovn-nbctl ls-add public
+
+ovn-nbctl lrp-add R1 rp-sw0 00:00:01:01:02:03 192.168.1.1/24
+ovn-nbctl lrp-add R1 rp-sw1 00:00:03:01:02:03 192.168.2.1/24
+ovn-nbctl lrp-add R1 rp-public 00:00:02:01:02:03 172.16.1.1/24
+
+ovn-nbctl lsp-add sw0 sw0-rp -- set Logical_Switch_Port sw0-rp \
+    type=router options:router-port=rp-sw0 \
+    -- lsp-set-addresses sw0-rp router
+ovn-nbctl lsp-add sw1 sw1-rp -- set Logical_Switch_Port sw1-rp \
+    type=router options:router-port=rp-sw1 \
+    -- lsp-set-addresses sw1-rp router
+
+ovn-nbctl lsp-add public public-rp -- set Logical_Switch_Port public-rp \
+    type=router options:router-port=rp-public \
+    -- lsp-set-addresses public-rp router
+
+ovn-nbctl lsp-add sw0 sw01 \
+    -- lsp-set-addresses sw01 "f0:00:00:01:02:03 192.168.1.2"
+
+ovn-nbctl lsp-add sw1 sw11 \
+    -- lsp-set-addresses sw11 "f0:00:00:02:02:03 192.168.2.2"
+
+ovn-nbctl lr-nat-add R1 dnat_and_snat 172.16.1.3 192.168.2.2 sw11 
00:00:02:02:03:10
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . 
external-ids:ovn-bridge-mappings=phynet:br-ext])
+ovn-nbctl lsp-add public public1 \
+        -- lsp-set-addresses public1 unknown \
+        -- lsp-set-type public1 localnet \
+        -- lsp-set-options public1 network_name=phynet
+
+OVN_POPULATE_ARP
+
+NS_CHECK_EXEC([sw01], [ping -q -c 3 -i 0.3 -w 2 172.16.1.2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+        total_arp_pkts=$(cat arp.pcap | wc -l)
+        test "${total_arp_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+        total_icmp_pkts=$(cat icmp.pcap | wc -l)
+        test "${total_icmp_pkts}" = "3"
+])
+
+NS_CHECK_EXEC([sw11], [ping -q -c 3 -i 0.3 -w 2 172.16.1.4 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_WAIT_UNTIL([
+        total_arp1_pkts=$(cat arp1.pcap | wc -l)
+        test "${total_arp1_pkts}" = "1"
+])
+
+OVS_WAIT_UNTIL([
+        total_icmp1_pkts=$(cat icmp1.pcap | wc -l)
+        test "${total_icmp1_pkts}" = "3"
+])
+
+OVS_APP_EXIT_AND_WAIT([ovn-controller])
+
+as ovn-sb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as ovn-nb
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+
+as northd
+OVS_APP_EXIT_AND_WAIT([NORTHD_TYPE])
+
+as
+OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d
+/connection dropped.*/d"])
+AT_CLEANUP
+])
-- 
2.37.3

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to