Add the option for deployments that do not require multiple LB VIPs
sharing same backends for hairpin traffic, so that they can set this
to false to be able to better utilize HW offload capabilities.

Signed-off-by: Han Zhou <hz...@ovn.org>
---
 controller/lflow.c              | 46 +++++++++++++++++++++++++--------
 controller/lflow.h              |  1 +
 controller/ovn-controller.8.xml | 10 +++++++
 controller/ovn-controller.c     | 13 ++++++++++
 tests/ovn.at                    | 10 +++++++
 5 files changed, 69 insertions(+), 11 deletions(-)

diff --git a/controller/lflow.c b/controller/lflow.c
index a44f6d056..35de03a21 100644
--- a/controller/lflow.c
+++ b/controller/lflow.c
@@ -1939,6 +1939,7 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
                          struct ovn_lb_backend *lb_backend,
                          uint8_t lb_proto,
                          bool use_ct_mark,
+                         bool check_ct_dst,
                          struct ovn_desired_flow_table *flow_table)
 {
     uint64_t stub[1024 / 8];
@@ -1949,11 +1950,24 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
     put_load(&value, sizeof value, MFF_LOG_FLAGS,
              MLF_LOOKUP_LB_HAIRPIN_BIT, 1, &ofpacts);
 
-    /* Matching on ct_nw_dst()/ct_ipv6_dst()/ct_tp_dst() requires matching
-     * on ct_state first.
+    /* To detect the hairpin flow accurately, the CT dst IP and ports need to
+     * be matched, to distinguish when more than one VIP shares the same
+     * hairpin backend ip+port.
+     *
+     * Matching on ct_nw_dst()/ct_ipv6_dst()/ct_tp_dst() requires matching
+     * on ct_state == DNAT first.
+     *
+     * However, matching ct_state == DNAT may prevent the flows from being
+     * offloaded to HW. The option "check_ct_dst" (defaults to true) is
+     * provided to disable this check and the related CT fields, so that
+     * environments that don't require support for VIPs sharing the same
+     * backends can fully utilize HW offload capability for better performance.
      */
-    uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_DST_NAT;
-    match_set_ct_state_masked(&hairpin_match, ct_state, ct_state);
+
+    if (check_ct_dst) {
+        uint32_t ct_state = OVS_CS_F_TRACKED | OVS_CS_F_DST_NAT;
+        match_set_ct_state_masked(&hairpin_match, ct_state, ct_state);
+    }
 
     if (IN6_IS_ADDR_V4MAPPED(&lb_vip->vip)) {
         ovs_be32 bip4 = in6_addr_get_mapped_ipv4(&lb_backend->ip);
@@ -1965,7 +1979,9 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
         match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IP));
         match_set_nw_src(&hairpin_match, bip4);
         match_set_nw_dst(&hairpin_match, bip4);
-        match_set_ct_nw_dst(&hairpin_match, vip4);
+        if (check_ct_dst) {
+            match_set_ct_nw_dst(&hairpin_match, vip4);
+        }
 
         add_lb_vip_hairpin_reply_action(NULL, snat_vip4, lb_proto,
                                         lb_backend->port,
@@ -1980,7 +1996,9 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
         match_set_dl_type(&hairpin_match, htons(ETH_TYPE_IPV6));
         match_set_ipv6_src(&hairpin_match, bip6);
         match_set_ipv6_dst(&hairpin_match, bip6);
-        match_set_ct_ipv6_dst(&hairpin_match, &lb_vip->vip);
+        if (check_ct_dst) {
+            match_set_ct_ipv6_dst(&hairpin_match, &lb_vip->vip);
+        }
 
         add_lb_vip_hairpin_reply_action(snat_vip6, 0, lb_proto,
                                         lb_backend->port,
@@ -1991,8 +2009,10 @@ add_lb_vip_hairpin_flows(struct ovn_controller_lb *lb,
     if (lb_backend->port) {
         match_set_nw_proto(&hairpin_match, lb_proto);
         match_set_tp_dst(&hairpin_match, htons(lb_backend->port));
-        match_set_ct_nw_proto(&hairpin_match, lb_proto);
-        match_set_ct_tp_dst(&hairpin_match, htons(lb_vip->vip_port));
+        if (check_ct_dst) {
+            match_set_ct_nw_proto(&hairpin_match, lb_proto);
+            match_set_ct_tp_dst(&hairpin_match, htons(lb_vip->vip_port));
+        }
     }
 
     /* In the original direction, only match on traffic that was already
@@ -2279,7 +2299,7 @@ add_lb_ct_snat_hairpin_flows(struct ovn_controller_lb *lb,
 static void
 consider_lb_hairpin_flows(const struct sbrec_load_balancer *sbrec_lb,
                           const struct hmap *local_datapaths,
-                          bool use_ct_mark,
+                          bool use_ct_mark, bool check_ct_dst,
                           struct ovn_desired_flow_table *flow_table,
                           struct simap *ids)
 {
@@ -2319,7 +2339,7 @@ consider_lb_hairpin_flows(const struct 
sbrec_load_balancer *sbrec_lb,
             struct ovn_lb_backend *lb_backend = &lb_vip->backends[j];
 
             add_lb_vip_hairpin_flows(lb, lb_vip, lb_backend, lb_proto,
-                                     use_ct_mark, flow_table);
+                                     use_ct_mark, check_ct_dst, flow_table);
         }
     }
 
@@ -2333,6 +2353,7 @@ consider_lb_hairpin_flows(const struct 
sbrec_load_balancer *sbrec_lb,
 static void
 add_lb_hairpin_flows(const struct sbrec_load_balancer_table *lb_table,
                      const struct hmap *local_datapaths, bool use_ct_mark,
+                     bool check_ct_dst,
                      struct ovn_desired_flow_table *flow_table,
                      struct simap *ids,
                      struct id_pool *pool)
@@ -2356,7 +2377,7 @@ add_lb_hairpin_flows(const struct 
sbrec_load_balancer_table *lb_table,
             simap_put(ids, lb->name, id);
         }
         consider_lb_hairpin_flows(lb, local_datapaths, use_ct_mark,
-                                  flow_table, ids);
+                                  check_ct_dst, flow_table, ids);
     }
 }
 
@@ -2494,6 +2515,7 @@ lflow_run(struct lflow_ctx_in *l_ctx_in, struct 
lflow_ctx_out *l_ctx_out)
                        l_ctx_out->flow_table);
     add_lb_hairpin_flows(l_ctx_in->lb_table, l_ctx_in->local_datapaths,
                          l_ctx_in->lb_hairpin_use_ct_mark,
+                         l_ctx_in->lb_hairpin_check_ct_dst,
                          l_ctx_out->flow_table,
                          l_ctx_out->hairpin_lb_ids,
                          l_ctx_out->hairpin_id_pool);
@@ -2658,6 +2680,7 @@ lflow_add_flows_for_datapath(const struct 
sbrec_datapath_binding *dp,
     for (size_t i = 0; i < n_dp_lbs; i++) {
         consider_lb_hairpin_flows(dp_lbs[i], l_ctx_in->local_datapaths,
                                   l_ctx_in->lb_hairpin_use_ct_mark,
+                                  l_ctx_in->lb_hairpin_check_ct_dst,
                                   l_ctx_out->flow_table,
                                   l_ctx_out->hairpin_lb_ids);
     }
@@ -2789,6 +2812,7 @@ lflow_handle_changed_lbs(struct lflow_ctx_in *l_ctx_in,
                  UUID_ARGS(&lb->header_.uuid));
         consider_lb_hairpin_flows(lb, l_ctx_in->local_datapaths,
                                   l_ctx_in->lb_hairpin_use_ct_mark,
+                                  l_ctx_in->lb_hairpin_check_ct_dst,
                                   l_ctx_out->flow_table,
                                   l_ctx_out->hairpin_lb_ids);
     }
diff --git a/controller/lflow.h b/controller/lflow.h
index 342967917..325ba3837 100644
--- a/controller/lflow.h
+++ b/controller/lflow.h
@@ -161,6 +161,7 @@ struct lflow_ctx_in {
     const struct shash *binding_lports;
     const struct hmap *chassis_tunnels;
     bool lb_hairpin_use_ct_mark;
+    bool lb_hairpin_check_ct_dst;
 };
 
 struct lflow_ctx_out {
diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml
index cb47c9bd1..f93cb1ace 100644
--- a/controller/ovn-controller.8.xml
+++ b/controller/ovn-controller.8.xml
@@ -352,6 +352,16 @@
         heplful to pin source outer IP for the tunnel when multiple interfaces
         are used on the host for overlay traffic.
       </dd>
+      <dt><code>external_ids:ovn-allow-vips-share-hairpin-backend</code></dt>
+      <dd>
+        If set to false, the behavior is undefined for hairpin traffic when
+        two or more LB VIPs share the same hairpin backend. The default
+        value is true - sharing is allowed. However, when set to true, some
+        flows are not HW-offload friendly. So, for deployments that do not
+        require multiple LB VIPs to share the same backends for hairpin
+        traffic, it is recommended to set this to false to better utilize
+        HW offload capabilities.
+      </dd>
     </dl>
 
     <p>
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index dacef0204..eb9c4e4eb 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -489,6 +489,17 @@ get_ovs_chassis_id(const struct ovsrec_open_vswitch_table 
*ovs_table)
     return chassis_id;
 }
 
+static bool
+get_allow_vips_share_hairpin_backend(
+    const struct ovsrec_open_vswitch_table *ovs_table)
+{
+    const struct ovsrec_open_vswitch *cfg
+        = ovsrec_open_vswitch_table_first(ovs_table);
+    return cfg ? smap_get_bool(&cfg->external_ids,
+                               "ovn-allow-vips-share-hairpin-backend", true)
+               : true;
+}
+
 static void
 update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
 {
@@ -2576,6 +2587,8 @@ init_lflow_ctx(struct engine_node *node,
     l_ctx_in->binding_lports = &rt_data->lbinding_data.lports;
     l_ctx_in->chassis_tunnels = &non_vif_data->chassis_tunnels;
     l_ctx_in->lb_hairpin_use_ct_mark = n_opts->lb_hairpin_use_ct_mark;
+    l_ctx_in->lb_hairpin_check_ct_dst =
+        get_allow_vips_share_hairpin_backend(ovs_table);
 
     l_ctx_out->flow_table = &fo->flow_table;
     l_ctx_out->group_table = &fo->group_table;
diff --git a/tests/ovn.at b/tests/ovn.at
index 0dd9a1c2e..d84f4a3d1 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -28808,6 +28808,16 @@ AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=70 
| ofctl_strip_all | grep -
  table=70, 
priority=100,ct_state=+trk+dnat,ct_nw_dst=88.88.88.88,ct_nw_proto=17,ct_tp_dst=4040,udp
 actions=ct(commit,zone=NXM_NX_REG12[[0..15]],nat(src=88.88.88.88))
 ])
 
+# When ovn-allow-vips-share-hairpin-backend=false, don't match ct state and 
dst fields in table 68.
+as hv1 ovn-appctl -t ovn-controller vlog/set file:dbg
+as hv1 check ovs-vsctl set open . 
external_ids:ovn-allow-vips-share-hairpin-backend=false
+check ovn-nbctl --wait=hv sync
+AT_CHECK([as hv1 ovs-ofctl dump-flows br-int table=68 | ofctl_strip_all | grep 
-v NXST], [0], [dnl
+ table=68, 
priority=100,ct_mark=0x2/0x2,tcp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=4041 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=6,NXM_OF_TCP_SRC[[]]=NXM_OF_TCP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+ table=68, 
priority=100,ct_mark=0x2/0x2,udp,nw_src=42.42.42.1,nw_dst=42.42.42.1,tp_dst=2021
 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x800,NXM_OF_IP_SRC[[]],ip_dst=88.88.88.88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+ table=68, 
priority=100,ct_mark=0x2/0x2,udp6,ipv6_src=4200::1,ipv6_dst=4200::1,tp_dst=2021 
actions=load:0x1->NXM_NX_REG10[[7]],learn(table=69,delete_learned,OXM_OF_METADATA[[]],eth_type=0x86dd,NXM_NX_IPV6_SRC[[]],ipv6_dst=8800::88,nw_proto=17,NXM_OF_UDP_SRC[[]]=NXM_OF_UDP_DST[[]],load:0x1->NXM_NX_REG10[[7]])
+])
+
 check ovn-nbctl --wait=hv ls-del sw0
 check ovn-nbctl --wait=hv ls-del sw1
 
-- 
2.30.2

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to