Currently metadata transmitted by OVN over Geneve tunnels is
unprotected by any checksum other than the one provided by the link
layer - this includes both the VNI and data stored in options. Turning
on UDP checksums which cover this data has obvious benefits in terms of
integrity protection.

In terms of performance, this actually significantly increases throughput
in most common cases when running on Linux based hosts without NICs
supporting Geneve offload (around 60% for bulk traffic). The reason is
that generally all NICs are capable of offloading transmitted and received
UDP checksums (viewed as ordinary UDP packets and not as tunnels). The
benefit comes on the receive side where the validated outer UDP checksum
can be used to additionally validate an inner checksum (such as TCP), which
in turn allows aggregation of packets to be more efficiently handled by
the rest of the stack.

Not all devices see such a benefit. The most notable exception is hardware
VTEPs (currently using VXLAN but potentially Geneve in the future). These
devices are designed to not buffer entire packets in their switching engines
and are therefore unable to efficiently compute or validate UDP checksums.
In addition certain versions of the Linux kernel are not able to fully
take advantage of Geneve capable NIC offloads in the presence of checksums.
(This is actually a pretty narrow corner case though - earlier versions of
Linux don't support Geneve offloads at all and later versions support both
offloads and checksums well.)

In order avoid possible problems with these cases, efficient checksum
receive performance is exposed as an encap option in the southbound
database as a hint to remote senders. This currently defaults to off
for hardware VTEPs and on for all other cases.

Signed-off-by: Jesse Gross <je...@kernel.org>
---
 ovn/controller-vtep/gateway.c |  9 +++++++++
 ovn/controller/chassis.c      |  8 ++++++++
 ovn/controller/encaps.c       | 12 +++++++++++-
 ovn/ovn-sb.xml                | 10 ++++++++--
 ovn/utilities/ovn-sbctl.c     |  2 ++
 tests/ovn-controller-vtep.at  |  1 +
 tests/ovn-sbctl.at            |  3 +++
 7 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/ovn/controller-vtep/gateway.c b/ovn/controller-vtep/gateway.c
index ebe0087..e44b319 100644
--- a/ovn/controller-vtep/gateway.c
+++ b/ovn/controller-vtep/gateway.c
@@ -57,6 +57,8 @@ create_chassis_rec(struct ovsdb_idl_txn *txn, const char 
*name,
     encap_rec = sbrec_encap_insert(txn);
     sbrec_encap_set_type(encap_rec, OVN_SB_ENCAP_TYPE);
     sbrec_encap_set_ip(encap_rec, encap_ip);
+    const struct smap options = SMAP_CONST1(&options, "csum", "false");
+    sbrec_encap_set_options(encap_rec, &options);
     sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1);
 
     return chassis_rec;
@@ -107,6 +109,13 @@ revalidate_gateway(struct controller_vtep_ctx *ctx)
             if (strcmp(chassis_rec->encaps[0]->ip, encap_ip)) {
                 sbrec_encap_set_ip(chassis_rec->encaps[0], encap_ip);
             }
+            const char *csum = smap_get(&chassis_rec->encaps[0]->options,
+                                        "csum");
+            if (!csum || strcmp(csum, "false")) {
+                const struct smap options = SMAP_CONST1(&options, "csum",
+                                                                  "false");
+                sbrec_encap_set_options(chassis_rec->encaps[0], &options);
+            }
         } else {
             if (gw_node) {
                 VLOG_WARN("Chassis for VTEP physical switch (%s) disappears, "
diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c
index b5f022e..beb393b 100644
--- a/ovn/controller/chassis.c
+++ b/ovn/controller/chassis.c
@@ -137,8 +137,13 @@ chassis_run(struct controller_ctx *ctx, const char 
*chassis_id)
         uint32_t cur_tunnels = 0;
         bool same = true;
         for (int i = 0; i < chassis_rec->n_encaps; i++) {
+            const char *csum;
+
             cur_tunnels |= get_tunnel_type(chassis_rec->encaps[i]->type);
             same = same && !strcmp(chassis_rec->encaps[i]->ip, encap_ip);
+
+            csum = smap_get(&chassis_rec->encaps[i]->options, "csum");
+            same = same && csum && !strcmp(csum, "true");
         }
         same = same && req_tunnels == cur_tunnels;
 
@@ -178,6 +183,8 @@ chassis_run(struct controller_ctx *ctx, const char 
*chassis_id)
 
     int n_encaps = count_1bits(req_tunnels);
     struct sbrec_encap **encaps = xmalloc(n_encaps * sizeof *encaps);
+    const struct smap options = SMAP_CONST1(&options, "csum", "true");
+
     for (int i = 0; i < n_encaps; i++) {
         const char *type = pop_tunnel_name(&req_tunnels);
 
@@ -185,6 +192,7 @@ chassis_run(struct controller_ctx *ctx, const char 
*chassis_id)
 
         sbrec_encap_set_type(encaps[i], type);
         sbrec_encap_set_ip(encaps[i], encap_ip);
+        sbrec_encap_set_options(encaps[i], &options);
     }
 
     sbrec_chassis_set_encaps(chassis_rec, encaps, n_encaps);
diff --git a/ovn/controller/encaps.c b/ovn/controller/encaps.c
index 52348c3..7ae6af8 100644
--- a/ovn/controller/encaps.c
+++ b/ovn/controller/encaps.c
@@ -191,6 +191,7 @@ tunnel_add(const struct sbrec_chassis *chassis_rec,
     struct smap options = SMAP_INITIALIZER(&options);
     struct ovsrec_port *port, **ports;
     struct ovsrec_interface *iface;
+    const char *csum = smap_get(&encap->options, "csum");
     char *port_name;
     size_t i;
 
@@ -206,6 +207,9 @@ tunnel_add(const struct sbrec_chassis *chassis_rec,
     ovsrec_interface_set_type(iface, encap->type);
     smap_add(&options, "remote_ip", encap->ip);
     smap_add(&options, "key", "flow");
+    if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
+        smap_add(&options, "csum", csum);
+    }
     ovsrec_interface_set_options(iface, &options);
     smap_destroy(&options);
 
@@ -303,15 +307,21 @@ check_and_update_tunnel(const struct ovsrec_port *port,
 {
     const struct sbrec_encap *encap = preferred_encap(chassis_rec);
     const struct ovsrec_interface *iface = port->interfaces[0];
+    const char *csum = smap_get(&encap->options, "csum");
+    const char *existing_csum = smap_get(&iface->options, "csum");
 
     if (strcmp(encap->type, iface->type)) {
         ovsrec_interface_set_type(iface, encap->type);
     }
     const char *ip = smap_get(&iface->options, "remote_ip");
-    if (!ip || strcmp(encap->ip, ip)) {
+    if (!ip || strcmp(encap->ip, ip) ||
+        (!!csum != !!existing_csum || (csum && strcmp(csum, existing_csum)))) {
         struct smap options = SMAP_INITIALIZER(&options);
         smap_add(&options, "remote_ip", encap->ip);
         smap_add(&options, "key", "flow");
+        if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) {
+            smap_add(&options, "csum", csum);
+        }
         ovsrec_interface_set_options(iface, &options);
         smap_destroy(&options);
     }
diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml
index 13c9526..1cc650f 100644
--- a/ovn/ovn-sb.xml
+++ b/ovn/ovn-sb.xml
@@ -274,8 +274,14 @@
     </column>
 
     <column name="options">
-      Options for configuring the encapsulation, e.g. IPsec parameters when
-      IPsec support is introduced.  No options are currently defined.
+      Options for configuring the encapsulation. Currently, the only
+      option that has been defined is <code>csum</code>. This indicates
+      that encapsulation checksums can be transmitted and received with
+      reasonable performance. It is a hint to senders transmitting data
+      to this chassis that they should used checksums to protect OVN
+      metadata. In addition, on Linux based hypervisors this will also
+      improve the throughput of encapsulated traffic in most cases. Set to
+      <code>true</code> to enable or <code>false</code> to disable.
     </column>
 
     <column name="ip">
diff --git a/ovn/utilities/ovn-sbctl.c b/ovn/utilities/ovn-sbctl.c
index 50c9f17..cb2d477 100644
--- a/ovn/utilities/ovn-sbctl.c
+++ b/ovn/utilities/ovn-sbctl.c
@@ -557,6 +557,7 @@ cmd_chassis_add(struct ctl_context *ctx)
 
     size_t n_encaps = sset_count(&encap_set);
     struct sbrec_encap **encaps = xmalloc(n_encaps * sizeof *encaps);
+    const struct smap options = SMAP_CONST1(&options, "csum", "true");
     const char *encap_type;
     int i = 0;
     SSET_FOR_EACH (encap_type, &encap_set){
@@ -564,6 +565,7 @@ cmd_chassis_add(struct ctl_context *ctx)
 
         sbrec_encap_set_type(encaps[i], encap_type);
         sbrec_encap_set_ip(encaps[i], encap_ip);
+        sbrec_encap_set_options(encaps[i], &options);
         i++;
     }
     sset_destroy(&encap_set);
diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at
index 8eca16c..654c212 100644
--- a/tests/ovn-controller-vtep.at
+++ b/tests/ovn-controller-vtep.at
@@ -124,6 +124,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl
 Chassis br-vtep
     Encap vxlan
         ip: "1.2.3.4"
+        options: {csum="false"}
 ])
 
 # deletes the chassis via ovn-sbctl and check that it is readded back
diff --git a/tests/ovn-sbctl.at b/tests/ovn-sbctl.at
index 72dc441..9393eef 100644
--- a/tests/ovn-sbctl.at
+++ b/tests/ovn-sbctl.at
@@ -93,6 +93,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl
 Chassis "ch0"
     Encap stt
         ip: "1.2.3.5"
+        options: {csum="true"}
     Port_Binding "vif0"
 ])
 
@@ -105,6 +106,7 @@ AT_CHECK([ovn-sbctl show | sed 's/vif[[0-9]]/vif/'], [0], 
[dnl
 Chassis "ch0"
     Encap stt
         ip: "1.2.3.5"
+        options: {csum="true"}
     Port_Binding "vif"
     Port_Binding "vif"
 ])
@@ -116,6 +118,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl
 Chassis "ch0"
     Encap stt
         ip: "1.2.3.5"
+        options: {csum="true"}
     Port_Binding "vif0"
 ])
 
-- 
2.7.4

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to