Currently metadata transmitted by OVN over Geneve tunnels is unprotected by any checksum other than the one provided by the link layer - this includes both the VNI and data stored in options. Turning on UDP checksums which cover this data has obvious benefits in terms of integrity protection.
In terms of performance, this actually significantly increases throughput in most common cases when running on Linux based hosts without NICs supporting Geneve offload (around 60% for bulk traffic). The reason is that generally all NICs are capable of offloading transmitted and received UDP checksums (viewed as ordinary UDP packets and not as tunnels). The benefit comes on the receive side where the validated outer UDP checksum can be used to additionally validate an inner checksum (such as TCP), which in turn allows aggregation of packets to be more efficiently handled by the rest of the stack. Not all devices see such a benefit. The most notable exception is hardware VTEPs (currently using VXLAN but potentially Geneve in the future). These devices are designed to not buffer entire packets in their switching engines and are therefore unable to efficiently compute or validate UDP checksums. In addition certain versions of the Linux kernel are not able to fully take advantage of Geneve capable NIC offloads in the presence of checksums. (This is actually a pretty narrow corner case though - earlier versions of Linux don't support Geneve offloads at all and later versions support both offloads and checksums well.) In order avoid possible problems with these cases, efficient checksum receive performance is exposed as an encap option in the southbound database as a hint to remote senders. This currently defaults to off for hardware VTEPs and on for all other cases. Signed-off-by: Jesse Gross <je...@kernel.org> --- ovn/controller-vtep/gateway.c | 9 +++++++++ ovn/controller/chassis.c | 8 ++++++++ ovn/controller/encaps.c | 12 +++++++++++- ovn/ovn-sb.xml | 10 ++++++++-- ovn/utilities/ovn-sbctl.c | 2 ++ tests/ovn-controller-vtep.at | 1 + tests/ovn-sbctl.at | 3 +++ 7 files changed, 42 insertions(+), 3 deletions(-) diff --git a/ovn/controller-vtep/gateway.c b/ovn/controller-vtep/gateway.c index ebe0087..e44b319 100644 --- a/ovn/controller-vtep/gateway.c +++ b/ovn/controller-vtep/gateway.c @@ -57,6 +57,8 @@ create_chassis_rec(struct ovsdb_idl_txn *txn, const char *name, encap_rec = sbrec_encap_insert(txn); sbrec_encap_set_type(encap_rec, OVN_SB_ENCAP_TYPE); sbrec_encap_set_ip(encap_rec, encap_ip); + const struct smap options = SMAP_CONST1(&options, "csum", "false"); + sbrec_encap_set_options(encap_rec, &options); sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1); return chassis_rec; @@ -107,6 +109,13 @@ revalidate_gateway(struct controller_vtep_ctx *ctx) if (strcmp(chassis_rec->encaps[0]->ip, encap_ip)) { sbrec_encap_set_ip(chassis_rec->encaps[0], encap_ip); } + const char *csum = smap_get(&chassis_rec->encaps[0]->options, + "csum"); + if (!csum || strcmp(csum, "false")) { + const struct smap options = SMAP_CONST1(&options, "csum", + "false"); + sbrec_encap_set_options(chassis_rec->encaps[0], &options); + } } else { if (gw_node) { VLOG_WARN("Chassis for VTEP physical switch (%s) disappears, " diff --git a/ovn/controller/chassis.c b/ovn/controller/chassis.c index b5f022e..beb393b 100644 --- a/ovn/controller/chassis.c +++ b/ovn/controller/chassis.c @@ -137,8 +137,13 @@ chassis_run(struct controller_ctx *ctx, const char *chassis_id) uint32_t cur_tunnels = 0; bool same = true; for (int i = 0; i < chassis_rec->n_encaps; i++) { + const char *csum; + cur_tunnels |= get_tunnel_type(chassis_rec->encaps[i]->type); same = same && !strcmp(chassis_rec->encaps[i]->ip, encap_ip); + + csum = smap_get(&chassis_rec->encaps[i]->options, "csum"); + same = same && csum && !strcmp(csum, "true"); } same = same && req_tunnels == cur_tunnels; @@ -178,6 +183,8 @@ chassis_run(struct controller_ctx *ctx, const char *chassis_id) int n_encaps = count_1bits(req_tunnels); struct sbrec_encap **encaps = xmalloc(n_encaps * sizeof *encaps); + const struct smap options = SMAP_CONST1(&options, "csum", "true"); + for (int i = 0; i < n_encaps; i++) { const char *type = pop_tunnel_name(&req_tunnels); @@ -185,6 +192,7 @@ chassis_run(struct controller_ctx *ctx, const char *chassis_id) sbrec_encap_set_type(encaps[i], type); sbrec_encap_set_ip(encaps[i], encap_ip); + sbrec_encap_set_options(encaps[i], &options); } sbrec_chassis_set_encaps(chassis_rec, encaps, n_encaps); diff --git a/ovn/controller/encaps.c b/ovn/controller/encaps.c index 52348c3..7ae6af8 100644 --- a/ovn/controller/encaps.c +++ b/ovn/controller/encaps.c @@ -191,6 +191,7 @@ tunnel_add(const struct sbrec_chassis *chassis_rec, struct smap options = SMAP_INITIALIZER(&options); struct ovsrec_port *port, **ports; struct ovsrec_interface *iface; + const char *csum = smap_get(&encap->options, "csum"); char *port_name; size_t i; @@ -206,6 +207,9 @@ tunnel_add(const struct sbrec_chassis *chassis_rec, ovsrec_interface_set_type(iface, encap->type); smap_add(&options, "remote_ip", encap->ip); smap_add(&options, "key", "flow"); + if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) { + smap_add(&options, "csum", csum); + } ovsrec_interface_set_options(iface, &options); smap_destroy(&options); @@ -303,15 +307,21 @@ check_and_update_tunnel(const struct ovsrec_port *port, { const struct sbrec_encap *encap = preferred_encap(chassis_rec); const struct ovsrec_interface *iface = port->interfaces[0]; + const char *csum = smap_get(&encap->options, "csum"); + const char *existing_csum = smap_get(&iface->options, "csum"); if (strcmp(encap->type, iface->type)) { ovsrec_interface_set_type(iface, encap->type); } const char *ip = smap_get(&iface->options, "remote_ip"); - if (!ip || strcmp(encap->ip, ip)) { + if (!ip || strcmp(encap->ip, ip) || + (!!csum != !!existing_csum || (csum && strcmp(csum, existing_csum)))) { struct smap options = SMAP_INITIALIZER(&options); smap_add(&options, "remote_ip", encap->ip); smap_add(&options, "key", "flow"); + if (csum && (!strcmp(csum, "true") || !strcmp(csum, "false"))) { + smap_add(&options, "csum", csum); + } ovsrec_interface_set_options(iface, &options); smap_destroy(&options); } diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index 13c9526..1cc650f 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -274,8 +274,14 @@ </column> <column name="options"> - Options for configuring the encapsulation, e.g. IPsec parameters when - IPsec support is introduced. No options are currently defined. + Options for configuring the encapsulation. Currently, the only + option that has been defined is <code>csum</code>. This indicates + that encapsulation checksums can be transmitted and received with + reasonable performance. It is a hint to senders transmitting data + to this chassis that they should used checksums to protect OVN + metadata. In addition, on Linux based hypervisors this will also + improve the throughput of encapsulated traffic in most cases. Set to + <code>true</code> to enable or <code>false</code> to disable. </column> <column name="ip"> diff --git a/ovn/utilities/ovn-sbctl.c b/ovn/utilities/ovn-sbctl.c index 50c9f17..cb2d477 100644 --- a/ovn/utilities/ovn-sbctl.c +++ b/ovn/utilities/ovn-sbctl.c @@ -557,6 +557,7 @@ cmd_chassis_add(struct ctl_context *ctx) size_t n_encaps = sset_count(&encap_set); struct sbrec_encap **encaps = xmalloc(n_encaps * sizeof *encaps); + const struct smap options = SMAP_CONST1(&options, "csum", "true"); const char *encap_type; int i = 0; SSET_FOR_EACH (encap_type, &encap_set){ @@ -564,6 +565,7 @@ cmd_chassis_add(struct ctl_context *ctx) sbrec_encap_set_type(encaps[i], encap_type); sbrec_encap_set_ip(encaps[i], encap_ip); + sbrec_encap_set_options(encaps[i], &options); i++; } sset_destroy(&encap_set); diff --git a/tests/ovn-controller-vtep.at b/tests/ovn-controller-vtep.at index 8eca16c..654c212 100644 --- a/tests/ovn-controller-vtep.at +++ b/tests/ovn-controller-vtep.at @@ -124,6 +124,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl Chassis br-vtep Encap vxlan ip: "1.2.3.4" + options: {csum="false"} ]) # deletes the chassis via ovn-sbctl and check that it is readded back diff --git a/tests/ovn-sbctl.at b/tests/ovn-sbctl.at index 72dc441..9393eef 100644 --- a/tests/ovn-sbctl.at +++ b/tests/ovn-sbctl.at @@ -93,6 +93,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl Chassis "ch0" Encap stt ip: "1.2.3.5" + options: {csum="true"} Port_Binding "vif0" ]) @@ -105,6 +106,7 @@ AT_CHECK([ovn-sbctl show | sed 's/vif[[0-9]]/vif/'], [0], [dnl Chassis "ch0" Encap stt ip: "1.2.3.5" + options: {csum="true"} Port_Binding "vif" Port_Binding "vif" ]) @@ -116,6 +118,7 @@ AT_CHECK([ovn-sbctl show], [0], [dnl Chassis "ch0" Encap stt ip: "1.2.3.5" + options: {csum="true"} Port_Binding "vif0" ]) -- 2.7.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev